 ### algoritmos nuevos, mejoras varias.

parent dd9a0033
 ... ... @@ -548,7 +548,6 @@ binary_matrix& mul_AB(const binary_matrix& A, const binary_matrix& B, binary_mat const idx_t M = A.rows; const idx_t Ns = B.blocks_per_row; const idx_t K = B.rows; C.clear(); block_t mask = MSB; block_t koff = 0; for (idx_t k = 0; k < K; ++k) { ... ... @@ -577,7 +576,6 @@ binary_matrix& mul_AtB(const binary_matrix& A, const binary_matrix& B, binary_ma const idx_t K = A.rows; const idx_t M = A.cols; const idx_t Ns = B.blocks_per_row; C.clear(); for (idx_t k = 0; k < K; ++k) { block_t mask = MSB; block_t ioff = 0; ... ... @@ -614,7 +612,7 @@ binary_matrix& mul_ABt(const binary_matrix& A, const binary_matrix& B, binary_ma a ^= block_sum(A.get_block(i,k) & B.get_block(j,k)); // pA[k] & pB[k]); } //std::cout << "i=" << i << " j=" << j << " C(i,j)=" << a << std::endl; C.set(i,j,a); C.set(i,j,C.get(i,j) ^ a); } } return C; ... ...
 ... ... @@ -25,8 +25,8 @@ idx_t learn_model_traditional(binary_matrix& X, clock_t tic = clock(); clock_t t0 = clock(); const idx_t me = get_max_err_weight(); X.copy_to(E); mul(A,false,D,false,E); add(E,X,E); // // not memory efficient; good for studying algorithm // ... ... @@ -109,8 +109,8 @@ idx_t learn_model_alter1(binary_matrix& X, const idx_t ma = K; const idx_t me = 0; X.copy_to(E); mul(A,false,D,false,E); add(E,X,E); binary_matrix Dt(M,K); binary_matrix At(K,N); binary_matrix Et(M,N); ... ... @@ -180,9 +180,8 @@ idx_t learn_model_alter2(binary_matrix& X, const idx_t K = D.get_rows(); const idx_t ma = K; const idx_t me = 0; X.copy_to(E); mul(A,false,D,false,E); add(E,X,E); binary_matrix Dt(M,K); binary_matrix At(K,N); binary_matrix Et(M,N); ... ... @@ -258,8 +257,8 @@ idx_t learn_model_alter3(binary_matrix& X, const idx_t K = D.get_rows(); const idx_t ma = K; const idx_t me = 0; mul(A,false,D,false,E); add(E,X,E); X.copy_to(E); mul(A,false,D,false,E); binary_matrix Dt(M,K); binary_matrix At(K,N); ... ... @@ -454,8 +453,8 @@ idx_t learn_model_mdl_backward_selection(binary_matrix& X, const idx_t N = E.get_rows(); idx_t K = D.get_rows(); learn_model_inner(X,H,E,D,A); //mul(A,false,D,false,E); //add(E,X,E); X.copy_to(E); //mul(A,false,D,false,E); codelength Lparts = model_codelength(E,D,A); idx_t bestL = Lparts.X; idx_t bestK = K; ... ...
 ... ... @@ -23,7 +23,8 @@ mi_algorithm_t mi_algorithm_catalog[] = { initialize_dictionary_hamming, initialize_dictionary_random, initialize_dictionary_samples, initialize_dictionary_association, initialize_dictionary_coverage, initialize_dictionary_normalized_coverage, 0}; const char* mi_algorithm_names[] = { ... ... @@ -34,7 +35,8 @@ const char* mi_algorithm_names[] = { "Hamming basis", "purely random dictionary", "random samples from data", "Association initialization", "Coverage initialization", "Normalized coverage initialization", 0 }; ... ... @@ -43,7 +45,7 @@ es_algorithm_t es_algorithm_catalog[] = { coefficients_update_basic, coefficients_update_bmp, coefficients_update_bmp_omp, coefficients_update_lh, coefficients_update_greedy_global, 0}; const char* es_algorithm_names[] = { ... ... @@ -51,7 +53,7 @@ const char* es_algorithm_names[] = { "Basic coefficients update", "BMP coefficients update", "BMP coefficients update (OpenMP)", "Least Hamming coefficients update", "Global greedy Hamming error reduction", 0 }; ... ... @@ -84,13 +86,27 @@ const char* lm_algorithm_names[] = {"Model learning by traditional alternate des "MDL/full search" }; static int count_options(const char* names[]) { int n = 0; while (names[n]) { n++; } return n; } void learn_model_setup(int mi_algo, int es_algo, int du_algo, int lm_algo, int lmi_algo) { if (mi_algo > 7) { std::cerr << "Invalid model initialization algorithm (0-" << 7 << ')' << std::endl; exit(-1); } if (es_algo > 4) { std::cerr << "Invalid coefficients update algorithm (0-" << 4 << ')' << std::endl; exit(-1); } if (du_algo > 3) { std::cerr << "Invalid dictionary update algorithm (0-" << 3 << ')' << std::endl; exit(-1); } if (lm_algo > 6) { std::cerr << "Invalid model learning algorithm (0-" << 6 << ')' << std::endl; exit(-1); } if (lmi_algo > 3) { std::cerr << "Invalid inner model learning algorithm (0-" << 3 << ')' << std::endl; exit(-1); } void learn_model_setup(int mi_algo, int es_algo, int du_algo, int lm_algo, int lmi_algo) { int n; n = count_options(mi_algorithm_names); if (mi_algo > n) { std::cerr << "Invalid model initialization algorithm (0-" << n << ')' << std::endl; exit(-1); } n = count_options(es_algorithm_names); if (es_algo > n) { std::cerr << "Invalid coefficients update algorithm (0-" << n << ')' << std::endl; exit(-1); } n = count_options(du_algorithm_names); if (du_algo > n) { std::cerr << "Invalid dictionary update algorithm (0-" << n << ')' << std::endl; exit(-1); } n = count_options(lm_algorithm_names); if (lm_algo > n) { std::cerr << "Invalid model learning algorithm (0-" << n << ')' << std::endl; exit(-1); } n = count_options(lm_algorithm_names); if (lmi_algo > n) { std::cerr << "Invalid inner model learning algorithm (0-" << n << ')' << std::endl; exit(-1); } initialize_dictionary = mi_algorithm_catalog[mi_algo]; std::cout << "Using " << mi_algorithm_names[mi_algo] << std::endl; ... ...
 ... ... @@ -177,6 +177,7 @@ int main(int argc, char **argv) { X.copy_to(E); // at this point X contains the residual std::cout << "Average residual weight=" << (double)E.weight()/(double)rows << std::endl; std::cout << "Average coefficients weight=" << (double)A.weight()/(double)rows << std::endl; X.clear(); mul(A,false,D,false,X); // now X is the estimated denoised signal // // 3. write output ... ...
 ... ... @@ -33,14 +33,14 @@ void initialize_dictionary_random(const binary_matrix& E, A.clear(); } /// computes an association score for each row i /// computes an coverage score for each row i /// s_i = \sum_{j neq i} C(i,j) where C(i,j) = h(E_i AND E_j) (*) /// and then chooses the K rows with the highest s_i /// as initial dictionary atoms. /// /// (*) actually, original version is normalized: C(i,j) = h(E_i AND E_j) / h(E_i) /// void initialize_dictionary_association(const binary_matrix& E, void initialize_dictionary_coverage(const binary_matrix& E, const binary_matrix& H, binary_matrix& D, binary_matrix& A) { ... ... @@ -78,6 +78,64 @@ void initialize_dictionary_association(const binary_matrix& E, A.clear(); } /// computes an coverage score for each row i /// s_i = \sum_{j neq i} C(i,j) where C(i,j) = h(E_i AND E_j) / h(E_i) /// and then chooses the K rows with the highest s_i /// as initial dictionary atoms. void initialize_dictionary_normalized_coverage(const binary_matrix& E, const binary_matrix& H, binary_matrix& D, binary_matrix& A) { const idx_t K = D.get_rows(); const idx_t M = D.get_cols(); const idx_t N = E.get_rows(); binary_matrix rowi,rowj,iandj; rowi.allocate(1, M); rowj.allocate(1, M); iandj.allocate ( 1, M ); aux_t* scores = new aux_t[N]; idx_t* weights = new idx_t[N]; idx_t max_w = 0; for (idx_t i = 0; i < N; i++) { weights[i] = E.row_weight(i); if (weights[i] > max_w) max_w = weights[i]; } for (idx_t i = 0; i < N; i++) { scores[i].second = i; // original index scores[i].first = 0; // score if (!weights[i]) { // i is all zeroes: ignore continue; } E.copy_row_to(i,rowi); for (idx_t j = 0; j < N; j++) { if ( (j == i) || !weights[j] ) { // i=j or row j is all zeroes continue; } E.copy_row_to(j,rowj); bool_and(rowi,rowj,iandj); scores[i].first += iandj.weight(); } scores[i].first = scores[i].first * max_w / weights[i]; //integer is ok } counting_sort(scores,N); for (idx_t i = 0; i < K; i++) { std::cout << i << ":" << scores[N-i-1].first << " " << scores[N-i-1].second << std::endl; E.copy_row_to(scores[N-i-1].second, rowi); D.set_row(i,rowi); } delete[] weights; delete[] scores; rowi.destroy(); rowj.destroy(); iandj.destroy(); A.clear(); } /** Haar basis **/ void initialize_dictionary_haar(const binary_matrix& E, const binary_matrix& H, ... ...
 ... ... @@ -4,15 +4,27 @@ #include "binmat.h" /** * Initialize dictionary to the K samples that are more similar to * the rest of the rows. * Initialize dictionary to the K samples that cover most of * the ones in the rest of the data. This does not take * into account the weight of the sample, so an all 1's will * always win here. */ void initialize_dictionary_association(const binary_matrix& E, void initialize_dictionary_coverage(const binary_matrix& E, const binary_matrix& H, binary_matrix& D, binary_matrix& A); /** * Initialize dictionary to the K samples that cover most of the ones * in the rest of the data, taking into account their relative weight, * so that candidates that are too dense are discouraged. */ void initialize_dictionary_normalized_coverage(const binary_matrix& E, const binary_matrix& H, binary_matrix& D, binary_matrix& A); /** * Each atom $d_k$ is initialized using a randomly drawn sample from $X$, * $c_k$. Then $d_k$ is computed as the Hamming average of all samples $x$ ... ...
 ... ... @@ -155,6 +155,7 @@ int main(int argc, char **argv) { X.copy_to(E); // at this point X contains the residual std::cout << "Average residual weight=" << (double)E.weight()/(double)rows << std::endl; std::cout << "Average coefficients weight=" << (double)A.weight()/(double)rows << std::endl; X.clear(); mul(A,false,D,false,X); // now X is the estimated denoised signal // // 3. write output ... ...
 ... ... @@ -253,8 +253,8 @@ int main(int argc, char **argv) { if (force_residual_mosaic) { render_mosaic(E,"__final_residual_mosaic__.pbm"); } X.copy_to(E); mul(A,false,D,false,E); add(E,X,E); std::cout << "FINAL: h(E)=" << E.weight() << " h(A)=" << A.weight() << " h(D)=" << D.weight() ... ...
 ... ... @@ -735,13 +735,77 @@ idx_t coefficients_update_missing_data_omp(binary_matrix& E, * max_a_weight is the maximum allowed number of ones in any row of A * max_e_weight is the maximum allowed number of ones in any row of E */ idx_t coefficients_update_lh(binary_matrix& E, idx_t coefficients_update_greedy_global(binary_matrix& E, const binary_matrix& H, const binary_matrix& D, binary_matrix& A, const idx_t max_a_weight, const idx_t max_e_weight) { const idx_t K = D.get_rows(); const idx_t N = E.get_rows(); const idx_t M = D.get_cols(); binary_matrix Dk, Ak, Abest, Ei; A.clear(); Dk.allocate(1,M); Ei.allocate(1,M); Ak.allocate(1,N); Abest.allocate(1,N); unsigned char* used = new unsigned char[K]; memset(used,0,K*sizeof(unsigned char)); idx_t unused = K; idx_t prev_score = M*N+1; while (unused) { idx_t best_score = prev_score; std::cout << "unused=" << unused << "prev score=" << prev_score << std::endl; idx_t best_k = K+1; for (idx_t k = 0; k < K; k++) { if (used[k]) continue; idx_t score = 0; D.copy_row_to(k,Dk); if (!Dk.weight()) { continue; } for (idx_t i = 0; i < N; i++) { E.copy_row_to(i,Ei); idx_t w0 = Ei.weight(); add(Dk,Ei,Ei); idx_t w1 = Ei.weight(); if (w0 > w1) { Ak.set(i); score += w1; } else { Ak.clear(i); score += w0; } } if (score < best_score) { best_score = score; // new best score best_k = k; // this is so far the best cand. Ak.copy_to(Abest); // save its companion A^k } } if (best_k == K+1) { std::cout << "No further improvement." << std::endl; break; } // we've got our winner // add it to solution and set its corresponding column A.set_col(best_k,Abest); // update error matrix D.copy_col_to(best_k, Dk); std::cout << "|Dk|=" << Dk.weight() << " |Ak|="<< Abest.weight() << "|E|=" << E.weight() << " best_k=" << best_k << " score=" << best_score << " unused=" << unused << std::endl; mul(Abest,true,Dk,false,E); std::cout << "|E|=" << E.weight() << " best_k=" << best_k << " score=" << best_score << " unused=" << unused << std::endl; used[best_k] = 1; unused--; } Abest.destroy(); Ak.destroy(); Ei.destroy(); Dk.destroy(); delete[] used; return 0; } ... ...
 ... ... @@ -61,26 +61,27 @@ idx_t coefficients_update_omp(binary_matrix& E, const idx_t max_a_weight, const idx_t max_e_weight); /** * Given a current error E, dictionary D and coefficients A, update E and A. * This algorithm seeks to minimize the Least Hamming Error (LH) * by repeatedly adding a pair (d_k,a_k) each time to the approximation so * that the error is minimal w.r.t all other possible atoms d_k' != d_k. * (note that, given an atom d_k, the choice of a_k is deterministic, so there * are only k choices at each iteration). * * Given a current error E, dictionary D and coefficients A, update coefficients and error, * so that the total weight of the error E is reduced. * * Instead of working one sample at a time, this algorithm operates on the whole signal, * starting with A = 0 and selecting, at each iteration, the atom in D that minimizes * E XOR A^k D_k * This implies an exhaustive, greedy search on D, but for each D_k, the corresponding A^k * has a closed form, so the algorithm requires O(k^2) iterations. * E = AD is an n x m matrix * D is a p x m matrix, where each row is an atom of dimension m * A is a n x p matrix, where each row contains the coefficients for representing the corresponding row of X=AD+E * max_a_weight is the maximum allowed number of ones in any row of A * max_e_weight is the maximum allowed number of ones in any row of E */ idx_t coefficients_update_lh(binary_matrix& E, const binary_matrix& H, const binary_matrix& D, binary_matrix& A, const idx_t max_a_weight, const idx_t max_e_weight); idx_t coefficients_update_greedy_global(binary_matrix& E, const binary_matrix& H, const binary_matrix& D, binary_matrix& A, const idx_t max_a_weight, const idx_t max_e_weight); #endif
 ... ... @@ -31,8 +31,8 @@ int main(int argc, char **argv) { binary_matrix A(N,K); binary_matrix E(N,M); binary_matrix V(1,M), P(sqrt(M),sqrt(M)); X.copy_to(E); mul(A,false,D,false,E); add(E,X,E); initialize_model(X,D,A); std::cout << "M=" << M << " N=" << N << " K=" << K << std::endl; ... ...
 ... ... @@ -31,8 +31,8 @@ int main(int argc, char **argv) { binary_matrix A(N,K); binary_matrix E(N,M); binary_matrix V(1,M), P(sqrt(M),sqrt(M)); X.copy_to(E); mul(A,false,D,false,E); add(E,X,E); initialize_model(X,D,A); std::cout << "M=" << M << " N=" << N << " K=" << K << std::endl; ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!