@@ -195,23 +195,23 @@ namespace {
195195 }
196196
197197
198- inline auto hash_insert (unsigned int const amp) -> bool {
198+ inline auto hash_insert (Hashtable & hash_table, unsigned int const amp) -> bool {
199199 /* find the first empty bucket */
200200 const auto hash = db_gethash (amp);
201- auto index = hash_getindex (hash);
201+ auto index = hash_table. getindex (hash);
202202 auto has_duplicate = false ;
203- while (hash_is_occupied (index)) {
204- auto const is_same_amplicon = hash_compare_value (index, hash) and
205- check_amp_identical (amp, hash_get_data (index));
203+ while (hash_table. is_occupied (index)) {
204+ auto const is_same_amplicon = hash_table. compare_value (index, hash) and
205+ check_amp_identical (amp, hash_table. get_data (index));
206206 if (is_same_amplicon) {
207207 has_duplicate = true ;
208208 }
209- index = hash_getnextindex (index);
209+ index = hash_table. getnextindex (index);
210210 }
211211
212- hash_set_occupied (index);
213- hash_set_value (index, hash);
214- hash_set_data (index, amp);
212+ hash_table. set_occupied (index);
213+ hash_table. set_value (index, hash);
214+ hash_table. set_data (index, amp);
215215 bloomflex_set (bloom_a, hash);
216216
217217 return has_duplicate;
@@ -347,7 +347,8 @@ namespace {
347347 }
348348
349349
350- auto hash_check_attach (char * seed_sequence,
350+ auto hash_check_attach (Hashtable const & hash_table,
351+ char * seed_sequence,
351352 unsigned int seed_seqlen,
352353 struct var_s & var,
353354 unsigned int seed,
@@ -357,16 +358,16 @@ namespace {
357358
358359 /* compute hash and corresponding hash table index */
359360 const auto hash = var.hash ;
360- auto index = hash_getindex (hash);
361+ auto index = hash_table. getindex (hash);
361362
362363 /* find matching buckets */
363364
364- while (hash_is_occupied (index))
365+ while (hash_table. is_occupied (index))
365366 {
366- if (hash_compare_value (index, hash))
367+ if (hash_table. compare_value (index, hash))
367368 {
368369 /* check that mass is below threshold */
369- const auto amp = hash_get_data (index);
370+ const auto amp = hash_table. get_data (index);
370371
371372 /* make absolutely sure sequences are identical */
372373 auto const * amp_sequence = db_getsequence (amp);
@@ -377,13 +378,14 @@ namespace {
377378 return true ;
378379 }
379380 }
380- index = hash_getnextindex (index);
381+ index = hash_table. getnextindex (index);
381382 }
382383 return false ;
383384 }
384385
385386
386- inline auto check_heavy_var_2 (std::vector<char >& seq,
387+ inline auto check_heavy_var_2 (Hashtable const & hash_table,
388+ std::vector<char >& seq,
387389 unsigned int seqlen,
388390 unsigned int seed,
389391 std::vector<struct var_s >& variant_list,
@@ -399,7 +401,7 @@ namespace {
399401
400402 for (auto i = 0U ; i < variant_count; ++i) {
401403 if (bloomflex_get (bloom_a, variant_list[i].hash ) and
402- hash_check_attach (seq.data (), seqlen, variant_list[i], seed, graft_state)) {
404+ hash_check_attach (hash_table, seq.data (), seqlen, variant_list[i], seed, graft_state)) {
403405 ++matches;
404406 }
405407 }
@@ -408,7 +410,8 @@ namespace {
408410 }
409411
410412
411- auto check_heavy_var (struct bloomflex_s * bloom,
413+ auto check_heavy_var (Hashtable const & hash_table,
414+ struct bloomflex_s * bloom,
412415 std::vector<char >& varseq,
413416 unsigned int seed,
414417 uint64_t & number_of_matches,
@@ -452,7 +455,8 @@ namespace {
452455 auto varlen = 0U ;
453456 generate_variant_sequence (sequence, seqlen,
454457 var, varseq, varlen);
455- matches += check_heavy_var_2 (varseq,
458+ matches += check_heavy_var_2 (hash_table,
459+ varseq,
456460 varlen,
457461 seed,
458462 variant_list2,
@@ -466,6 +470,7 @@ namespace {
466470
467471
468472 auto check_heavy_thread (struct Parameters const & parameters,
473+ Hashtable const & hash_table,
469474 int64_t nth_thread,
470475 struct Heavy_state & heavy_state,
471476 struct Graft_state & graft_state,
@@ -500,7 +505,7 @@ namespace {
500505 lock.unlock ();
501506 uint64_t number_of_matches {0 };
502507 uint64_t number_of_variants {0 };
503- check_heavy_var (bloom_f, buffer1, heavy_amplicon_id,
508+ check_heavy_var (hash_table, bloom_f, buffer1, heavy_amplicon_id,
504509 number_of_matches, number_of_variants,
505510 variant_list, variant_list2,
506511 graft_state);
@@ -511,7 +516,8 @@ namespace {
511516 }
512517
513518
514- auto mark_light_var (struct bloomflex_s * bloom,
519+ auto mark_light_var (Hashtable & hash_table,
520+ struct bloomflex_s * bloom,
515521 unsigned int seed,
516522 std::vector<struct var_s >& variant_list) -> uint64_t
517523 {
@@ -522,7 +528,7 @@ namespace {
522528 seed is the original seed
523529 */
524530
525- hash_insert (seed);
531+ hash_insert (hash_table, seed);
526532
527533 auto const * sequence = db_getsequence (seed);
528534 const auto seqlen = db_getsequencelen (seed);
@@ -538,6 +544,7 @@ namespace {
538544
539545
540546 auto mark_light_thread (struct Parameters const & parameters,
547+ Hashtable & hash_table,
541548 int64_t nth_thread,
542549 struct Light_state & state,
543550 struct Progress_status & progress) -> void
@@ -564,7 +571,7 @@ namespace {
564571 {
565572 progress_update (progress, ++state.progress ); // refactoring: separate operations?
566573 lock.unlock ();
567- const auto variant_count = mark_light_var (bloom_f, light_amplicon_id,
574+ const auto variant_count = mark_light_var (hash_table, bloom_f, light_amplicon_id,
568575 variant_list);
569576 lock.lock ();
570577 state.variants += variant_count;
@@ -577,6 +584,7 @@ namespace {
577584
578585
579586 inline auto find_variant_matches (struct Parameters const & parameters,
587+ Hashtable const & hash_table,
580588 unsigned int seed,
581589 struct var_s & var,
582590 std::vector<unsigned int >& hits_data,
@@ -588,15 +596,15 @@ namespace {
588596
589597 /* compute hash and corresponding hash table index */
590598
591- auto index = hash_getindex (var.hash );
599+ auto index = hash_table. getindex (var.hash );
592600
593601 /* find matching buckets */
594602
595- while (hash_is_occupied (index))
603+ while (hash_table. is_occupied (index))
596604 {
597- if (hash_compare_value (index, var.hash ))
605+ if (hash_table. compare_value (index, var.hash ))
598606 {
599- const auto amp = hash_get_data (index);
607+ const auto amp = hash_table. get_data (index);
600608
601609 /* avoid self */
602610 if (seed != amp) {
@@ -620,12 +628,13 @@ namespace {
620628 }
621629 }
622630 }
623- index = hash_getnextindex (index);
631+ index = hash_table. getnextindex (index);
624632 }
625633 }
626634
627635
628636 auto check_variants (struct Parameters const & parameters,
637+ Hashtable const & hash_table,
629638 unsigned int seed,
630639 std::vector<struct var_s > & variant_list,
631640 std::vector<unsigned int >& hits_data) -> unsigned int
@@ -640,17 +649,18 @@ namespace {
640649 // C++17 refactoring:
641650 // std::for_each_n(variant_list.begin(), variant_count,
642651 // [seed, &hits_data, &hits_count](auto& variant) {
643- // find_variant_matches(parameters, seed, variant, hits_data, hits_count);
652+ // find_variant_matches(parameters, hash_table, seed, variant, hits_data, hits_count);
644653 // });
645654 for (auto i = 0U ; i < variant_count; ++i) {
646- find_variant_matches (parameters, seed, variant_list[i], hits_data, hits_count);
655+ find_variant_matches (parameters, hash_table, seed, variant_list[i], hits_data, hits_count);
647656 }
648657
649658 return hits_count;
650659 }
651660
652661
653662 auto network_thread (struct Parameters const & parameters,
663+ Hashtable const & hash_table,
654664 int64_t nth_thread,
655665 struct Network_state & state,
656666 struct Progress_status & progress) -> void
@@ -673,7 +683,7 @@ namespace {
673683
674684 lock.unlock ();
675685
676- const auto hits_count = check_variants (parameters, amp, variant_list, hits_data);
686+ const auto hits_count = check_variants (parameters, hash_table, amp, variant_list, hits_data);
677687 lock.lock ();
678688
679689 assert (amp <= std::numeric_limits<std::ptrdiff_t >::max ());
@@ -1150,13 +1160,8 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
11501160
11511161
11521162 /* compute hash for all amplicons and store them in a hash table */
1153- std::vector<unsigned char > hash_occupied_v;
1154- std::vector<uint64_t > hash_values_v;
1155- std::vector<unsigned int > hash_data_v;
1156- const auto hashtablesize = hash_alloc (amplicons,
1157- hash_occupied_v,
1158- hash_values_v,
1159- hash_data_v);
1163+ Hashtable hash_table;
1164+ const auto hashtablesize = hash_table.allocate (amplicons);
11601165 static constexpr unsigned int amplicon_pattern_shift {10 };
11611166 static constexpr unsigned int amplicon_n_hash_functions {8 };
11621167 struct bloomflex_s bloom_filter;
@@ -1169,7 +1174,7 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
11691174 bool has_duplicate {false };
11701175 for (auto k = 0U ; k < amplicons; ++k)
11711176 {
1172- has_duplicate = hash_insert (k);
1177+ has_duplicate = hash_insert (hash_table, k);
11731178 progress_update (progress, k);
11741179 if (has_duplicate) {
11751180 break ;
@@ -1200,8 +1205,8 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
12001205 // refactoring C++14: use std::make_unique
12011206 std::unique_ptr<ThreadRunner> network_tr (new ThreadRunner (
12021207 static_cast <int >(parameters.opt_threads ),
1203- [¶meters, &network_state, &progress](int64_t nth_thread) {
1204- network_thread (parameters, nth_thread, network_state, progress);
1208+ [¶meters, &hash_table, & network_state, &progress](int64_t nth_thread) {
1209+ network_thread (parameters, hash_table, nth_thread, network_state, progress);
12051210 }));
12061211 network_tr->run ();
12071212 }
@@ -1448,7 +1453,7 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
14481453 /* Empty the old hash and bloom filter
14491454 before we reinsert only the light swarm amplicons */
14501455
1451- std::fill (hash_occupied_v. begin (), hash_occupied_v. end (), 0U );
1456+ hash_table. clear ( );
14521457 bloomflex_zap (bloom_filter);
14531458
14541459 progress_init (progress, " Adding light swarm amplicons to Bloom filter" ,
@@ -1465,8 +1470,8 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
14651470 // refactoring C++14: use std::make_unique
14661471 std::unique_ptr<ThreadRunner> light_tr (new ThreadRunner (
14671472 static_cast <int >(parameters.opt_threads ),
1468- [¶meters, &light_state, &progress](int64_t nth_thread) {
1469- mark_light_thread (parameters, nth_thread, light_state, progress);
1473+ [¶meters, &hash_table, & light_state, &progress](int64_t nth_thread) {
1474+ mark_light_thread (parameters, hash_table, nth_thread, light_state, progress);
14701475 }));
14711476 light_tr->run ();
14721477 }
@@ -1492,8 +1497,8 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
14921497 // refactoring C++14: use std::make_unique
14931498 std::unique_ptr<ThreadRunner> heavy_tr (new ThreadRunner (
14941499 static_cast <int >(parameters.opt_threads ),
1495- [¶meters, &heavy_state, &graft_state, &progress](int64_t nth_thread) {
1496- check_heavy_thread (parameters, nth_thread, heavy_state, graft_state, progress);
1500+ [¶meters, &hash_table, & heavy_state, &graft_state, &progress](int64_t nth_thread) {
1501+ check_heavy_thread (parameters, hash_table, nth_thread, heavy_state, graft_state, progress);
14971502 }));
14981503 heavy_tr->run ();
14991504 }
@@ -1523,7 +1528,6 @@ auto algo_d1_run(struct Parameters const & parameters) -> void
15231528 std::fprintf (parameters.logfile , " Max generations: %u\n " , maxgen);
15241529
15251530 bloomflex_exit (bloom_filter);
1526- hash_free ();
15271531
15281532 swarminfo = nullptr ;
15291533 ampinfo = nullptr ;
0 commit comments