Skip to content

Commit b6f8148

Browse files
committed
Add robin_hood maps
1 parent 5e48429 commit b6f8148

22 files changed

Lines changed: 2405 additions & 50 deletions

BMEAN

src/CONSENT-correction.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,22 @@
1313
#include "../CTPL/ctpl_stl.h"
1414

1515
std::mutex outMtx;
16-
std::unordered_map<std::string, std::vector<bool>> readIndex;
16+
robin_hood::unordered_map<std::string, std::vector<bool>> readIndex;
1717
bool doTrimRead = true;
1818

1919
std::pair<std::string, std::string> processRead(int id, std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors,unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path) {
2020
std::string readId = alignments.begin()->qName;
21-
std::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
21+
robin_hood::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
2222
std::vector<std::pair<unsigned, unsigned>> pilesPos = getAlignmentWindowsPositions(alignments.begin()->qLength, alignments, minSupport, maxSupport, windowSize, windowOverlap);
2323
if (pilesPos.size() == 0) {
2424
return std::make_pair(readId, "");
2525
}
2626
unsigned i = 0;
2727

2828
// Compute consensuses for all the piles
29-
std::pair<std::string, std::unordered_map<kmer, unsigned>> resCons;
29+
std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>> resCons;
3030
std::vector<std::string> consensuses(pilesPos.size());
31-
std::vector<std::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
31+
std::vector<robin_hood::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
3232
std::vector<std::string> curPile;
3333
std::vector<std::string> templates(pilesPos.size());
3434
for (i = 0; i < pilesPos.size(); i++) {

src/CONSENT-correction.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
#include <map>
1515
#include "utils.h"
1616
#include "Overlap.h"
17+
#include "robin_hood.h"
1718

1819
std::pair<std::string, std::string> processRead(int id, std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path);
1920

20-
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
21+
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);

src/CONSENT-polishing.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,21 @@
1515
#include "../CTPL/ctpl_stl.h"
1616

1717
std::mutex outMtx;
18-
std::unordered_map<std::string, std::vector<bool>> readIndex;
18+
robin_hood::unordered_map<std::string, std::vector<bool>> readIndex;
1919
bool doTrimRead = false;
2020

2121
std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors,unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path, unsigned nbThreads) {
2222
std::string readId = alignments.begin()->qName;
23-
std::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
23+
robin_hood::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
2424
std::vector<std::pair<unsigned, unsigned>> pilesPos = getAlignmentWindowsPositions(alignments.begin()->qLength, alignments, minSupport, maxSupport, windowSize, windowOverlap);
2525
if (pilesPos.size() == 0) {
2626
return std::make_pair(readId, "");
2727
}
2828

2929
// Compute consensuses for all the piles
30-
std::pair<std::string, std::unordered_map<kmer, unsigned>> resCons;
30+
std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>> resCons;
3131
std::vector<std::string> consensuses(pilesPos.size());
32-
std::vector<std::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
32+
std::vector<robin_hood::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
3333
std::vector<std::string> curPile;
3434
std::vector<std::string> templates(pilesPos.size());
3535

@@ -42,7 +42,7 @@ std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignmen
4242
std::string curTpl;
4343

4444
// Load the first jobs
45-
vector<std::future<std::pair<std::string, std::unordered_map<kmer, unsigned>>>> results(poolSize);
45+
vector<std::future<std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>>>> results(poolSize);
4646
while (jobsLoaded < poolSize && jobsLoaded < jobsToProcess) {
4747
curPile = getAlignmentWindowsSequences(alignments, minSupport, windowSize, windowOverlap, sequences, pilesPos[jobsLoaded].first, pilesPos[jobsLoaded].second, merSize, maxSupport, commonKMers);
4848
templates[jobsLoaded] = curPile[0];
@@ -52,7 +52,7 @@ std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignmen
5252

5353
// Load the remaining jobs as other jobs terminate
5454
int curJob = 0;
55-
std::pair<std::string, std::unordered_map<kmer, unsigned>> curRes;
55+
std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>> curRes;
5656
while(jobsLoaded < jobsToProcess) {
5757
// Get the job results
5858
curRes = results[curJob].get();

src/CONSENT-polishing.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
#include <map>
1717
#include "utils.h"
1818
#include "Overlap.h"
19+
#include "robin_hood.h"
1920

2021
std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path, unsigned nbThreads);
2122

22-
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
23+
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);

src/DBG.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ std::string concatNucR(std::string f, int i) {
1515
}
1616
}
1717

18-
std::vector<std::string> getNeighbours(std::string kMer, unsigned merSize, int left, std::unordered_map<kmer, unsigned> merCounts, unsigned solidThresh) {
18+
std::vector<std::string> getNeighbours(std::string kMer, unsigned merSize, int left, robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned solidThresh) {
1919
std::vector<std::string> neighbours;
2020
std::string f, n, t = "";
2121
kmer k;
@@ -53,7 +53,7 @@ std::vector<std::string> getNeighbours(std::string kMer, unsigned merSize, int l
5353
return neighbours;
5454
}
5555

56-
unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
56+
unsigned extendLeft(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
5757
vector<string> neighbours;
5858
vector<string>::iterator it;
5959
unsigned dist = 0;
@@ -74,7 +74,7 @@ unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK,
7474
return dist;
7575
}
7676

77-
unsigned extendRight(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
77+
unsigned extendRight(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
7878
vector<string> neighbours;
7979
vector<string>::iterator it;
8080
unsigned dist = 0;
@@ -96,7 +96,7 @@ unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK,
9696
}
9797

9898

99-
int link(std::unordered_map<kmer, unsigned> merCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder) {
99+
int link(robin_hood::unordered_map<kmer, unsigned> merCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder) {
100100
if (curK < minOrder || *curBranches > maxBranches || dist > LRLen) {
101101
missingPart = std::string();
102102
return 0;

src/DBG.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
#include <unordered_map>
77
#include "../BMEAN/utils.h"
88
#include "reverseComplement.h"
9+
#include "robin_hood.h"
910

1011
using namespace std;
1112

12-
unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);
13+
// Declaration of extendLeft; the definition lives in DBG.cpp.
// NOTE(review): merCounts is declared by value, so the whole map is copied on
// every call. LR is a non-const reference, so the callee is able to modify the
// caller's string. Turning merCounts into a const reference would require the
// definition to be changed in the same commit.
unsigned extendLeft(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);
1314

14-
unsigned extendRight(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);
15+
// Declaration of extendRight; the definition lives in DBG.cpp.
// NOTE(review): merCounts is declared by value, so the whole map is copied on
// every call. LR is a non-const reference, so the callee is able to modify the
// caller's string. Turning merCounts into a const reference would require the
// definition to be changed in the same commit.
unsigned extendRight(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);
1516

16-
int link(std::unordered_map<kmer, unsigned> mapMerCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder);
17+
// Declaration of link; the definition lives in DBG.cpp, where the first
// parameter is spelled merCounts rather than mapMerCounts.
// NOTE(review): the map and the three std::string parameters srcSeed, tgtSeed
// and curExt are declared by value, so each call copies them. visited and
// missingPart are non-const references and curBranches is a pointer, so the
// callee is able to modify the caller's objects through them.
int link(robin_hood::unordered_map<kmer, unsigned> mapMerCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder);

src/OLDCONSENT.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#include <mutex>
2+
#include <future>
3+
#include <fstream>
4+
#include <sstream>
5+
#include <utility>
6+
#include <vector>
7+
#include <set>
8+
#include <algorithm>
9+
#include <string>
10+
#include <utility>
11+
#include <vector>
12+
#include <algorithm>
13+
#include <string>
14+
#include <iostream>
15+
#include <unistd.h>
16+
#include <map>
17+
#include "../BMEAN/utils.h"
18+
#include "../CTPL/ctpl_stl.h"
19+
#include "robin_hood.h"
20+
21+
/// A sequence string together with two integer positions, `beg` and `end`.
///
/// Instances are ordered by `beg`, with ties broken by `end`; the sequence
/// text itself never takes part in the comparison.
struct POASeq {
	std::string seq;
	// Default member initializers guarantee that a default-constructed POASeq
	// has well-defined positions instead of indeterminate values.
	int beg = 0;
	int end = 0;

	/// Strict ordering on (beg, end).
	/// @param s2 the element to compare against.
	/// @return true when this element sorts strictly before `s2`.
	bool operator<(const POASeq& s2) const {
		if (beg != s2.beg) {
			return beg < s2.beg;
		}
		return end < s2.end;
	}

	/// Builds an empty sequence with both positions set to 0.
	POASeq() = default;

	/// Builds an element from a sequence and its two positions.
	/// @param s sequence text; taken by value and moved into place.
	/// @param b value stored in `beg`.
	/// @param e value stored in `end`.
	POASeq(std::string s, int b, int e) : seq(std::move(s)), beg(b), end(e) {
	}
};
46+
47+
std::string polishCorrection(std::string correctedRead, robin_hood::unordered_map<kmer, unsigned>& merCounts, unsigned merSize, int solidThresh);
48+
49+
// std::vector<std::pair<std::string, std::string>> polishCorrection(std::string correctedRead, std::vector<std::pair<std::pair<int, int>, int>>& corPosPiles, std::vector<std::vector<std::string>>& piles, robin_hood::unordered_map<std::string, unsigned>& pilesMers, unsigned merSize, int solidThresh, int minGap, int maxGap);
50+
51+
// std::vector<std::pair<std::string, std::string>> polishCorrection(std::string correctedRead, std::vector<std::pair<std::pair<int, int>, int>>& corPosPiles, std::vector<std::vector<std::string>>& piles, unsigned merSize, int solidThresh, int minGap, int maxGap);
52+
53+
void removeBadSequences(std::vector<std::string>& sequences, std::string tplSeq, robin_hood::unordered_map<std::string, unsigned>& merCounts, unsigned merSize, unsigned commonKMers, unsigned solidThresh, unsigned windowSize);
54+
55+
std::string alignConsensuses(std::string rawRead, std::string sequence, std::vector<std::string>& consensuses, std::vector<robin_hood::unordered_map<kmer, unsigned>>& merCounts, std::vector<std::pair<unsigned, unsigned>>& pilesPos, std::vector<std::string>& templates, int startPos, unsigned windowSize, unsigned windowOverlap, unsigned solidThresh, unsigned merSize);
56+
57+
void processReads(std::vector<std::vector<std::string>>& reads, unsigned minSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned solidThresh, unsigned windowOverlap, std::string path);
58+
59+
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);

src/Overlap.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <sstream>
55
#include <iostream>
6+
#include "robin_hood.h"
67

78
struct Overlap {
89
std::string qName;
@@ -88,4 +89,4 @@ struct Overlap {
8889

8990
};
9091

91-
#endif
92+
#endif

src/alignmentPiles.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#include <iostream>
33
#include "alignmentPiles.h"
44

5-
std::unordered_map<std::string, std::string> getSequencesMap(std::vector<Overlap>& alignments, std::unordered_map<std::string, std::vector<bool>>& readIndex) {
6-
std::unordered_map<std::string, std::string> sequences;
5+
robin_hood::unordered_map<std::string, std::string> getSequencesMap(std::vector<Overlap>& alignments, robin_hood::unordered_map<std::string, std::vector<bool>>& readIndex) {
6+
robin_hood::unordered_map<std::string, std::string> sequences;
77
std::string header, seq;
88

99
// Insert template sequence

0 commit comments

Comments
 (0)