Skip to content

Commit a89c71b

Browse files
authored
Merge pull request #157 from ate47/fix_multisection
Fix #104 "java.lang.NegativeArraySizeException parsing Wikidata HDT file" and fix some warnings
2 parents 8ddb098 + a2db323 commit a89c71b

17 files changed

Lines changed: 1163 additions & 404 deletions

hdt-java-core/src/main/java/org/rdfhdt/hdt/compact/integer/VByte.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import java.nio.ByteBuffer;
3535

3636
import org.rdfhdt.hdt.util.Mutable;
37+
import org.rdfhdt.hdt.util.io.BigByteBuffer;
3738
import org.rdfhdt.hdt.util.io.BigMappedByteBuffer;
3839

3940
/**
@@ -142,7 +143,7 @@ public static int encode(byte[] data, int offset, int value) {
142143

143144
return i;
144145
}
145-
146+
146147
public static int decode(byte[] data, int offset, Mutable<Long> value) {
147148
long out = 0;
148149
int i=0;
@@ -157,6 +158,21 @@ public static int decode(byte[] data, int offset, Mutable<Long> value) {
157158
value.setValue(out);
158159
return i;
159160
}
161+
162+
public static int decode(BigByteBuffer data, long offset, Mutable<Long> value) {
163+
long out = 0;
164+
int i = 0;
165+
int shift=0;
166+
while( (0x80 & data.get(offset+i))==0) {
167+
out |= (data.get(offset+i) & 127) << shift;
168+
i++;
169+
shift+=7;
170+
}
171+
out |= (data.get(offset+i) & 127) << shift;
172+
i++;
173+
value.setValue(out);
174+
return i;
175+
}
160176

161177
public static void show(byte[] data, int len) {
162178
for(int i=0;i<len;i++) {

hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/DictionaryCat.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatMappingBack;
55
import org.rdfhdt.hdt.listener.ProgressListener;
66

7+
import java.io.IOException;
78
import java.util.HashMap;
89

910
public interface DictionaryCat {
10-
void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener);
11+
void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener) throws IOException;
1112
CatMappingBack getMappingS();
1213
long getNumShared();
1314
HashMap<String, CatMapping> getAllMappings();

hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/impl/FourSectionDictionaryCat.java

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,18 @@
3636

3737
public class FourSectionDictionaryCat implements DictionaryCat {
3838

39-
private String location;
40-
private int DEFAULT_BLOCK_SIZE = 16;
41-
private int BLOCK_PER_BUFFER = 1000000;
39+
private final HashMap<String,CatMapping> allMappings = new HashMap<>();
40+
private final String location;
4241
private long numShared;
4342

44-
private HashMap<String,CatMapping> allMappings = new HashMap<>();
4543

4644
private CatMappingBack mappingS;
4745

4846
/**
 * Creates a dictionary-cat instance bound to the given location.
 *
 * @param location string stored as-is; {@code cat} passes it to the
 *                 {@code CatMapping} instances it creates and prepends it to
 *                 the {@code "section" + j} file names it reads
 */
public FourSectionDictionaryCat(String location) {
4947
this.location = location;
5048
}
5149

52-
public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener){
50+
public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener listener) throws IOException {
5351
allMappings.put("P1",new CatMapping(location,"P1",dictionary1.getPredicates().getNumberOfElements()));
5452
allMappings.put("P2",new CatMapping(location,"P2",dictionary2.getPredicates().getNumberOfElements()));
5553
allMappings.put("S1",new CatMapping(location,"S1",dictionary1.getSubjects().getNumberOfElements()));
@@ -64,7 +62,6 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
6462

6563
int numCommonPredicates = 0;
6664
CatIntersection commonP1P2 = new CatIntersection(new CatWrapper(dictionary1.getPredicates().getSortedEntries(),"P1"),new CatWrapper(dictionary2.getPredicates().getSortedEntries(),"P2"));
67-
long maxPredicates = dictionary1.getPredicates().getNumberOfElements()+dictionary2.getPredicates().getNumberOfElements();
6865
while (commonP1P2.hasNext()){
6966
commonP1P2.next();
7067
numCommonPredicates++;
@@ -76,8 +73,6 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
7673
addPredicatesList.add(new CatWrapper(dictionary1.getPredicates().getSortedEntries(),"P1"));
7774
addPredicatesList.add(new CatWrapper(dictionary2.getPredicates().getSortedEntries(),"P2"));
7875
CatUnion itAddPredicates = new CatUnion(addPredicatesList);
79-
// while (itAddPredicates.hasNext())
80-
// System.out.println(itAddPredicates.next().entity);
8176
SectionUtil.createSection(location,numPredicates, 4,itAddPredicates, new CatUnion(new ArrayList<>()),allMappings,0, listener);
8277
System.out.println("SUBJECTS-------------------");
8378
ArrayList<Iterator<CatElement>> skipSubjectList = new ArrayList<>();
@@ -160,7 +155,6 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
160155
i2.next();
161156
numCommonS1O2++;
162157
}
163-
Iterator<? extends CharSequence> it = dictionary2.getSubjects().getSortedEntries();
164158
i2 = new CatIntersection(new CatWrapper(dictionary1.getObjects().getSortedEntries(),"O1"), new CatWrapper(dictionary2.getSubjects().getSortedEntries(),"S2"));
165159
int numCommonO1S2=0;
166160
while (i2.hasNext()){
@@ -210,7 +204,7 @@ public void cat(Dictionary dictionary1, Dictionary dictionary2, ProgressListener
210204
}
211205
try {
212206
InputStream in = new FileInputStream(location + "section" + j);
213-
int b = 0;
207+
int b;
214208
while ((b = in.read(buf)) >= 0) {
215209
outFinal.write(buf, 0, b);
216210
outFinal.flush();

hdt-java-core/src/main/java/org/rdfhdt/hdt/dictionary/impl/FourSectionDictionaryDiff.java

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,28 @@
44
import org.rdfhdt.hdt.compact.bitmap.ModifiableBitmap;
55
import org.rdfhdt.hdt.dictionary.Dictionary;
66
import org.rdfhdt.hdt.dictionary.DictionaryDiff;
7-
import org.rdfhdt.hdt.dictionary.impl.utilCat.*;
7+
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatElement;
8+
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatIntersection;
9+
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatMapping;
10+
import org.rdfhdt.hdt.dictionary.impl.utilCat.CatUnion;
11+
import org.rdfhdt.hdt.dictionary.impl.utilCat.SectionUtil;
812
import org.rdfhdt.hdt.dictionary.impl.utilDiff.DiffWrapper;
9-
import org.rdfhdt.hdt.hdt.HDTVocabulary;
1013
import org.rdfhdt.hdt.listener.ProgressListener;
1114
import org.rdfhdt.hdt.options.ControlInfo;
1215
import org.rdfhdt.hdt.options.ControlInformation;
1316

14-
import java.io.*;
17+
import java.io.FileInputStream;
18+
import java.io.FileOutputStream;
19+
import java.io.IOException;
20+
import java.io.InputStream;
21+
import java.io.OutputStream;
1522
import java.nio.file.Files;
1623
import java.nio.file.Paths;
17-
import java.util.*;
24+
import java.util.ArrayList;
25+
import java.util.HashMap;
26+
import java.util.Iterator;
27+
import java.util.List;
28+
import java.util.Map;
1829

1930
public class FourSectionDictionaryDiff implements DictionaryDiff {
2031

@@ -29,7 +40,7 @@ public FourSectionDictionaryDiff(String location) {
2940
}
3041

3142
@Override
32-
public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, ProgressListener listener) {
43+
public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, ProgressListener listener) throws IOException {
3344
allMappings.put("predicate", new CatMapping(location, "predicate", dictionary.getPredicates().getNumberOfElements()));
3445
allMappings.put("subject", new CatMapping(location, "subject", dictionary.getSubjects().getNumberOfElements()));
3546
allMappings.put("object", new CatMapping(location, "object", dictionary.getObjects().getNumberOfElements()));
@@ -60,11 +71,7 @@ public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, P
6071
listSkipSubj.add(itSkipSubs);
6172

6273
SharedWrapper sharedWrapper = new SharedWrapper(0, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
63-
long numNewSubj = 0;
64-
while (sharedWrapper.hasNext()) {
65-
sharedWrapper.next();
66-
numNewSubj++;
67-
}
74+
long numNewSubj = sharedWrapper.count();
6875
sharedWrapper = new SharedWrapper(0, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
6976
listSkipSubj.add(sharedWrapper);
7077

@@ -84,11 +91,7 @@ public void diff(Dictionary dictionary, Map<String, ModifiableBitmap> bitmaps, P
8491

8592
// flag = 1 for objects
8693
sharedWrapper = new SharedWrapper(1, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
87-
long numNewObj = 0;
88-
while (sharedWrapper.hasNext()) {
89-
numNewObj++;
90-
sharedWrapper.next();
91-
}
94+
long numNewObj = sharedWrapper.count();
9295
sharedWrapper = new SharedWrapper(1, bitmaps.get("SH_S"), bitmaps.get("SH_O"), dictionary.getShared().getSortedEntries());
9396
listSkipObjs.add(sharedWrapper);
9497

@@ -215,6 +218,15 @@ public boolean hasNext() {
215218
public CatElement next() {
216219
return next;
217220
}
221+
222+
public int count() {
223+
int i = 0;
224+
while (hasNext()) {
225+
// next();
226+
i++;
227+
}
228+
return i;
229+
}
218230
}
219231

220232
@Override

0 commit comments

Comments
 (0)