Skip to content

Commit 9a6f19f

Browse files
Tim-Brooks authored and ChrisHegarty committed
Cache frozen FieldType to skip schema validation (#15886)
When indexing, every field in every document has its schema built via updateDocFieldSchema and validated via assertSameSchema against the existing FieldInfo. For the common case where a field consistently uses the same frozen FieldType instance, this work is redundant — a frozen type is immutable, so its schema contribution is identical every time. This change caches the frozen FieldType on each PerField and checks for the same object instance (reference equality) to detect when the type hasn't changed. When it matches, schema building and validation are skipped entirely, and FieldSchema only resets its docID. If a different type is encountered, the cache is invalidated and the full validation path runs. A deoptimization path handles multi-valued fields where a later value uses a different type than earlier values within the same document. Adds FieldType.isFrozen() to support the optimization, and new tests covering the fast path, cache invalidation, deoptimization, cross-segment validation, and document blocks.
1 parent b858e81 commit 9a6f19f

4 files changed

Lines changed: 291 additions & 27 deletions

File tree

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ Optimizations
7272

7373
* GITHUB#15779: Improve BytesRefHash.add performance by optimizing the rehash operation (tyronecai)
7474

75+
* GITHUB#15886: Cache frozen FieldType to skip redundant schema validation. (Tim Brooks)
76+
7577
Bug Fixes
7678
---------------------
7779
* GITHUB#15754: Fix HTMLStripCharFilter to prevent tags from incorrectly consuming subsequent

lucene/core/src/java/org/apache/lucene/document/FieldType.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,15 @@ public void freeze() {
9696
this.frozen = true;
9797
}
9898

99+
/**
100+
* Returns the frozen state of this FieldType.
101+
*
102+
* @return <code>true</code> if this FieldType is frozen against future modifications.
103+
*/
104+
public boolean isFrozen() {
105+
return frozen;
106+
}
107+
99108
/**
100109
* {@inheritDoc}
101110
*

lucene/core/src/java/org/apache/lucene/index/IndexingChain.java

Lines changed: 76 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,8 @@ private void finishStoredFields() throws IOException {
559559
}
560560

561561
void processDocument(int docID, Iterable<? extends IndexableField> document) throws IOException {
562-
// number of unique fields by names (collapses multiple field instances by the same name)
563-
int fieldCount = 0;
562+
// number of unique fields (by name) that need FieldInfo initialization in this segment or full schema validation
563+
int fieldsNeedInitOrValidate = 0;
564564
int indexedFieldCount = 0; // number of unique fields indexed with postings
565565
long fieldGen = nextFieldGen++;
566566
int docFieldIdx = 0;
@@ -577,40 +577,40 @@ void processDocument(int docID, Iterable<? extends IndexableField> document) thr
577577
// 1st pass over doc fields – verify that doc schema matches the index schema
578578
// build schema for each unique doc field
579579
for (IndexableField field : document) {
580-
IndexableFieldType fieldType = field.fieldType();
580+
final String fieldName = field.name();
581+
final IndexableFieldType fieldType = field.fieldType();
581582
final boolean isReserved = field.getClass() == ReservedField.class;
582583
PerField pf =
583584
getOrAddPerField(
584-
field.name(), false
585+
fieldName, false
585586
/* we never add reserved fields during indexing should be done during DWPT setup*/ );
586587
if (pf.reserved != isReserved) {
587588
throw new IllegalArgumentException(
588-
"\""
589-
+ field.name()
590-
+ "\" is a reserved field and should not be added to any document");
589+
"\"" + fieldName + "\" is a reserved field and should not be added to any document");
591590
}
592591
if (pf.fieldGen != fieldGen) { // first time we see this field in this document
593-
fields[fieldCount++] = pf;
594592
pf.fieldGen = fieldGen;
595-
pf.reset(docID);
593+
pf.reset(docID, fieldType);
594+
if (pf.validatedFrozenFieldType == null) {
595+
fields[fieldsNeedInitOrValidate++] = pf;
596+
}
597+
} else if (pf.multiValueForcesDeoptimize(fieldType)) {
598+
// Multi-valued field with a different field type than the cached frozen type.
599+
// Drop the validated frozen field type to force the validation path.
600+
pf.validatedFrozenFieldType = null;
601+
fields[fieldsNeedInitOrValidate++] = pf;
596602
}
597603
if (docFieldIdx >= docFields.length) oversizeDocFields();
598604
docFields[docFieldIdx++] = pf;
599-
updateDocFieldSchema(field.name(), pf.schema, fieldType);
600-
}
601-
// For each field, if it's the first time we see this field in this segment,
602-
// initialize its FieldInfo.
603-
// If we have already seen this field, verify that its schema
604-
// within the current doc matches its schema in the index.
605-
for (int i = 0; i < fieldCount; i++) {
606-
PerField pf = fields[i];
607-
if (pf.fieldInfo == null) {
608-
initializeFieldInfo(pf);
609-
} else {
610-
pf.schema.assertSameSchema(pf.fieldInfo);
605+
if (pf.validatedFrozenFieldType == null) {
606+
updateDocFieldSchema(fieldName, pf.schema, fieldType);
611607
}
612608
}
613609

610+
if (fieldsNeedInitOrValidate > 0) {
611+
initAndValidateFields(fieldsNeedInitOrValidate);
612+
}
613+
614614
// 2nd pass over doc fields – index each field
615615
// also count the number of unique fields indexed with postings
616616
docFieldIdx = 0;
@@ -641,6 +641,22 @@ void processDocument(int docID, Iterable<? extends IndexableField> document) thr
641641
}
642642
}
643643

644+
private void initAndValidateFields(int fieldCount) throws IOException {
645+
// For each field, if it's the first time we see this field in this segment,
646+
// initialize its FieldInfo.
647+
// If we have already seen this field, verify that its schema
648+
// within the current doc matches its schema in the index.
649+
for (int i = 0; i < fieldCount; i++) {
650+
PerField pf = fields[i];
651+
if (pf.fieldInfo == null) {
652+
initializeFieldInfo(pf);
653+
pf.trySetValidatedFrozenFieldType();
654+
} else {
655+
pf.schema.assertSameSchema(pf.fieldInfo);
656+
}
657+
}
658+
}
659+
644660
private void oversizeDocFields() {
645661
PerField[] newDocFields =
646662
new PerField
@@ -1108,6 +1124,14 @@ private final class PerField implements Comparable<PerField> {
11081124
private final Analyzer analyzer;
11091125
private boolean first; // first in a document
11101126

1127+
/**
1128+
* Allows IndexingChain to skip schema validation if fields keep using the same frozen field
1129+
* type
1130+
*/
1131+
private FieldType validatedFrozenFieldType;
1132+
1133+
private IndexableFieldType candidateFieldType;
1134+
11111135
PerField(
11121136
String fieldName,
11131137
int indexCreatedVersionMajor,
@@ -1125,9 +1149,33 @@ private final class PerField implements Comparable<PerField> {
11251149
this.reserved = reserved;
11261150
}
11271151

1128-
void reset(int docId) {
1152+
void reset(int docId, IndexableFieldType fieldType) {
11291153
first = true;
1130-
schema.reset(docId);
1154+
if (fieldInfo == null) {
1155+
// The first time we encounter this field in a segment, propose its field type as a candidate to optimize
1156+
// the validation step. This will be promoted in trySetValidatedFrozenFieldType if it is
1157+
// frozen and valid.
1158+
candidateFieldType = fieldType;
1159+
}
1160+
if (fieldType == validatedFrozenFieldType) {
1161+
schema.resetJustDocId(docId);
1162+
} else {
1163+
// Encountered new FieldType. Deoptimize the schema validation skip.
1164+
validatedFrozenFieldType = null;
1165+
schema.reset(docId);
1166+
}
1167+
}
1168+
1169+
boolean multiValueForcesDeoptimize(IndexableFieldType fieldType) {
1170+
return validatedFrozenFieldType != null && fieldType != validatedFrozenFieldType;
1171+
}
1172+
1173+
void trySetValidatedFrozenFieldType() {
1174+
assert fieldInfo != null;
1175+
if (candidateFieldType instanceof FieldType ft && ft.isFrozen()) {
1176+
validatedFrozenFieldType = ft;
1177+
}
1178+
candidateFieldType = null;
11311179
}
11321180

11331181
void setFieldInfo(FieldInfo fieldInfo) {
@@ -1545,8 +1593,12 @@ void setVectors(
15451593
}
15461594
}
15471595

1548-
void reset(int doc) {
1596+
void resetJustDocId(int doc) {
15491597
docID = doc;
1598+
}
1599+
1600+
void reset(int doc) {
1601+
resetJustDocId(doc);
15501602
omitNorms = false;
15511603
storeTermVector = false;
15521604
indexOptions = IndexOptions.NONE;

0 commit comments

Comments
 (0)