Skip to content

Commit be53c71

Browse files
committed
Complete UUID v1 coverage and fix CI regressions
1 parent 6cb505d commit be53c71

33 files changed

Lines changed: 800 additions & 190 deletions

File tree

pinot-common/src/main/java/org/apache/pinot/common/request/context/RequestContextUtils.java

Lines changed: 122 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,16 @@
4141
import org.apache.pinot.common.utils.RegexpPatternConverterUtils;
4242
import org.apache.pinot.common.utils.request.RequestUtils;
4343
import org.apache.pinot.segment.spi.AggregationFunctionType;
44+
import org.apache.pinot.spi.data.FieldSpec.DataType;
4445
import org.apache.pinot.spi.exception.BadQueryRequestException;
4546
import org.apache.pinot.sql.FilterKind;
4647
import org.apache.pinot.sql.parsers.CalciteSqlParser;
4748

4849

4950
public class RequestContextUtils {
51+
private static final String UNSUPPORTED_RHS_MESSAGE =
52+
"Pinot does not support column or function on the right-hand side of the predicate";
53+
5054
private RequestContextUtils() {
5155
}
5256

@@ -217,23 +221,23 @@ private static FilterContext getFilterInner(Function thriftFunction) {
217221
case GREATER_THAN:
218222
return FilterContext.forPredicate(
219223
new RangePredicate(getExpression(operands.get(0)), false, getStringValue(operands.get(1)), false,
220-
RangePredicate.UNBOUNDED, new LiteralContext(operands.get(1).getLiteral()).getType()));
224+
RangePredicate.UNBOUNDED, getLiteralType(operands.get(1))));
221225
case GREATER_THAN_OR_EQUAL:
222226
return FilterContext.forPredicate(
223227
new RangePredicate(getExpression(operands.get(0)), true, getStringValue(operands.get(1)), false,
224-
RangePredicate.UNBOUNDED, new LiteralContext(operands.get(1).getLiteral()).getType()));
228+
RangePredicate.UNBOUNDED, getLiteralType(operands.get(1))));
225229
case LESS_THAN:
226230
return FilterContext.forPredicate(
227231
new RangePredicate(getExpression(operands.get(0)), false, RangePredicate.UNBOUNDED, false,
228-
getStringValue(operands.get(1)), new LiteralContext(operands.get(1).getLiteral()).getType()));
232+
getStringValue(operands.get(1)), getLiteralType(operands.get(1))));
229233
case LESS_THAN_OR_EQUAL:
230234
return FilterContext.forPredicate(
231235
new RangePredicate(getExpression(operands.get(0)), false, RangePredicate.UNBOUNDED, true,
232-
getStringValue(operands.get(1)), new LiteralContext(operands.get(1).getLiteral()).getType()));
236+
getStringValue(operands.get(1)), getLiteralType(operands.get(1))));
233237
case BETWEEN:
234238
return FilterContext.forPredicate(
235239
new RangePredicate(getExpression(operands.get(0)), true, getStringValue(operands.get(1)), true,
236-
getStringValue(operands.get(2)), new LiteralContext(operands.get(1).getLiteral()).getType()));
240+
getStringValue(operands.get(2)), getLiteralType(operands.get(1))));
237241
case RANGE:
238242
return FilterContext.forPredicate(
239243
new RangePredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
@@ -278,12 +282,7 @@ private static FilterContext getFilterInner(Function thriftFunction) {
278282
}
279283

280284
public static String getStringValue(Expression thriftExpression) {
281-
Literal literal = thriftExpression.getLiteral();
282-
if (literal == null) {
283-
throw new BadQueryRequestException(
284-
"Pinot does not support column or function on the right-hand side of the predicate");
285-
}
286-
return RequestUtils.getLiteralString(literal);
285+
return evaluateLiteralValue(thriftExpression).getStringValue();
287286
}
288287

289288
/**
@@ -402,23 +401,23 @@ private static FilterContext getFilterInner(FunctionContext filterFunction) {
402401
case GREATER_THAN:
403402
return FilterContext.forPredicate(
404403
new RangePredicate(operands.get(0), false, getStringValue(operands.get(1)), false, RangePredicate.UNBOUNDED,
405-
operands.get(1).getLiteral().getType()));
404+
getLiteralType(operands.get(1))));
406405
case GREATER_THAN_OR_EQUAL:
407406
return FilterContext.forPredicate(
408407
new RangePredicate(operands.get(0), true, getStringValue(operands.get(1)), false, RangePredicate.UNBOUNDED,
409-
operands.get(1).getLiteral().getType()));
408+
getLiteralType(operands.get(1))));
410409
case LESS_THAN:
411410
return FilterContext.forPredicate(
412411
new RangePredicate(operands.get(0), false, RangePredicate.UNBOUNDED, false, getStringValue(operands.get(1)),
413-
operands.get(1).getLiteral().getType()));
412+
getLiteralType(operands.get(1))));
414413
case LESS_THAN_OR_EQUAL:
415414
return FilterContext.forPredicate(
416415
new RangePredicate(operands.get(0), false, RangePredicate.UNBOUNDED, true, getStringValue(operands.get(1)),
417-
operands.get(1).getLiteral().getType()));
416+
getLiteralType(operands.get(1))));
418417
case BETWEEN:
419418
return FilterContext.forPredicate(
420419
new RangePredicate(operands.get(0), true, getStringValue(operands.get(1)), true,
421-
getStringValue(operands.get(2)), operands.get(1).getLiteral().getType()));
420+
getStringValue(operands.get(2)), getLiteralType(operands.get(1))));
422421
case RANGE:
423422
return FilterContext.forPredicate(new RangePredicate(operands.get(0), getStringValue(operands.get(1))));
424423
case REGEXP_LIKE:
@@ -462,11 +461,96 @@ private static FilterContext getFilterInner(FunctionContext filterFunction) {
462461
// literal context doesn't support float, and we cannot differentiate explicit string literal and literal
463462
// without explicit type, so we always convert the literal into string.
464463
private static String getStringValue(ExpressionContext expressionContext) {
465-
if (expressionContext.getType() != ExpressionContext.Type.LITERAL) {
466-
throw new BadQueryRequestException(
467-
"Pinot does not support column or function on the right-hand side of the predicate");
464+
return evaluateLiteralValue(expressionContext).getStringValue();
465+
}
466+
467+
private static DataType getLiteralType(Expression thriftExpression) {
468+
return evaluateLiteralValue(thriftExpression).getType();
469+
}
470+
471+
private static DataType getLiteralType(ExpressionContext expressionContext) {
472+
return evaluateLiteralValue(expressionContext).getType();
473+
}
474+
475+
private static EvaluatedLiteralValue evaluateLiteralValue(Expression thriftExpression) {
476+
Literal literal = thriftExpression.getLiteral();
477+
if (literal != null) {
478+
return fromLiteralContext(new LiteralContext(literal));
468479
}
469-
return expressionContext.getLiteral().getStringValue();
480+
Function function = thriftExpression.getFunctionCall();
481+
if (function != null) {
482+
return evaluateFunctionLiteral(function.getOperator(), function.getOperands(),
483+
RequestContextUtils::evaluateLiteralValue);
484+
}
485+
throw new BadQueryRequestException(UNSUPPORTED_RHS_MESSAGE);
486+
}
487+
488+
private static EvaluatedLiteralValue evaluateLiteralValue(ExpressionContext expressionContext) {
489+
if (expressionContext.getType() == ExpressionContext.Type.LITERAL) {
490+
return fromLiteralContext(expressionContext.getLiteral());
491+
}
492+
if (expressionContext.getType() == ExpressionContext.Type.FUNCTION) {
493+
FunctionContext function = expressionContext.getFunction();
494+
return evaluateFunctionLiteral(function.getFunctionName(), function.getArguments(),
495+
RequestContextUtils::evaluateLiteralValue);
496+
}
497+
throw new BadQueryRequestException(UNSUPPORTED_RHS_MESSAGE);
498+
}
499+
500+
private static <T> EvaluatedLiteralValue evaluateFunctionLiteral(String functionName, List<T> operands,
501+
java.util.function.Function<T, EvaluatedLiteralValue> evaluator) {
502+
if (!functionName.equalsIgnoreCase("cast")) {
503+
throw new BadQueryRequestException(UNSUPPORTED_RHS_MESSAGE);
504+
}
505+
Preconditions.checkState(operands.size() == 2, "CAST function must have exactly 2 operands");
506+
EvaluatedLiteralValue source = evaluator.apply(operands.get(0));
507+
EvaluatedLiteralValue target = evaluator.apply(operands.get(1));
508+
DataType targetType = getCastTargetType(target.getStringValue());
509+
if (source.getType() == DataType.UNKNOWN) {
510+
return new EvaluatedLiteralValue(source.getStringValue(), targetType);
511+
}
512+
Object converted = targetType.convert(source.getStringValue());
513+
return new EvaluatedLiteralValue(targetType.toString(converted), targetType);
514+
}
515+
516+
private static DataType getCastTargetType(String targetTypeString) {
517+
switch (targetTypeString.toUpperCase()) {
518+
case "INT":
519+
case "INTEGER":
520+
return DataType.INT;
521+
case "LONG":
522+
case "BIGINT":
523+
return DataType.LONG;
524+
case "FLOAT":
525+
return DataType.FLOAT;
526+
case "DOUBLE":
527+
return DataType.DOUBLE;
528+
case "DECIMAL":
529+
case "BIGDECIMAL":
530+
case "BIG_DECIMAL":
531+
return DataType.BIG_DECIMAL;
532+
case "BOOL":
533+
case "BOOLEAN":
534+
return DataType.BOOLEAN;
535+
case "TIMESTAMP":
536+
return DataType.TIMESTAMP;
537+
case "STRING":
538+
case "VARCHAR":
539+
return DataType.STRING;
540+
case "JSON":
541+
return DataType.JSON;
542+
case "BYTES":
543+
case "VARBINARY":
544+
return DataType.BYTES;
545+
case "UUID":
546+
return DataType.UUID;
547+
default:
548+
throw new BadQueryRequestException("Unable to cast expression to type - " + targetTypeString);
549+
}
550+
}
551+
552+
private static EvaluatedLiteralValue fromLiteralContext(LiteralContext literalContext) {
553+
return new EvaluatedLiteralValue(literalContext.getStringValue(), literalContext.getType());
470554
}
471555

472556
private static float[] getVectorValue(ExpressionContext expressionContext) {
@@ -556,4 +640,22 @@ private static float getFloatValue(Expression thriftExpression) {
556640
throw new IllegalStateException("Unsupported literal type: " + type);
557641
}
558642
}
643+
644+
private static final class EvaluatedLiteralValue {
645+
private final String _stringValue;
646+
private final DataType _type;
647+
648+
private EvaluatedLiteralValue(String stringValue, DataType type) {
649+
_stringValue = stringValue;
650+
_type = type;
651+
}
652+
653+
private String getStringValue() {
654+
return _stringValue;
655+
}
656+
657+
private DataType getType() {
658+
return _type;
659+
}
660+
}
559661
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.pinot.common.request.context;
20+
21+
import java.util.List;
22+
import org.apache.pinot.common.request.context.predicate.EqPredicate;
23+
import org.apache.pinot.common.request.context.predicate.InPredicate;
24+
import org.apache.pinot.sql.parsers.CalciteSqlParser;
25+
import org.testng.Assert;
26+
import org.testng.annotations.Test;
27+
28+
29+
/**
30+
* Tests filter conversion for literal-only expressions on the right-hand side.
31+
*/
32+
public class RequestContextUtilsTest {
33+
private static final String UUID_1 = "550e8400-e29b-41d4-a716-446655440000";
34+
private static final String UUID_2 = "550e8400-e29b-41d4-a716-446655440001";
35+
36+
@Test
37+
public void testGetFilterWithUuidCastLiteralRhs() {
38+
FilterContext filter =
39+
RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression("uuidCol = CAST('" + UUID_1 + "' AS UUID)"));
40+
41+
Assert.assertEquals(filter.getType(), FilterContext.Type.PREDICATE);
42+
EqPredicate predicate = (EqPredicate) filter.getPredicate();
43+
Assert.assertEquals(predicate.getLhs().getIdentifier(), "uuidCol");
44+
Assert.assertEquals(predicate.getValue(), UUID_1);
45+
}
46+
47+
@Test
48+
public void testGetFilterExpressionContextWithUuidCastLiteralIn() {
49+
FilterContext filter = RequestContextUtils.getFilter(
50+
RequestContextUtils.getExpression("uuidCol IN (CAST('" + UUID_1 + "' AS UUID), CAST('" + UUID_2 + "' AS UUID))"));
51+
52+
Assert.assertEquals(filter.getType(), FilterContext.Type.PREDICATE);
53+
InPredicate predicate = (InPredicate) filter.getPredicate();
54+
Assert.assertEquals(predicate.getLhs().getIdentifier(), "uuidCol");
55+
Assert.assertEquals(predicate.getValues(), List.of(UUID_1, UUID_2));
56+
}
57+
}

pinot-common/src/test/java/org/apache/pinot/common/utils/DataSchemaTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
import java.math.BigDecimal;
2222
import java.nio.ByteBuffer;
2323
import java.sql.Timestamp;
24-
import org.apache.pinot.spi.utils.ByteArray;
2524
import org.apache.pinot.spi.data.FieldSpec;
25+
import org.apache.pinot.spi.utils.ByteArray;
2626
import org.apache.pinot.spi.utils.BytesUtils;
2727
import org.apache.pinot.spi.utils.UuidUtils;
2828
import org.testng.Assert;

pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RawValueInvertedIndexFilterOperator.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
import org.apache.pinot.segment.local.segment.index.readers.RawValueBitmapInvertedIndexReader;
3737
import org.apache.pinot.segment.spi.datasource.DataSource;
3838
import org.apache.pinot.spi.data.FieldSpec.DataType;
39+
import org.apache.pinot.spi.utils.BytesUtils;
40+
import org.apache.pinot.spi.utils.UuidUtils;
3941
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
4042
import org.roaringbitmap.buffer.MutableRoaringBitmap;
4143

@@ -138,6 +140,12 @@ private void addMatchingValueBitmap(MutableRoaringBitmap bitmap, String value) {
138140
case STRING:
139141
valueBitmap = _invertedIndexReader.getDocIdsForString(value);
140142
break;
143+
case BYTES:
144+
valueBitmap = _invertedIndexReader.getDocIdsForBytes(BytesUtils.toBytes(value));
145+
break;
146+
case UUID:
147+
valueBitmap = _invertedIndexReader.getDocIdsForBytes(UuidUtils.toBytes(value));
148+
break;
141149
default:
142150
throw new IllegalStateException("Unsupported data type for raw inverted index: " + _dataType);
143151
}

pinot-core/src/main/java/org/apache/pinot/core/query/pruner/ValueBasedSegmentPruner.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ public void ensureDataType(DataType dt) {
236236
public boolean mightBeContained(BloomFilterReader bloomFilter) {
237237
if (!_hashed) {
238238
GuavaBloomFilterReaderUtils.Hash128AsLongs hash128AsLongs =
239-
GuavaBloomFilterReaderUtils.hashAsLongs(_comparableValue.toString());
239+
GuavaBloomFilterReaderUtils.hashAsLongs(_dt.toString(_comparableValue));
240240
_hash1 = hash128AsLongs.getHash1();
241241
_hash2 = hash128AsLongs.getHash2();
242242
_hashed = true;

pinot-core/src/test/java/org/apache/pinot/core/query/pruner/BloomFilterSegmentPrunerTest.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,22 @@ public void testBloomFilterPruning()
9696
assertTrue(runPruner(indexSegment, "SELECT COUNT(*) FROM testTable WHERE column = 21.0 AND column = 30.0"));
9797
}
9898

99+
@Test
100+
public void testUuidBloomFilterPruning()
101+
throws IOException {
102+
IndexSegment indexSegment = mockIndexSegment(new String[]{
103+
"550e8400-e29b-41d4-a716-446655440000",
104+
"550e8400-e29b-41d4-a716-446655440001"
105+
}, DataType.UUID);
106+
107+
assertFalse(runPruner(indexSegment,
108+
"SELECT COUNT(*) FROM testTable WHERE column = '550e8400-e29b-41d4-a716-446655440000'"));
109+
assertFalse(runPruner(indexSegment,
110+
"SELECT COUNT(*) FROM testTable WHERE column IN ('550e8400-e29b-41d4-a716-446655440001')"));
111+
assertTrue(runPruner(indexSegment,
112+
"SELECT COUNT(*) FROM testTable WHERE column = '550e8400-e29b-41d4-a716-44665544ffff'"));
113+
}
114+
99115
@Test(expectedExceptions = RuntimeException.class)
100116
public void testQueryTimeoutOnPruning()
101117
throws IOException {
@@ -162,6 +178,11 @@ public void testIsApplicableTo() {
162178

163179
private IndexSegment mockIndexSegment(String[] values)
164180
throws IOException {
181+
return mockIndexSegment(values, DataType.DOUBLE);
182+
}
183+
184+
private IndexSegment mockIndexSegment(String[] values, DataType dataType)
185+
throws IOException {
165186
IndexSegment indexSegment = mock(IndexSegment.class);
166187
when(indexSegment.getColumnNames()).thenReturn(ImmutableSet.of("column"));
167188
SegmentMetadata segmentMetadata = mock(SegmentMetadata.class);
@@ -176,7 +197,7 @@ private IndexSegment mockIndexSegment(String[] values)
176197
for (String v : values) {
177198
builder.put(v);
178199
}
179-
when(dataSourceMetadata.getDataType()).thenReturn(DataType.DOUBLE);
200+
when(dataSourceMetadata.getDataType()).thenReturn(dataType);
180201
when(dataSource.getDataSourceMetadata()).thenReturn(dataSourceMetadata);
181202
when(dataSource.getBloomFilter()).thenReturn(builder.build());
182203

pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/UuidTypeTest.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,21 @@ protected int getRealtimeSegmentFlushSize() {
8181
return 2;
8282
}
8383

84+
@Override
85+
protected List<String> getInvertedIndexColumns() {
86+
return List.of(UUID_FROM_BYTES_COLUMN);
87+
}
88+
89+
@Override
90+
protected List<String> getNoDictionaryColumns() {
91+
return List.of(UUID_FROM_BYTES_COLUMN);
92+
}
93+
94+
@Override
95+
protected List<String> getBloomFilterColumns() {
96+
return List.of(UUID_FROM_STRING_COLUMN, UUID_FROM_BYTES_COLUMN);
97+
}
98+
8499
@Override
85100
public Schema createSchema() {
86101
return new Schema.SchemaBuilder().setSchemaName(getTableName())

pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ private void validateSchemas() {
139139
if (fieldSpec.getDataType() != dataTypeForSVColumn) {
140140
_dataTypeMismatch.addMismatchReason(String
141141
.format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.",
142-
columnName, fieldSpec.getDataType().name(), avroColumnName, avroColumnType.name(),
142+
columnName, fieldSpec.getDataType().name(), avroColumnName, dataTypeForSVColumn.name(),
143143
getInputSchemaType()));
144144
}
145145
} else {

0 commit comments

Comments
 (0)