Skip to content

Commit 94e9c8b

Browse files
committed
Add UUID logical type support
1 parent 8ce285e commit 94e9c8b

47 files changed

Lines changed: 1240 additions & 44 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,5 +187,35 @@ Check out [Pinot documentation](https://docs.pinot.apache.org/) for a complete d
187187
- [Pinot Architecture](https://docs.pinot.apache.org/basics/architecture)
188188
- [Pinot Query Language](https://docs.pinot.apache.org/users/user-guide-query/pinot-query-language)
189189

190+
### UUID Logical Type
191+
192+
Pinot supports a logical `UUID` type for single-value columns. In v1, Pinot stores `UUID` values using the existing
193+
16-byte `BYTES` representation, while schema definitions and query results use canonical lowercase RFC 4122 strings.
194+
195+
Schema example:
196+
```json
197+
{
198+
"schemaName": "events",
199+
"dimensionFieldSpecs": [
200+
{
201+
"name": "eventId",
202+
"dataType": "UUID"
203+
}
204+
]
205+
}
206+
```
207+
208+
Query example:
209+
```sql
210+
SELECT eventId
211+
FROM events
212+
WHERE eventId = CAST('550e8400-e29b-41d4-a716-446655440000' AS UUID)
213+
```
214+
215+
Migration notes:
216+
- Existing `BYTES` columns keep returning hex strings. Pinot renders canonical UUID strings only for columns declared as `UUID`.
217+
- Existing `STRING` or `BYTES` UUID data can be migrated to `UUID` columns without a segment or wire format change in v1.
218+
- Multi-value UUID columns are not supported in v1.
219+
190220
## License
191221
Apache Pinot is under [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)

pinot-common/src/main/java/org/apache/pinot/common/function/FunctionUtils.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.Collection;
2424
import java.util.HashMap;
2525
import java.util.Map;
26+
import java.util.UUID;
2627
import javax.annotation.Nullable;
2728
import org.apache.calcite.rel.type.RelDataType;
2829
import org.apache.calcite.rel.type.RelDataTypeFactory;
@@ -52,6 +53,7 @@ private FunctionUtils() {
5253
put(Timestamp.class, PinotDataType.TIMESTAMP);
5354
put(String.class, PinotDataType.STRING);
5455
put(byte[].class, PinotDataType.BYTES);
56+
put(UUID.class, PinotDataType.UUID);
5557
put(int[].class, PinotDataType.PRIMITIVE_INT_ARRAY);
5658
put(long[].class, PinotDataType.PRIMITIVE_LONG_ARRAY);
5759
put(float[].class, PinotDataType.PRIMITIVE_FLOAT_ARRAY);
@@ -75,6 +77,7 @@ private FunctionUtils() {
7577
put(Timestamp.class, PinotDataType.TIMESTAMP);
7678
put(String.class, PinotDataType.STRING);
7779
put(byte[].class, PinotDataType.BYTES);
80+
put(UUID.class, PinotDataType.UUID);
7881
put(int[].class, PinotDataType.PRIMITIVE_INT_ARRAY);
7982
put(Integer[].class, PinotDataType.INTEGER_ARRAY);
8083
put(long[].class, PinotDataType.PRIMITIVE_LONG_ARRAY);
@@ -103,6 +106,7 @@ private FunctionUtils() {
103106
put(Timestamp.class, DataType.TIMESTAMP);
104107
put(String.class, DataType.STRING);
105108
put(byte[].class, DataType.BYTES);
109+
put(UUID.class, DataType.UUID);
106110
put(int[].class, DataType.INT);
107111
put(long[].class, DataType.LONG);
108112
put(float[].class, DataType.FLOAT);
@@ -125,6 +129,7 @@ private FunctionUtils() {
125129
put(Timestamp.class, ColumnDataType.TIMESTAMP);
126130
put(String.class, ColumnDataType.STRING);
127131
put(byte[].class, ColumnDataType.BYTES);
132+
put(UUID.class, ColumnDataType.UUID);
128133
put(int[].class, ColumnDataType.INT_ARRAY);
129134
put(long[].class, ColumnDataType.LONG_ARRAY);
130135
put(float[].class, ColumnDataType.FLOAT_ARRAY);
@@ -197,6 +202,8 @@ public static RelDataType getRelDataType(RelDataTypeFactory typeFactory, Class<?
197202
case STRING:
198203
case JSON:
199204
return typeFactory.createSqlType(SqlTypeName.VARCHAR);
205+
case UUID:
206+
return typeFactory.createSqlType(SqlTypeName.UUID);
200207
case BYTES:
201208
return typeFactory.createSqlType(SqlTypeName.VARBINARY);
202209
case INT_ARRAY:

pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,15 @@
2020

2121
import com.google.common.annotations.VisibleForTesting;
2222
import java.io.UnsupportedEncodingException;
23-
import java.nio.ByteBuffer;
2423
import java.nio.charset.StandardCharsets;
2524
import java.text.Normalizer;
2625
import java.util.Base64;
27-
import java.util.UUID;
2826
import javax.annotation.Nullable;
2927
import org.apache.commons.lang3.StringUtils;
3028
import org.apache.pinot.common.utils.URIUtils;
3129
import org.apache.pinot.spi.annotations.ScalarFunction;
3230
import org.apache.pinot.spi.utils.JsonUtils;
31+
import org.apache.pinot.spi.utils.UuidUtils;
3332

3433

3534
/**
@@ -442,12 +441,8 @@ public static String fromAscii(byte[] input) {
442441
@ScalarFunction
443442
public static byte[] toUUIDBytes(String input) {
444443
try {
445-
UUID uuid = UUID.fromString(input);
446-
ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
447-
bb.putLong(uuid.getMostSignificantBits());
448-
bb.putLong(uuid.getLeastSignificantBits());
449-
return bb.array();
450-
} catch (IllegalArgumentException e) {
444+
return UuidUtils.toBytes(input);
445+
} catch (Exception e) {
451446
return null;
452447
}
453448
}
@@ -459,10 +454,7 @@ public static byte[] toUUIDBytes(String input) {
459454
*/
460455
@ScalarFunction
461456
public static String fromUUIDBytes(byte[] input) {
462-
ByteBuffer bb = ByteBuffer.wrap(input);
463-
long firstLong = bb.getLong();
464-
long secondLong = bb.getLong();
465-
return new UUID(firstLong, secondLong).toString();
457+
return UuidUtils.toString(input);
466458
}
467459

468460
/**

pinot-common/src/main/java/org/apache/pinot/common/request/context/predicate/BaseInPredicate.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.pinot.spi.utils.ByteArray;
2626
import org.apache.pinot.spi.utils.BytesUtils;
2727
import org.apache.pinot.spi.utils.TimestampUtils;
28+
import org.apache.pinot.spi.utils.UuidUtils;
2829

2930

3031
/**
@@ -42,6 +43,7 @@ public abstract class BaseInPredicate extends BasePredicate {
4243
private int[] _booleanValues;
4344
private long[] _timestampValues;
4445
private ByteArray[] _bytesValues;
46+
private ByteArray[] _uuidValues;
4547

4648
public BaseInPredicate(ExpressionContext lhs, List<String> values) {
4749
super(lhs);
@@ -155,4 +157,17 @@ public ByteArray[] getBytesValues() {
155157
}
156158
return bigDecimalValues;
157159
}
160+
161+
public ByteArray[] getUuidValues() {
162+
ByteArray[] uuidValues = _uuidValues;
163+
if (uuidValues == null) {
164+
int numValues = _values.size();
165+
uuidValues = new ByteArray[numValues];
166+
for (int i = 0; i < numValues; i++) {
167+
uuidValues[i] = new ByteArray(UuidUtils.toBytes(_values.get(i)));
168+
}
169+
_uuidValues = uuidValues;
170+
}
171+
return uuidValues;
172+
}
158173
}

pinot-common/src/main/java/org/apache/pinot/common/response/encoder/JsonResponseEncoder.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ private static Object extractValue(DataSchema.ColumnDataType columnDataType, Jso
220220
case DOUBLE:
221221
return jsonValue.asDouble();
222222
case STRING:
223+
case UUID:
223224
case BYTES:
224225
case TIMESTAMP:
225226
case JSON:

pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import java.util.HashMap;
4141
import java.util.List;
4242
import java.util.Map;
43+
import java.util.UUID;
4344
import org.apache.calcite.rel.type.RelDataType;
4445
import org.apache.calcite.rel.type.RelDataTypeFactory;
4546
import org.apache.calcite.sql.type.SqlTypeName;
@@ -50,6 +51,7 @@
5051
import org.apache.pinot.spi.utils.BytesUtils;
5152
import org.apache.pinot.spi.utils.CommonConstants.NullValuePlaceHolder;
5253
import org.apache.pinot.spi.utils.EqualityUtils;
54+
import org.apache.pinot.spi.utils.UuidUtils;
5355

5456
import static java.nio.charset.StandardCharsets.UTF_8;
5557

@@ -302,6 +304,12 @@ public RelDataType toType(RelDataTypeFactory typeFactory) {
302304
return typeFactory.createSqlType(SqlTypeName.VARBINARY);
303305
}
304306
},
307+
UUID(BYTES, NullValuePlaceHolder.INTERNAL_UUID_BYTES) {
308+
@Override
309+
public RelDataType toType(RelDataTypeFactory typeFactory) {
310+
return typeFactory.createSqlType(SqlTypeName.UUID);
311+
}
312+
},
305313
OBJECT(null) {
306314
@Override
307315
public RelDataType toType(RelDataTypeFactory typeFactory) {
@@ -452,6 +460,8 @@ public DataType toDataType() {
452460
return DataType.STRING;
453461
case JSON:
454462
return DataType.JSON;
463+
case UUID:
464+
return DataType.UUID;
455465
case BYTES:
456466
case BYTES_ARRAY:
457467
return DataType.BYTES;
@@ -490,6 +500,8 @@ public Object toInternal(Object value) {
490500
return ((boolean) value) ? 1 : 0;
491501
case TIMESTAMP:
492502
return ((Timestamp) value).getTime();
503+
case UUID:
504+
return new ByteArray(UuidUtils.toBytes(value));
493505
case BYTES:
494506
return new ByteArray((byte[]) value);
495507
case BOOLEAN_ARRAY:
@@ -505,6 +517,9 @@ public Object toInternal(Object value) {
505517
if (value instanceof Timestamp) {
506518
return ((Timestamp) value).getTime();
507519
}
520+
if (value instanceof UUID) {
521+
return new ByteArray(UuidUtils.toBytes((UUID) value));
522+
}
508523
if (value instanceof byte[]) {
509524
return new ByteArray((byte[]) value);
510525
}
@@ -538,6 +553,8 @@ public Object toExternal(Object value) {
538553
return ((int) value) == 1;
539554
case TIMESTAMP:
540555
return new Timestamp((long) value);
556+
case UUID:
557+
return UuidUtils.toUUID((ByteArray) value);
541558
case BYTES:
542559
return ((ByteArray) value).getBytes();
543560
case BOOLEAN_ARRAY:
@@ -569,6 +586,8 @@ public Serializable convert(Object value) {
569586
return ((int) value) == 1;
570587
case TIMESTAMP:
571588
return new Timestamp((long) value);
589+
case UUID:
590+
return UuidUtils.toUUID((ByteArray) value);
572591
case STRING:
573592
case JSON:
574593
return value.toString();
@@ -609,6 +628,8 @@ public Serializable format(Object value) {
609628
case TIMESTAMP:
610629
assert value instanceof Timestamp;
611630
return value.toString();
631+
case UUID:
632+
return formatUuid(value);
612633
case BYTES:
613634
return BytesUtils.toHexString((byte[]) value);
614635
case TIMESTAMP_ARRAY:
@@ -639,6 +660,8 @@ public Serializable convertAndFormat(Object value) {
639660
return ((int) value) == 1;
640661
case TIMESTAMP:
641662
return new Timestamp((long) value).toString();
663+
case UUID:
664+
return UuidUtils.toString((ByteArray) value);
642665
case STRING:
643666
case JSON:
644667
return value.toString();
@@ -844,6 +867,8 @@ public static ColumnDataType fromDataTypeSV(DataType dataType) {
844867
return STRING;
845868
case JSON:
846869
return JSON;
870+
case UUID:
871+
return UUID;
847872
case BYTES:
848873
return BYTES;
849874
case MAP:
@@ -878,6 +903,19 @@ public static ColumnDataType fromDataTypeMV(DataType dataType) {
878903
}
879904
}
880905

906+
private static String formatUuid(Object value) {
907+
if (value instanceof UUID) {
908+
return ((UUID) value).toString();
909+
}
910+
if (value instanceof byte[]) {
911+
return UuidUtils.toString((byte[]) value);
912+
}
913+
if (value instanceof ByteArray) {
914+
return UuidUtils.toString((ByteArray) value);
915+
}
916+
return UuidUtils.toString(UuidUtils.toBytes(value));
917+
}
918+
881919
public abstract RelDataType toType(RelDataTypeFactory typeFactory);
882920
}
883921
}

0 commit comments

Comments
 (0)