Skip to content

Commit e789a86

Browse files
AVRO-4225: Fix ClassCastException in FastReaderBuilder for java-class attribute
When GenericDatumReader reads a schema whose string fields carry a java-class attribute, FastReaderBuilder.getTransformingStringReader() cast the result of stringReader.read() directly to String. With GenericData that reader returns Utf8, so the cast threw a ClassCastException. The fix converts the value explicitly, handling both Utf8 and String, consistent with the rest of the Avro codebase.
1 parent 2443073 commit e789a86

2 files changed

Lines changed: 121 additions & 1 deletion

File tree

lang/java/avro/src/main/java/org/apache/avro/io/FastReaderBuilder.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,11 @@ private FieldReader getTransformingStringReader(String valueClass, FieldReader s
437437
Function<String, ?> transformer = findClass(valueClass)
438438
.map(clazz -> ReflectionUtil.getConstructorAsFunction(String.class, clazz)).orElse(null);
439439
if (transformer != null) {
440-
return (old, decoder) -> transformer.apply((String) stringReader.read(null, decoder));
440+
return (old, decoder) -> {
441+
Object value = stringReader.read(null, decoder);
442+
String stringValue = value instanceof Utf8 ? ((Utf8) value).toString() : (String) value;
443+
return transformer.apply(stringValue);
444+
};
441445
}
442446
}
443447

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* https://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.avro.io;
19+
20+
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
import static org.junit.jupiter.api.Assertions.assertNotNull;
22+
23+
import java.io.ByteArrayOutputStream;
24+
import java.io.IOException;
25+
26+
import org.apache.avro.Schema;
27+
import org.apache.avro.generic.GenericData;
28+
import org.apache.avro.generic.GenericDatumReader;
29+
import org.apache.avro.generic.GenericDatumWriter;
30+
import org.apache.avro.generic.GenericRecord;
31+
import org.junit.jupiter.api.Test;
32+
33+
/**
34+
* Tests for FastReaderBuilder behavior with schemas containing "java-class"
35+
* attributes.
36+
*/
37+
public class FastReaderBuilderJavaClassTest {
38+
39+
/**
40+
* Tests that GenericDatumReader can deserialize records with string fields that
41+
* have a "java-class" attribute (e.g., BigDecimal). Here the annotated string is
* the non-null branch of a ["null", string] union field named "price", next to
* a plain string field named "id".
42+
*
43+
* This test reproduces a bug where
44+
* FastReaderBuilder.getTransformingStringReader() casts the result of
45+
* stringReader.read() directly to String, but in GenericData mode the reader
46+
* returns Utf8, causing a ClassCastException.
*
* The record is written with GenericDatumWriter through a binary encoder, read
* back with GenericDatumReader through a binary decoder, and each field is then
* compared by its string form.
*
* @throws IOException if writing to or reading from the in-memory buffer fails
47+
*/
48+
@Test
49+
void genericDatumReaderWithJavaClassAttribute() throws IOException {
50+
// Schema with a string field that has "java-class": "java.math.BigDecimal".
51+
// This is a common pattern for representing decimal values as strings.
52+
String schemaJson = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n"
53+
+ " {\"name\": \"id\", \"type\": \"string\"},\n" + " {\"name\": \"price\", \"type\": [\"null\", {\n"
54+
+ " \"type\": \"string\",\n" + " \"java-class\": \"java.math.BigDecimal\"\n" + " }]}\n" + " ]\n"
55+
+ "}";
56+
57+
Schema schema = new Schema.Parser().parse(schemaJson);
58+
// Populate both fields with plain Java String values.
59+
GenericRecord record = new GenericData.Record(schema);
60+
record.put("id", "123");
61+
record.put("price", "-0.0002");
62+
// Serialize the record into an in-memory byte array.
63+
ByteArrayOutputStream out = new ByteArrayOutputStream();
64+
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
65+
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
66+
writer.write(record, encoder);
67+
// Flush before taking the bytes out of the stream below.
encoder.flush();
68+
69+
byte[] serialized = out.toByteArray();
70+
71+
// Deserialize using GenericDatumReader (which uses FastReaderBuilder by
72+
// default); the same schema serves as writer and reader schema.
73+
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
74+
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serialized, null);
75+
76+
// AVRO-4225: this read must not throw "ClassCastException: Utf8 cannot be cast
77+
// to String".
78+
GenericRecord result = reader.read(null, decoder);
79+
// Compare string forms only; these assertions do not pin the runtime type of
// the field values.
80+
assertNotNull(result);
81+
assertEquals("123", result.get("id").toString());
82+
assertEquals("-0.0002", result.get("price").toString());
83+
}
84+
85+
/**
86+
* Tests that GenericDatumReader can deserialize records with a direct string
87+
* field (not in a union) that has a "java-class" attribute.
*
* The single field "amount" is a string annotated with
* "java-class": "java.math.BigDecimal". The record goes through the same binary
* write/read round trip as the union case and is compared by its string form.
*
* @throws IOException if writing to or reading from the in-memory buffer fails
88+
*/
89+
@Test
90+
void genericDatumReaderWithDirectJavaClassString() throws IOException {
91+
// Record schema whose only field is the annotated string itself (no union).
String schemaJson = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n"
92+
+ " {\"name\": \"amount\", \"type\": {\n" + " \"type\": \"string\",\n"
93+
+ " \"java-class\": \"java.math.BigDecimal\"\n" + " }}\n" + " ]\n" + "}";
94+
95+
Schema schema = new Schema.Parser().parse(schemaJson);
96+
97+
GenericRecord record = new GenericData.Record(schema);
98+
record.put("amount", "123.45");
99+
// Serialize the record into an in-memory byte array.
100+
ByteArrayOutputStream out = new ByteArrayOutputStream();
101+
GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
102+
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
103+
writer.write(record, encoder);
104+
// Flush before taking the bytes out of the stream below.
encoder.flush();
105+
106+
byte[] serialized = out.toByteArray();
107+
// Read the bytes back using the same schema.
108+
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
109+
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(serialized, null);
110+
111+
GenericRecord result = reader.read(null, decoder);
112+
// Compare the string form only; the runtime type of "amount" is not asserted.
113+
assertNotNull(result);
114+
assertEquals("123.45", result.get("amount").toString());
115+
}
116+
}

0 commit comments

Comments
 (0)