Skip to content

Commit 87a9ef7

Browse files
peterenescumeta-codesync[bot]
authored andcommitted
fix: Allow Nimble writer to write flattened and wrapped FlatMapVector (#634)
Summary: Pull Request resolved: #634 Nimble writer expects a flattened FlatMapVector during `ingestFlatMap` function call. This will not work for wrapped FlatMapVectors, for example, vectors from MergeJoin. Updates our switch case to check the wrapped vector rather than the top level vector encoding. Also updates ingestFlatMap to handle flattened and decoded vectors. Reviewed By: HuamengJiang Differential Revision: D99153405 fbshipit-source-id: 10a5416e2190de838131ca4ab74cda4cc3f7b795
1 parent 1014d16 commit 87a9ef7

2 files changed

Lines changed: 100 additions & 10 deletions

File tree

dwio/nimble/velox/FieldWriter.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ class FlatMapFieldWriter : public FieldWriter {
14101410
return;
14111411

14121412
case velox::TypeKind::MAP: {
1413-
switch (vector->encoding()) {
1413+
switch (vector->wrappedVector()->encoding()) {
14141414
case velox::VectorEncoding::Simple::FLAT_MAP:
14151415
ingestFlatMap(vector, ranges);
14161416
return;
@@ -1588,18 +1588,32 @@ class FlatMapFieldWriter : public FieldWriter {
15881588
NIMBLE_CHECK(
15891589
currentValueFields_.empty() && allValueFields_.empty(),
15901590
"Mixing map and flatmap vectors in the FlatMapFieldWriter is not supported");
1591-
const auto* flatMapVector = vector->asChecked<velox::FlatMapVector>();
15921591
const auto size = ranges.size();
1593-
nullsStream_.ensureAdditionalNullsCapacity(
1594-
flatMapVector->mayHaveNulls(), size);
1592+
const velox::FlatMapVector* flatMapVector =
1593+
vector->as<velox::FlatMapVector>();
15951594

1596-
// First write top-level nulls, collecting the non-nulls ranges to write.
15971595
OrderedRanges childRanges;
1598-
const uint64_t nonNullCount = iterateNonNullIndices<true>(
1599-
ranges,
1600-
nullsStream_.mutableNonNulls(),
1601-
FlatAdapter<>{vector},
1602-
[&](auto offset) { childRanges.add(offset, 1); });
1596+
uint64_t nonNullCount;
1597+
if (flatMapVector) {
1598+
nullsStream_.ensureAdditionalNullsCapacity(
1599+
flatMapVector->mayHaveNulls(), size);
1600+
nonNullCount = iterateNonNullIndices<true>(
1601+
ranges,
1602+
nullsStream_.mutableNonNulls(),
1603+
FlatAdapter<>{vector},
1604+
[&](auto offset) { childRanges.add(offset, 1); });
1605+
} else {
1606+
auto decodingContext = context_.decodingContext();
1607+
auto& decoded = decodingContext.decode(vector, ranges);
1608+
flatMapVector =
1609+
decoded.base()->template asChecked<velox::FlatMapVector>();
1610+
nullsStream_.ensureAdditionalNullsCapacity(decoded.mayHaveNulls(), size);
1611+
nonNullCount = iterateNonNullIndices<true>(
1612+
ranges,
1613+
nullsStream_.mutableNonNulls(),
1614+
DecodedAdapter<>{decoded},
1615+
[&](auto offset) { childRanges.add(offset, 1); });
1616+
}
16031617

16041618
collectStatistics(size - nonNullCount, size);
16051619
// For FlatMapVector ingestion, we need to compute the total key count by

dwio/nimble/velox/tests/VeloxReaderTest.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "velox/type/Type.h"
4545
#include "velox/vector/BaseVector.h"
4646
#include "velox/vector/ComplexVector.h"
47+
#include "velox/vector/FlatMapVector.h"
4748
#include "velox/vector/NullsBuilder.h"
4849
#include "velox/vector/fuzzer/VectorFuzzer.h"
4950
#include "velox/vector/tests/utils/VectorMaker.h"
@@ -3450,6 +3451,81 @@ bool compareMapToFlatMap(
34503451
return true;
34513452
}
34523453

3454+
TEST_P(VeloxReaderTest, flatMapDictionaryWrappedFlatMapVector) {
3455+
auto type =
3456+
velox::ROW({{"flat_map", velox::MAP(velox::INTEGER(), velox::BIGINT())}});
3457+
3458+
facebook::velox::test::VectorMaker vectorMaker(leafPool_.get());
3459+
3460+
constexpr int numRows = 4;
3461+
constexpr int numKeys = 3;
3462+
auto keysVector = vectorMaker.flatVector<int32_t>({1, 2, 3});
3463+
std::vector<velox::VectorPtr> mapValues;
3464+
mapValues.push_back(vectorMaker.flatVector<int64_t>({10, 11, 12, 13}));
3465+
mapValues.push_back(vectorMaker.flatVector<int64_t>({20, 21, 22, 23}));
3466+
mapValues.push_back(vectorMaker.flatVector<int64_t>({30, 31, 32, 33}));
3467+
3468+
std::vector<velox::BufferPtr> inMaps;
3469+
for (int i = 0; i < numKeys; ++i) {
3470+
auto inMap = velox::AlignedBuffer::allocate<bool>(numRows, leafPool_.get());
3471+
auto rawInMap = inMap->asMutable<uint64_t>();
3472+
for (int row = 0; row < numRows; ++row) {
3473+
velox::bits::setBit(rawInMap, row, true);
3474+
}
3475+
inMaps.push_back(std::move(inMap));
3476+
}
3477+
3478+
auto flatMapVector = std::make_shared<velox::FlatMapVector>(
3479+
leafPool_.get(),
3480+
velox::MAP(velox::INTEGER(), velox::BIGINT()),
3481+
velox::BufferPtr(nullptr),
3482+
numRows,
3483+
keysVector,
3484+
std::move(mapValues),
3485+
std::move(inMaps));
3486+
3487+
auto indices = velox::AlignedBuffer::allocate<velox::vector_size_t>(
3488+
numRows, leafPool_.get());
3489+
auto rawIndices = indices->asMutable<velox::vector_size_t>();
3490+
rawIndices[0] = 2;
3491+
rawIndices[1] = 0;
3492+
rawIndices[2] = 3;
3493+
rawIndices[3] = 1;
3494+
auto dictionaryWrapped = velox::BaseVector::wrapInDictionary(
3495+
velox::BufferPtr(nullptr), indices, numRows, flatMapVector);
3496+
3497+
auto rowVector = vectorMaker.rowVector({"flat_map"}, {dictionaryWrapped});
3498+
3499+
auto expected = vectorMaker.rowVector(
3500+
{"flat_map"},
3501+
{vectorMaker.mapVector<int32_t, int64_t>(
3502+
{{{1, 12}, {2, 22}, {3, 32}},
3503+
{{1, 10}, {2, 20}, {3, 30}},
3504+
{{1, 13}, {2, 23}, {3, 33}},
3505+
{{1, 11}, {2, 21}, {3, 31}}})});
3506+
3507+
std::string file;
3508+
auto writeFile = std::make_unique<velox::InMemoryWriteFile>(&file);
3509+
auto writerOptions = createFlatMapWriterOptions();
3510+
writerOptions.flatMapColumns["flat_map"];
3511+
nimble::VeloxWriter writer(
3512+
type, std::move(writeFile), *rootPool_, std::move(writerOptions));
3513+
writer.write(rowVector);
3514+
writer.close();
3515+
3516+
velox::InMemoryReadFile readFile(file);
3517+
auto selector = std::make_shared<velox::dwio::common::ColumnSelector>(type);
3518+
nimble::VeloxReader reader(
3519+
&readFile, *leafPool_, std::move(selector), createReadParams());
3520+
velox::VectorPtr output;
3521+
uint64_t rowCount = numRows;
3522+
ASSERT_TRUE(reader.next(rowCount, output));
3523+
ASSERT_EQ(output->size(), numRows);
3524+
for (auto i = 0; i < numRows; ++i) {
3525+
EXPECT_TRUE(vectorEquals(expected, output, i)) << "Mismatch at row " << i;
3526+
}
3527+
}
3528+
34533529
TEST_P(VeloxReaderTest, flatMapNullValues) {
34543530
testFlatMapNullValues<int8_t>();
34553531
testFlatMapNullValues<int16_t>();

0 commit comments

Comments
 (0)