Skip to content

Commit b5ac782

Browse files
Michael Norris authored and facebook-github-bot committed
Support SVSVamana as IVF coarse quantizer via index_factory
Summary: Enable IndexSVSVamana to be used as a coarse quantizer for Faiss IVF indexes, following the same pattern as IndexHNSWFlat and IndexNSGFlat.

Changes:
1. index_factory.cpp: Add an `IVF<nlist>_SVSVamana<degree>` regex pattern in `parse_coarse_quantizer()` (the degree is optional and defaults to 32) and an explicit `get_trains_alone()` return for IndexSVSVamana (training mode 2: k-means on a flat index, then the centroids are added to the graph). Both are guarded by `#ifdef FAISS_ENABLE_SVS`.
2. IndexSVSVamana.h/.cpp: Add `reconstruct()` support backed by a `stored_vectors` buffer. The SVS runtime API does not expose vector retrieval, so we keep a copy of the added vectors. This is needed for IVF residual computation (`by_residual = true`) and `check_compatible_for_merge()`. When used as a coarse quantizer, the buffer holds only `nlist` centroids (trivial memory cost). `reset()` clears `stored_vectors`; `remove_ids()` clears it and marks it invalid, after which `reconstruct()` throws.
3. index_read.cpp/index_write.cpp: An uncompressed index whose `stored_vectors` is valid and non-empty is written under the new `ISV2` fourcc with the buffer appended after the index data; existing `ISVD` files are still read, without stored vectors.

Example factory string: `"IVF65536_SVSVamana32,SQ8"`

Differential Revision: D103568560
1 parent 417c53e commit b5ac782

5 files changed

Lines changed: 53 additions & 2 deletions

File tree

faiss/impl/index_read.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2519,13 +2519,14 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
25192519
}
25202520
#ifdef FAISS_ENABLE_SVS
25212521
else if (
2522-
h == fourcc("ILVQ") || h == fourcc("ISVL") || h == fourcc("ISVD")) {
2522+
h == fourcc("ILVQ") || h == fourcc("ISVL") || h == fourcc("ISVD") ||
2523+
h == fourcc("ISV2")) {
25232524
std::unique_ptr<IndexSVSVamana> svs;
25242525
if (h == fourcc("ILVQ")) {
25252526
svs = std::make_unique<IndexSVSVamanaLVQ>();
25262527
} else if (h == fourcc("ISVL")) {
25272528
svs = std::make_unique<IndexSVSVamanaLeanVec>();
2528-
} else if (h == fourcc("ISVD")) {
2529+
} else if (h == fourcc("ISVD") || h == fourcc("ISV2")) {
25292530
svs = std::make_unique<IndexSVSVamana>();
25302531
}
25312532

@@ -2577,6 +2578,9 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
25772578
leanvec->deserialize_training_data(is);
25782579
}
25792580
}
2581+
if (h == fourcc("ISV2")) {
2582+
READVECTOR(svs->stored_vectors);
2583+
}
25802584
idx = std::move(svs);
25812585
} else if (h == fourcc("ISVF")) {
25822586
auto svs = std::make_unique<IndexSVSFlat>();

faiss/impl/index_write.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,6 +1027,8 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) {
10271027
h = fourcc("ILVQ"); // LVQ
10281028
} else if (lean != nullptr) {
10291029
h = fourcc("ISVL"); // LeanVec
1030+
} else if (svs->stored_vectors_valid && !svs->stored_vectors.empty()) {
1031+
h = fourcc("ISV2"); // uncompressed + stored_vectors
10301032
} else {
10311033
h = fourcc("ISVD"); // uncompressed
10321034
}
@@ -1069,6 +1071,10 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) {
10691071
os.flush();
10701072
}
10711073
}
1074+
1075+
if (h == fourcc("ISV2")) {
1076+
WRITEVECTOR(svs->stored_vectors);
1077+
}
10721078
} else if (
10731079
const IndexSVSFlat* svs = dynamic_cast<const IndexSVSFlat*>(idx)) {
10741080
uint32_t h = fourcc("ISVF");

faiss/index_factory.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ char get_trains_alone(const Index* coarse_quantizer) {
130130
if (dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer)) {
131131
return 2;
132132
}
133+
#ifdef FAISS_ENABLE_SVS
134+
if (dynamic_cast<const IndexSVSVamana*>(coarse_quantizer)) {
135+
return 2;
136+
}
137+
#endif
133138
return 2; // for complicated indexes, we assume they can't be used as a
134139
// kmeans index
135140
}
@@ -299,6 +304,13 @@ Index* parse_coarse_quantizer(
299304
int R = std::stoi(sm[2]);
300305
return new IndexNSGFlat(d, R, mt);
301306
}
307+
#ifdef FAISS_ENABLE_SVS
308+
if (match("IVF([0-9]+[kM]?)_SVSVamana([0-9]*)")) {
309+
nlist = parse_nlist(sm[1].str());
310+
int degree = sm[2].length() > 0 ? std::stoi(sm[2]) : 32;
311+
return new IndexSVSVamana(d, degree, mt);
312+
}
313+
#endif
302314
if (match("IVF([0-9]+[kM]?)\\(Index([0-9])\\)")) {
303315
nlist = parse_nlist(sm[1].str());
304316
int no = std::stoi(sm[2].str());

faiss/svs/IndexSVSVamana.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <svs/runtime/vamana_index.h>
3232

3333
#include <cstddef>
34+
#include <cstring>
3435
#include <numeric>
3536
#include <span>
3637
#include <type_traits>
@@ -114,13 +115,30 @@ void IndexSVSVamana::add(idx_t n, const float* x) {
114115
if (!status.ok()) {
115116
FAISS_THROW_MSG(status.message());
116117
}
118+
119+
size_t prev = static_cast<size_t>(ntotal) * d;
120+
stored_vectors.resize(prev + static_cast<size_t>(n) * d);
121+
std::memcpy(stored_vectors.data() + prev, x, sizeof(float) * n * d);
117122
ntotal += n;
118123
}
119124

125+
/// Copy the stored copy of vector `key` into `recons`.
///
/// Vectors are served from the `stored_vectors` side buffer that add()
/// appends to, d floats per vector in insertion order.
///
/// @param key     id of the vector to fetch; must satisfy 0 <= key < ntotal
/// @param recons  output buffer; d floats are written to it
///
/// Fails through FAISS_THROW_IF_NOT_MSG when the key is out of range, when
/// `stored_vectors` is flagged invalid or empty, or when the buffer is too
/// short to contain the requested vector.
void IndexSVSVamana::reconstruct(idx_t key, float* recons) const {
    FAISS_THROW_IF_NOT_MSG(
            key >= 0 && key < ntotal,
            "IndexSVSVamana::reconstruct: key out of range");
    FAISS_THROW_IF_NOT_MSG(
            stored_vectors_valid && !stored_vectors.empty(),
            "IndexSVSVamana::reconstruct: stored_vectors unavailable "
            "(invalidated by remove_ids or not restored after deserialization)");

    // The buffer length is not tied to ntotal by construction: it is filled
    // verbatim when an index is deserialized, so a non-empty buffer may still
    // be shorter than (key + 1) * d. Check before reading to avoid an
    // out-of-bounds memcpy.
    const size_t dim = static_cast<size_t>(d);
    const size_t offset = static_cast<size_t>(key) * dim;
    FAISS_THROW_IF_NOT_MSG(
            offset + dim <= stored_vectors.size(),
            "IndexSVSVamana::reconstruct: stored_vectors too short for key");

    std::memcpy(recons, stored_vectors.data() + offset, sizeof(float) * dim);
}
135+
120136
void IndexSVSVamana::reset() {
121137
if (impl) {
122138
impl->reset();
123139
}
140+
stored_vectors.clear();
141+
stored_vectors_valid = true;
124142
is_trained = false;
125143
ntotal = 0;
126144
}
@@ -189,6 +207,8 @@ size_t IndexSVSVamana::remove_ids(const IDSelector& sel) {
189207
size_t removed = 0;
190208
auto Status = impl->remove_selected(&removed, id_filter);
191209
ntotal -= removed;
210+
stored_vectors.clear();
211+
stored_vectors_valid = false;
192212
return removed;
193213
}
194214

faiss/svs/IndexSVSVamana.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <svs/runtime/dynamic_vamana_index.h>
3131

3232
#include <iostream>
33+
#include <vector>
3334

3435
namespace faiss {
3536

@@ -108,6 +109,8 @@ struct IndexSVSVamana : Index {
108109

109110
void add(idx_t n, const float* x) override;
110111

112+
void reconstruct(idx_t key, float* recons) const override;
113+
111114
void search(
112115
idx_t n,
113116
const float* x,
@@ -134,6 +137,12 @@ struct IndexSVSVamana : Index {
134137
/* The actual SVS implementation */
135138
svs_runtime::DynamicVamanaIndex* impl{nullptr};
136139

140+
// The SVS runtime API does not expose vector retrieval, so we keep a copy
141+
// of added vectors to support reconstruct(). When used as a coarse
142+
// quantizer this holds only nlist centroids.
143+
std::vector<float> stored_vectors;
144+
bool stored_vectors_valid{true};
145+
137146
protected:
138147
/* Initializes the implementation*/
139148
virtual void create_impl();

0 commit comments

Comments
 (0)