Skip to content

Commit d9ac5f2

Browse files
committed
Address review feedback: defer IP/L2 distinction to Similarity objects
Move the metric-specific query pre-adjustment and raw-decode distance accumulation out of DCTemplate and into the Similarity classes, where the IP/L2 distinction is already managed. - Add a static adjust_query_for_raw_decode() method to each SimilarityL2 and SimilarityIP specialization (AVX512, AVX2, NEON). - Replace the if constexpr (Sim::metric_type == METRIC_L2) branches in DCTemplate::set_query() with a single call to Sim::adjust_query_for_raw_decode(). - Replace the hand-written SIMD loops in query_to_code_predecoded() with calls to the existing Similarity accumulator interface (begin_N / add_N_components / result_N). - Fix bench_scalar_quantizer.py: resolve an error by filtering out QT_count, since it is not a valid quantizer type. Signed-off-by: Mulugeta Mammo <[email protected]>
1 parent cc99465 commit d9ac5f2

4 files changed

Lines changed: 139 additions & 116 deletions

File tree

benchs/bench_scalar_quantizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
variants = [(name, getattr(faiss.ScalarQuantizer, name))
1919
for name in dir(faiss.ScalarQuantizer)
20-
if name.startswith('QT_')]
20+
if name.startswith('QT_') and name != 'QT_count']
2121

2222
quantizer = faiss.IndexFlatL2(d)
2323
# quantizer.add(np.zeros((1, d), dtype='float32'))

faiss/impl/scalar_quantizer/sq-avx2.cpp

Lines changed: 47 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,22 @@ struct SimilarityL2<SIMDLevel::AVX2> {
405405
const __m128 v3 = _mm_add_ps(v1, v2);
406406
return _mm_cvtss_f32(v3);
407407
}
408+
409+
static void adjust_query_for_raw_decode(
410+
const float* x,
411+
float* q_adj,
412+
size_t d,
413+
float vmin,
414+
float vdiff,
415+
float& scale_factor,
416+
float& bias) {
417+
float inv_vdiff = (vdiff != 0) ? 1.0f / vdiff : 0.0f;
418+
for (size_t i = 0; i < d; i++) {
419+
q_adj[i] = (x[i] - vmin) * inv_vdiff;
420+
}
421+
scale_factor = vdiff * vdiff;
422+
bias = 0;
423+
}
408424
};
409425

410426
template <>
@@ -448,6 +464,23 @@ struct SimilarityIP<SIMDLevel::AVX2> {
448464
const __m128 v3 = _mm_add_ps(v1, v2);
449465
return _mm_cvtss_f32(v3);
450466
}
467+
468+
static void adjust_query_for_raw_decode(
469+
const float* x,
470+
float* q_adj,
471+
size_t d,
472+
float vmin,
473+
float vdiff,
474+
float& scale_factor,
475+
float& bias) {
476+
float sum_q = 0;
477+
for (size_t i = 0; i < d; i++) {
478+
q_adj[i] = x[i];
479+
sum_q += x[i];
480+
}
481+
scale_factor = vdiff;
482+
bias = vmin * sum_q;
483+
}
451484
};
452485

453486
/**********************************************************
@@ -506,66 +539,25 @@ struct DCTemplate<Quantizer, Similarity, SIMDLevel::AVX2> : SQDistanceComputer {
506539
void set_query(const float* x) final {
507540
q = x;
508541
if constexpr (has_decode_raw()) {
509-
if constexpr (Sim::metric_type == METRIC_L2) {
510-
float inv_vdiff =
511-
(quant.vdiff != 0) ? 1.0f / quant.vdiff : 0.0f;
512-
for (size_t i = 0; i < quant.d; i++) {
513-
q_adj[i] = (x[i] - quant.vmin) * inv_vdiff;
514-
}
515-
scale_factor = quant.vdiff * quant.vdiff;
516-
bias = 0;
517-
} else {
518-
float sum_q = 0;
519-
for (size_t i = 0; i < quant.d; i++) {
520-
q_adj[i] = x[i];
521-
sum_q += x[i];
522-
}
523-
scale_factor = quant.vdiff;
524-
bias = quant.vmin * sum_q;
525-
}
542+
Sim::adjust_query_for_raw_decode(
543+
x,
544+
q_adj.data(),
545+
quant.d,
546+
quant.vmin,
547+
quant.vdiff,
548+
scale_factor,
549+
bias);
526550
}
527551
}
528552

529553
float query_to_code_predecoded(const uint8_t* code) const {
530-
__m256 acc0 = _mm256_setzero_ps();
531-
__m256 acc1 = _mm256_setzero_ps();
532-
const float* qptr = q_adj.data(); // hoist out of loop
533-
534-
size_t i = 0;
535-
for (; i + 16 <= quant.d; i += 16) {
536-
__m256 x0 = quant.decode_8_raw(code, static_cast<int>(i)).f;
537-
__m256 x1 = quant.decode_8_raw(code, static_cast<int>(i + 8)).f;
538-
__m256 q0 = _mm256_loadu_ps(qptr + i);
539-
__m256 q1 = _mm256_loadu_ps(qptr + i + 8);
540-
if constexpr (Sim::metric_type == METRIC_L2) {
541-
__m256 d0 = _mm256_sub_ps(q0, x0);
542-
__m256 d1 = _mm256_sub_ps(q1, x1);
543-
acc0 = _mm256_fmadd_ps(d0, d0, acc0);
544-
acc1 = _mm256_fmadd_ps(d1, d1, acc1);
545-
} else {
546-
acc0 = _mm256_fmadd_ps(q0, x0, acc0);
547-
acc1 = _mm256_fmadd_ps(q1, x1, acc1);
548-
}
549-
}
550-
// tail for remaining 8-lane block if d isn't a multiple of 16
551-
for (; i < quant.d; i += 8) {
552-
__m256 xi = quant.decode_8_raw(code, static_cast<int>(i)).f;
553-
__m256 qi = _mm256_loadu_ps(qptr + i);
554-
if constexpr (Sim::metric_type == METRIC_L2) {
555-
__m256 diff = _mm256_sub_ps(qi, xi);
556-
acc0 = _mm256_fmadd_ps(diff, diff, acc0);
557-
} else {
558-
acc0 = _mm256_fmadd_ps(qi, xi, acc0);
559-
}
554+
Similarity sim(q_adj.data());
555+
sim.begin_8();
556+
for (size_t i = 0; i < quant.d; i += 8) {
557+
simd8float32 xi = quant.decode_8_raw(code, static_cast<int>(i));
558+
sim.add_8_components(xi);
560559
}
561-
__m256 accu = _mm256_add_ps(acc0, acc1);
562-
563-
// horizontal sum
564-
__m128 sum = _mm_add_ps(
565-
_mm256_castps256_ps128(accu), _mm256_extractf128_ps(accu, 1));
566-
sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
567-
sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 1));
568-
return bias + scale_factor * _mm_cvtss_f32(sum);
560+
return bias + scale_factor * sim.result_8();
569561
}
570562

571563
float symmetric_dis(idx_t i, idx_t j) override {

faiss/impl/scalar_quantizer/sq-avx512.cpp

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,22 @@ struct SimilarityL2<SIMDLevel::AVX512> {
417417
FAISS_ALWAYS_INLINE float result_16() {
418418
return horizontal_add(accu16);
419419
}
420+
421+
static void adjust_query_for_raw_decode(
422+
const float* x,
423+
float* q_adj,
424+
size_t d,
425+
float vmin,
426+
float vdiff,
427+
float& scale_factor,
428+
float& bias) {
429+
float inv_vdiff = (vdiff != 0) ? 1.0f / vdiff : 0.0f;
430+
for (size_t i = 0; i < d; i++) {
431+
q_adj[i] = (x[i] - vmin) * inv_vdiff;
432+
}
433+
scale_factor = vdiff * vdiff;
434+
bias = 0;
435+
}
420436
};
421437

422438
template <>
@@ -451,6 +467,23 @@ struct SimilarityIP<SIMDLevel::AVX512> {
451467
FAISS_ALWAYS_INLINE float result_16() {
452468
return horizontal_add(accu16);
453469
}
470+
471+
static void adjust_query_for_raw_decode(
472+
const float* x,
473+
float* q_adj,
474+
size_t d,
475+
float vmin,
476+
float vdiff,
477+
float& scale_factor,
478+
float& bias) {
479+
float sum_q = 0;
480+
for (size_t i = 0; i < d; i++) {
481+
q_adj[i] = x[i];
482+
sum_q += x[i];
483+
}
484+
scale_factor = vdiff;
485+
bias = vmin * sum_q;
486+
}
454487
};
455488

456489
/**********************************************************
@@ -507,39 +540,25 @@ struct DCTemplate<Quantizer, Similarity, SIMDLevel::AVX512>
507540
void set_query(const float* x) final {
508541
q = x;
509542
if constexpr (has_decode_raw()) {
510-
if constexpr (Sim::metric_type == METRIC_L2) {
511-
float inv_vdiff =
512-
(quant.vdiff != 0) ? 1.0f / quant.vdiff : 0.0f;
513-
for (size_t i = 0; i < quant.d; i++) {
514-
q_adj[i] = (x[i] - quant.vmin) * inv_vdiff;
515-
}
516-
scale_factor = quant.vdiff * quant.vdiff;
517-
bias = 0;
518-
} else {
519-
float sum_q = 0;
520-
for (size_t i = 0; i < quant.d; i++) {
521-
q_adj[i] = x[i];
522-
sum_q += x[i];
523-
}
524-
scale_factor = quant.vdiff;
525-
bias = quant.vmin * sum_q;
526-
}
543+
Sim::adjust_query_for_raw_decode(
544+
x,
545+
q_adj.data(),
546+
quant.d,
547+
quant.vmin,
548+
quant.vdiff,
549+
scale_factor,
550+
bias);
527551
}
528552
}
529553

530554
float query_to_code_predecoded(const uint8_t* code) const {
531-
__m512 accu = _mm512_setzero_ps();
555+
Similarity sim(q_adj.data());
556+
sim.begin_16();
532557
for (size_t i = 0; i < quant.d; i += 16) {
533-
__m512 xi = quant.decode_16_raw(code, i).f;
534-
__m512 qi = _mm512_loadu_ps(q_adj.data() + i);
535-
if constexpr (Sim::metric_type == METRIC_L2) {
536-
__m512 diff = _mm512_sub_ps(qi, xi);
537-
accu = _mm512_fmadd_ps(diff, diff, accu);
538-
} else {
539-
accu = _mm512_fmadd_ps(qi, xi, accu);
540-
}
558+
simd16float32 xi = quant.decode_16_raw(code, i);
559+
sim.add_16_components(xi);
541560
}
542-
return bias + scale_factor * _mm512_reduce_add_ps(accu);
561+
return bias + scale_factor * sim.result_16();
543562
}
544563

545564
float symmetric_dis(idx_t i, idx_t j) override {

faiss/impl/scalar_quantizer/sq-neon.cpp

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,22 @@ struct SimilarityL2<SIMDLevel::ARM_NEON> {
403403
FAISS_ALWAYS_INLINE float result_8() {
404404
return horizontal_add(accu8);
405405
}
406+
407+
static void adjust_query_for_raw_decode(
408+
const float* x,
409+
float* q_adj,
410+
size_t d,
411+
float vmin,
412+
float vdiff,
413+
float& scale_factor,
414+
float& bias) {
415+
float inv_vdiff = (vdiff != 0) ? 1.0f / vdiff : 0.0f;
416+
for (size_t i = 0; i < d; i++) {
417+
q_adj[i] = (x[i] - vmin) * inv_vdiff;
418+
}
419+
scale_factor = vdiff * vdiff;
420+
bias = 0;
421+
}
406422
};
407423

408424
template <>
@@ -437,6 +453,23 @@ struct SimilarityIP<SIMDLevel::ARM_NEON> {
437453
FAISS_ALWAYS_INLINE float result_8() {
438454
return horizontal_add(accu8);
439455
}
456+
457+
static void adjust_query_for_raw_decode(
458+
const float* x,
459+
float* q_adj,
460+
size_t d,
461+
float vmin,
462+
float vdiff,
463+
float& scale_factor,
464+
float& bias) {
465+
float sum_q = 0;
466+
for (size_t i = 0; i < d; i++) {
467+
q_adj[i] = x[i];
468+
sum_q += x[i];
469+
}
470+
scale_factor = vdiff;
471+
bias = vmin * sum_q;
472+
}
440473
};
441474

442475
/**********************************************************
@@ -493,46 +526,25 @@ struct DCTemplate<Quantizer, Similarity, SIMDLevel::ARM_NEON>
493526
void set_query(const float* x) final {
494527
q = x;
495528
if constexpr (has_decode_raw()) {
496-
if constexpr (Sim::metric_type == METRIC_L2) {
497-
float inv_vdiff =
498-
(quant.vdiff != 0) ? 1.0f / quant.vdiff : 0.0f;
499-
for (size_t i = 0; i < quant.d; i++) {
500-
q_adj[i] = (x[i] - quant.vmin) * inv_vdiff;
501-
}
502-
scale_factor = quant.vdiff * quant.vdiff;
503-
bias = 0;
504-
} else {
505-
float sum_q = 0;
506-
for (size_t i = 0; i < quant.d; i++) {
507-
q_adj[i] = x[i];
508-
sum_q += x[i];
509-
}
510-
scale_factor = quant.vdiff;
511-
bias = quant.vmin * sum_q;
512-
}
529+
Sim::adjust_query_for_raw_decode(
530+
x,
531+
q_adj.data(),
532+
quant.d,
533+
quant.vmin,
534+
quant.vdiff,
535+
scale_factor,
536+
bias);
513537
}
514538
}
515539

516540
float query_to_code_predecoded(const uint8_t* code) const {
517-
float32x4_t accu0 = vdupq_n_f32(0);
518-
float32x4_t accu1 = vdupq_n_f32(0);
541+
Similarity sim(q_adj.data());
542+
sim.begin_8();
519543
for (size_t i = 0; i < quant.d; i += 8) {
520544
simd8float32 xi = quant.decode_8_raw(code, i);
521-
float32x4_t qi0 = vld1q_f32(q_adj.data() + i);
522-
float32x4_t qi1 = vld1q_f32(q_adj.data() + i + 4);
523-
if constexpr (Sim::metric_type == METRIC_L2) {
524-
float32x4_t d0 = vsubq_f32(qi0, xi.data.val[0]);
525-
float32x4_t d1 = vsubq_f32(qi1, xi.data.val[1]);
526-
accu0 = vfmaq_f32(accu0, d0, d0);
527-
accu1 = vfmaq_f32(accu1, d1, d1);
528-
} else {
529-
accu0 = vfmaq_f32(accu0, qi0, xi.data.val[0]);
530-
accu1 = vfmaq_f32(accu1, qi1, xi.data.val[1]);
531-
}
545+
sim.add_8_components(xi);
532546
}
533-
float32x4_t sum4 = vaddq_f32(accu0, accu1);
534-
float result = vaddvq_f32(sum4);
535-
return bias + scale_factor * result;
547+
return bias + scale_factor * sim.result_8();
536548
}
537549

538550
float symmetric_dis(idx_t i, idx_t j) override {

0 commit comments

Comments
 (0)