Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions benchs/bench_flat_l2_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import multiprocessing as mp
import time

Expand All @@ -11,11 +12,18 @@
import numpy as np

try:
from faiss.contrib.datasets_fb import DatasetGIST1M
from faiss.contrib.datasets_fb import DatasetSIFT1M, DatasetGIST1M
except ImportError:
from faiss.contrib.datasets import DatasetGIST1M
from faiss.contrib.datasets import DatasetSIFT1M, DatasetGIST1M

ds = DatasetGIST1M()
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", default="gist1m", choices=["sift1m", "gist1m"])
args = parser.parse_args()

if args.dataset == "sift1m":
ds = DatasetSIFT1M()
else:
ds = DatasetGIST1M()

nq = 10
xq = ds.get_queries()[:nq]
Expand Down Expand Up @@ -60,7 +68,7 @@ def build_index(name):
return index


nlevels = 8
nlevels = 16 if args.dataset == "gist1m" else 8
batch_size = 512

plt.figure(figsize=(8, 6), dpi=80)
Expand Down Expand Up @@ -93,7 +101,8 @@ def build_index(name):
)
plt.xticks(x, labels, rotation=0)
plt.ylabel("QPS")
plt.title("Flat Indexes on GIST1M")
dataset_label = args.dataset.upper()
plt.title(f"Flat Indexes on {dataset_label}")

plt.tight_layout()
plt.savefig("bench_flat_l2_panorama.png", bbox_inches="tight")
plt.savefig(f"bench_flat_l2_panorama_{args.dataset}.png", bbox_inches="tight")
24 changes: 16 additions & 8 deletions benchs/bench_ivf_flat_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import multiprocessing as mp
import time

Expand All @@ -11,11 +12,18 @@
import numpy as np

try:
from faiss.contrib.datasets_fb import DatasetGIST1M
from faiss.contrib.datasets_fb import DatasetSIFT1M, DatasetGIST1M
except ImportError:
from faiss.contrib.datasets import DatasetGIST1M
from faiss.contrib.datasets import DatasetSIFT1M, DatasetGIST1M

ds = DatasetGIST1M()
parser = argparse.ArgumentParser()
parser.add_argument("--dataset", default="gist1m", choices=["sift1m", "gist1m"])
args = parser.parse_args()

if args.dataset == "sift1m":
ds = DatasetSIFT1M()
else:
ds = DatasetGIST1M()

xq = ds.get_queries()
xb = ds.get_database()
Expand All @@ -29,7 +37,7 @@

k = 10
gt = gt[:, :k]
nlevels = 8
nlevels = 16 if args.dataset == "gist1m" else 8


def get_ivf_index(index):
Expand Down Expand Up @@ -90,12 +98,12 @@ def eval_and_plot(name, plot=True):
eval_and_plot(f"IVF{nlist},Flat")

# IVFFlatPanorama (with PCA transform to concentrate energy in early dimensions)
eval_and_plot(f"PCA{d},IVF{nlist},FlatPanorama{nlevels}")
eval_and_plot(f"PCA{d},IVF{nlist},FlatPanorama{nlevels}_{1024}")

plt.title("IVF Flat Indexes on GIST1M")
plt.title("Indices on GIST1M")
dataset_label = args.dataset.upper()
plt.title(f"IVF Flat Indexes on {dataset_label}")
plt.xlabel(f"Recall@{k}")
plt.ylabel("QPS")
plt.yscale("log")
plt.legend(bbox_to_anchor=(1.02, 0.1), loc="upper left", borderaxespad=0)
plt.savefig("bench_ivf_flat_panorama.png", bbox_inches="tight")
plt.savefig(f"bench_ivf_flat_panorama_{args.dataset}.png", bbox_inches="tight")
13 changes: 11 additions & 2 deletions faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -362,14 +362,23 @@ endif()
# Export FAISS_HEADERS variable to parent scope.
set(FAISS_HEADERS ${FAISS_HEADERS} PARENT_SCOPE)

# Detect BMI2 compiler support.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mbmi2" COMPILER_SUPPORTS_BMI2)
if(COMPILER_SUPPORTS_BMI2)
set(FAISS_BMI2_FLAGS "-mbmi2")
else()
set(FAISS_BMI2_FLAGS "")
endif()

add_library(faiss ${FAISS_SRC})

add_library(faiss_avx2 ${FAISS_SRC})
if(NOT FAISS_OPT_LEVEL STREQUAL "avx2" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512" AND NOT FAISS_OPT_LEVEL STREQUAL "avx512_spr")
set_target_properties(faiss_avx2 PROPERTIES EXCLUDE_FROM_ALL TRUE)
endif()
if(NOT WIN32)
target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mpopcnt>)
target_compile_options(faiss_avx2 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mpopcnt ${FAISS_BMI2_FLAGS}>)
else()
# MSVC enables FMA with /arch:AVX2; no separate flags for F16C, POPCNT
# Ref. FMA (under /arch:AVX2): https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64
Expand All @@ -389,7 +398,7 @@ endif()
if(NOT WIN32)
# All modern CPUs support F, CD, VL, DQ, BW extensions.
# Ref: https://en.wikipedia.org/wiki/AVX512
target_compile_options(faiss_avx512 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mpopcnt>)
target_compile_options(faiss_avx512 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:-mavx2 -mfma -mf16c -mavx512f -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mpopcnt ${FAISS_BMI2_FLAGS}>)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will have to add this to avx512_spr as well once #5034 goes in.

else()
target_compile_options(faiss_avx512 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
# we need bigobj for the swig wrapper
Expand Down
6 changes: 5 additions & 1 deletion faiss/IndexFlat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,8 +628,10 @@ inline void flat_pano_search_core(
SingleResultHandler res(handler);

std::vector<float> query_cum_norms(index.n_levels + 1);
std::vector<float> exact_distances(index.batch_size);
std::vector<uint32_t> active_indices(index.batch_size);
std::vector<uint8_t> active_byteset(index.batch_size);
std::vector<float> exact_distances(index.batch_size);
std::vector<float> dot_buffer(index.batch_size);

#pragma omp for
for (int64_t i = 0; i < n; i++) {
Expand Down Expand Up @@ -664,7 +666,9 @@ inline void flat_pano_search_core(
nullptr,
use_sel,
active_indices,
active_byteset,
exact_distances,
dot_buffer,
threshold,
local_stats);
});
Expand Down
45 changes: 28 additions & 17 deletions faiss/IndexIVFFlatPanorama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,23 @@ IndexIVFFlatPanorama::IndexIVFFlatPanorama(
size_t nlist_in,
int n_levels_in,
MetricType metric,
bool own_invlists_in)
bool own_invlists_in,
size_t batch_size_in)
: IndexIVFFlat(quantizer_in, d_in, nlist_in, metric, false),
n_levels(n_levels_in) {
n_levels(n_levels_in),
batch_size(batch_size_in) {
FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);

// We construct the inverted lists here so that we can use the
// level-oriented storage. This does not cause a leak as we constructed
// IndexIVF first, with own_invlists set to false.
this->invlists = new ArrayInvertedListsPanorama(nlist, code_size, n_levels);
this->invlists = new ArrayInvertedListsPanorama(
nlist, code_size, n_levels, batch_size);
this->own_invlists = own_invlists_in;
}

IndexIVFFlatPanorama::IndexIVFFlatPanorama() : n_levels(0) {}
IndexIVFFlatPanorama::IndexIVFFlatPanorama()
: n_levels(0), batch_size(Panorama::kDefaultBatchSize) {}

namespace {

Expand All @@ -55,6 +59,11 @@ struct IVFFlatScannerPanorama : InvertedListScanner {
using C = typename VectorDistance::C;
static constexpr MetricType metric = VectorDistance::metric;

mutable std::vector<uint32_t> active_indices_;
mutable std::vector<uint8_t> active_byteset_;
mutable std::vector<float> exact_distances_;
mutable std::vector<float> dot_buffer_;

IVFFlatScannerPanorama(
const VectorDistance& vd_in,
const ArrayInvertedListsPanorama* storage_in,
Expand All @@ -65,7 +74,11 @@ struct IVFFlatScannerPanorama : InvertedListScanner {
storage(storage_in) {
keep_max = vd.is_similarity;
code_size = vd.d * sizeof(float);
cum_sums.resize(storage->n_levels + 1);
cum_sums.resize(storage->pano.n_levels + 1);
active_indices_.resize(storage->pano.batch_size);
active_byteset_.resize(storage->pano.batch_size);
exact_distances_.resize(storage->pano.batch_size);
dot_buffer_.resize(storage->pano.batch_size);
}

const float* xi = nullptr;
Expand All @@ -90,27 +103,24 @@ struct IVFFlatScannerPanorama : InvertedListScanner {
}

using InvertedListScanner::scan_codes;

size_t scan_codes(
size_t list_size,
const uint8_t* codes,
const idx_t* ids,
ResultHandler& handler) const override {
size_t nup = 0;

const size_t n_batches =
(list_size + storage->kBatchSize - 1) / storage->kBatchSize;
const size_t bs = storage->pano.batch_size;
const size_t n_batches = (list_size + bs - 1) / bs;

const float* cum_sums_data = storage->get_cum_sums(list_no);

std::vector<float> exact_distances(storage->kBatchSize);
std::vector<uint32_t> active_indices(storage->kBatchSize);

PanoramaStats local_stats;
local_stats.reset();

for (size_t batch_no = 0; batch_no < n_batches; batch_no++) {
size_t batch_start = batch_no * storage->kBatchSize;

size_t batch_start = batch_no * bs;
size_t num_active = with_metric_type(metric, [&]<MetricType M>() {
return storage->pano.progressive_filter_batch<C, M>(
codes,
Expand All @@ -122,17 +132,18 @@ struct IVFFlatScannerPanorama : InvertedListScanner {
sel,
ids,
use_sel,
active_indices,
exact_distances,
active_indices_,
active_byteset_,
exact_distances_,
dot_buffer_,
handler.threshold,
local_stats);
});

// Add batch survivors to heap.
for (size_t i = 0; i < num_active; i++) {
uint32_t idx = active_indices[i];
uint32_t idx = active_indices_[i];
size_t global_idx = batch_start + idx;
float dis = exact_distances[idx];
float dis = exact_distances_[idx];

if (C::cmp(handler.threshold, dis)) {
int64_t id = store_pairs ? lo_build(list_no, global_idx)
Expand Down
4 changes: 3 additions & 1 deletion faiss/IndexIVFFlatPanorama.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ namespace faiss {
/// `ArrayInvertedListsPanorama`, which is a struct member of `IndexIVF`.
struct IndexIVFFlatPanorama : IndexIVFFlat {
size_t n_levels;
size_t batch_size;

std::vector<MaybeOwnedVector<float>> cum_sums;

Expand All @@ -46,7 +47,8 @@ struct IndexIVFFlatPanorama : IndexIVFFlat {
size_t nlist_,
int n_levels,
MetricType = METRIC_L2,
bool own_invlists = true);
bool own_invlists = true,
size_t batch_size = Panorama::kDefaultBatchSize);

InvertedListScanner* get_InvertedListScanner(
bool store_pairs,
Expand Down
Loading
Loading