Skip to content

Commit 3f541ad

Browse files
committed
Added tests for the new OpenAI API
Signed-off-by: Chaitany patel <[email protected]>
1 parent e96a116 commit 3f541ad

2 files changed

Lines changed: 473 additions & 1 deletion

File tree

sdk/python/tests/integration/online_store/test_universal_online.py

Lines changed: 313 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@
1414

1515
from feast import FeatureStore
1616
from feast.entity import Entity
17-
from feast.errors import FeatureNameCollisionError
17+
from feast.errors import FeatureNameCollisionError, FeatureViewNotFoundException
1818
from feast.feature_service import FeatureService
1919
from feast.feature_view import FeatureView
2020
from feast.field import Field
21+
from feast.filter_models import ComparisonFilter, CompoundFilter
2122
from feast.infra.offline_stores.file_source import FileSource
2223
from feast.infra.utils.postgres.postgres_config import ConnectionType
2324
from feast.online_response import TIMESTAMP_POSTFIX
25+
from feast.repo_config import EmbeddingModelConfig
2426
from feast.types import (
2527
Array,
2628
Float32,
@@ -1265,3 +1267,313 @@ def test_retrieve_online_documents_v2(environment, fake_document_data):
12651267
assert len(no_match_results["text_field"]) == 0
12661268
assert "text_rank" in no_match_results
12671269
assert len(no_match_results["text_rank"]) == 0
1270+
1271+
1272+
def _setup_documents_with_categories(fs):
    """Create, register, and populate the ``item_embeddings`` feature view.

    Builds a 20-row DataFrame holding a 2-dimensional random ``embedding``,
    a ``text_field`` string, a ``category`` that cycles through
    ``Category-0`` .. ``Category-4`` (four rows each), and an
    ``event_timestamp``. The feature view and its entity are applied to
    ``fs`` and the rows are written to the online store.

    Args:
        fs: Feature store object; ``apply`` and ``write_to_online_store``
            are called on it.

    Returns:
        Tuple ``(feature_view, entity, dataframe)``.
    """
    n_rows = 20
    vector_dim = 2
    random.seed(42)
    # The embeddings below are drawn from NumPy's global generator, which
    # ``random.seed`` does not touch; seed it as well so the generated
    # vectors are reproducible from run to run.
    np.random.seed(42)

    df = pd.DataFrame(
        {
            "item_id": list(range(n_rows)),
            "embedding": [list(np.random.random(vector_dim)) for _ in range(n_rows)],
            "text_field": [
                f"Document text content {i} with searchable keywords"
                for i in range(n_rows)
            ],
            # i % 5 spreads the rows evenly over five categories.
            "category": [f"Category-{i % 5}" for i in range(n_rows)],
            "event_timestamp": [datetime.now() for _ in range(n_rows)],
        }
    )

    # The rows are pushed straight to the online store below, so this helper
    # never reads from the parquet path itself.
    data_source = FileSource(
        path="dummy_path.parquet", timestamp_field="event_timestamp"
    )

    item_entity = Entity(
        name="item_id",
        join_keys=["item_id"],
        value_type=ValueType.INT64,
    )

    item_embeddings_fv = FeatureView(
        name="item_embeddings",
        entities=[item_entity],
        schema=[
            Field(name="embedding", dtype=Array(Float32), vector_index=True),
            Field(name="text_field", dtype=String),
            Field(name="category", dtype=String),
            Field(name="item_id", dtype=Int64),
        ],
        source=data_source,
    )

    fs.apply([item_embeddings_fv, item_entity])
    fs.write_to_online_store("item_embeddings", df)
    return item_embeddings_fv, item_entity, df
1317+
1318+
1319+
@pytest.mark.integration
@pytest.mark.universal_online_stores(only=["pgvector", "elasticsearch"])
def test_retrieve_online_documents_v2_with_filters(environment, fake_document_data):
    """Check that metadata filters restrict vector and text search results.

    Covers ``eq``, ``ne``, ``in``, a compound ``and`` filter, a text search
    combined with a filter, and a filter that matches no rows.
    """
    store = environment.feature_store
    store.config.online_store.vector_enabled = True

    _fv, _entity, _frame = _setup_documents_with_categories(store)
    vector_dim = 2
    query_embedding = list(np.random.random(vector_dim))

    requested_features = (
        "item_embeddings:embedding",
        "item_embeddings:text_field",
        "item_embeddings:category",
        "item_embeddings:item_id",
    )

    def vector_search(active_filter):
        # Every vector query in this test differs only in its filter.
        return store.retrieve_online_documents_v2(
            features=list(requested_features),
            query=query_embedding,
            top_k=10,
            distance_metric="L2",
            filters=active_filter,
        ).to_dict()

    # --- eq filter: only Category-0 rows ---
    results = vector_search(
        ComparisonFilter(type="eq", key="category", value="Category-0")
    )
    assert len(results["category"]) > 0
    assert len(results["category"]) <= 4  # 20 rows / 5 categories
    assert all(c == "Category-0" for c in results["category"])

    # --- ne filter: exclude Category-0 ---
    results = vector_search(
        ComparisonFilter(type="ne", key="category", value="Category-0")
    )
    assert len(results["category"]) > 0
    assert all(c != "Category-0" for c in results["category"])

    # --- in filter: Category-0 or Category-1 ---
    results = vector_search(
        ComparisonFilter(
            type="in", key="category", value=["Category-0", "Category-1"]
        )
    )
    assert len(results["category"]) > 0
    assert all(c in ("Category-0", "Category-1") for c in results["category"])

    # --- compound AND filter: category == Category-0 AND item_id >= 5 ---
    results = vector_search(
        CompoundFilter(
            type="and",
            filters=[
                ComparisonFilter(type="eq", key="category", value="Category-0"),
                ComparisonFilter(type="gte", key="item_id", value=5),
            ],
        )
    )
    assert len(results["category"]) > 0
    assert all(c == "Category-0" for c in results["category"])
    assert all(i >= 5 for i in results["item_id"])

    # --- text search + filter ---
    text_results = store.retrieve_online_documents_v2(
        features=list(requested_features),
        query_string="searchable keywords",
        top_k=10,
        filters=ComparisonFilter(type="eq", key="category", value="Category-2"),
    ).to_dict()
    assert len(text_results["category"]) > 0
    assert all(c == "Category-2" for c in text_results["category"])

    # --- filter with no matches ---
    empty_results = vector_search(
        ComparisonFilter(type="eq", key="category", value="NonexistentCategory")
    )
    assert len(empty_results.get("category", [])) == 0
1447+
1448+
1449+
@pytest.mark.integration
@pytest.mark.universal_online_stores(only=["pgvector", "elasticsearch"])
def test_retrieve_online_documents_openai(environment, fake_document_data):
    """Test OpenAI-compatible vector store search returns the correct response shape.

    Exercises three calls against the ``item_embeddings`` feature view with a
    stubbed ``litellm`` module: a single string query, a query restricted via
    ``features_to_retrieve``, and a list-of-strings query.
    """
    fs = environment.feature_store
    fs.config.online_store.vector_enabled = True

    _setup_documents_with_categories(fs)
    vector_dim = 2

    fs.config.embedding_model = EmbeddingModelConfig(model="text-embedding-3-small")

    fake_embedding = list(np.random.random(vector_dim))
    mock_embed_response = unittest.mock.MagicMock()
    mock_embed_response.data = [{"embedding": fake_embedding}]

    def _stub_litellm(embedding=None):
        """Return a context manager that installs a fake ``litellm`` module.

        The fake module's ``embedding`` callable returns the canned response,
        so code that imports ``litellm`` while the context is active gets
        ``fake_embedding`` from it.
        """
        if embedding is None:
            embedding = unittest.mock.MagicMock(return_value=mock_embed_response)
        return unittest.mock.patch.dict(
            "sys.modules",
            {"litellm": unittest.mock.MagicMock(embedding=embedding)},
        )

    # NOTE(review): ``create=True`` means this attribute may not exist on
    # feast.feature_store; kept in case the implementation resolves the
    # embedding function through that module-level name — confirm.
    with unittest.mock.patch(
        "feast.feature_store.litellm_embedding", create=True
    ) as mock_litellm:
        mock_litellm.return_value = mock_embed_response

        # Patch the litellm import inside the method
        with _stub_litellm(mock_litellm):
            result = fs.retrieve_online_documents_openai(
                vector_store_id="item_embeddings",
                query="test query",
                max_num_results=5,
            )

    # Validate top-level OpenAI response shape
    assert result["object"] == "vector_store.search_results.page"
    assert isinstance(result["search_query"], list)
    assert result["search_query"] == ["test query"]
    assert result["has_more"] is False
    assert result["next_page"] is None

    assert isinstance(result["data"], list)
    assert len(result["data"]) > 0
    assert len(result["data"]) <= 5

    for item_result in result["data"]:
        assert "file_id" in item_result
        assert "filename" in item_result
        assert item_result["filename"] == "item_embeddings"
        assert "score" in item_result
        assert isinstance(item_result["score"], float)
        assert "attributes" in item_result
        assert isinstance(item_result["attributes"], dict)
        assert "content" in item_result
        assert isinstance(item_result["content"], list)
        for part in item_result["content"]:
            assert "type" in part
            assert part["type"] == "text"
            assert "text" in part

    # --- Test with features_to_retrieve ---
    with _stub_litellm():
        result_subset = fs.retrieve_online_documents_openai(
            vector_store_id="item_embeddings",
            query="test query",
            max_num_results=5,
            features_to_retrieve=["text_field", "category"],
        )

    assert len(result_subset["data"]) > 0
    for item_result in result_subset["data"]:
        attr_keys = set(item_result["attributes"].keys())
        # The embedding was not requested, so it must not be returned.
        assert "embedding" not in attr_keys

    # --- Test with list query ---
    with _stub_litellm():
        result_list = fs.retrieve_online_documents_openai(
            vector_store_id="item_embeddings",
            query=["term1", "term2"],
            max_num_results=5,
        )

    assert result_list["search_query"] == ["term1", "term2"]
1548+
1549+
1550+
@pytest.mark.integration
@pytest.mark.universal_online_stores(only=["pgvector", "elasticsearch"])
def test_retrieve_online_documents_openai_no_embedding_config(
    environment, fake_document_data
):
    """Expect a ValueError from retrieve_online_documents_openai when the
    store's ``embedding_model`` config is unset."""
    store = environment.feature_store
    store.config.embedding_model = None

    search_kwargs = {
        "vector_store_id": "item_embeddings",
        "query": "test query",
    }
    expect_config_error = pytest.raises(
        ValueError, match="embedding_model is not configured"
    )
    with expect_config_error:
        store.retrieve_online_documents_openai(**search_kwargs)
1565+
1566+
1567+
@pytest.mark.integration
@pytest.mark.universal_online_stores(only=["pgvector", "elasticsearch"])
def test_retrieve_online_documents_openai_not_found(environment, fake_document_data):
    """Expect FeatureViewNotFoundException from retrieve_online_documents_openai
    when ``vector_store_id`` names a feature view that does not exist."""
    store = environment.feature_store
    store.config.embedding_model = EmbeddingModelConfig(
        model="text-embedding-3-small"
    )

    search_kwargs = {
        "vector_store_id": "nonexistent_feature_view",
        "query": "test query",
    }
    with pytest.raises(FeatureViewNotFoundException):
        store.retrieve_online_documents_openai(**search_kwargs)

0 commit comments

Comments
 (0)