Skip to content

Commit 3731f85

Browse files
committed
Phase 1: Added basic OpenAI-compatible search API in the vector store
Signed-off-by: Chaitany patel <patelchaitany93@gmail.com>
1 parent 3268ced commit 3731f85

14 files changed

Lines changed: 1146 additions & 87 deletions

File tree

sdk/python/feast/feature_server.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
)
3939
from fastapi.concurrency import run_in_threadpool
4040
from fastapi.logger import logger
41-
from fastapi.responses import JSONResponse
41+
from fastapi.responses import JSONResponse, ORJSONResponse
4242
from fastapi.staticfiles import StaticFiles
4343
from google.protobuf.json_format import MessageToDict
4444
from pydantic import BaseModel, field_validator
@@ -50,9 +50,11 @@
5050
from feast.data_source import PushMode
5151
from feast.errors import (
5252
FeastError,
53+
FeatureViewNotFoundException,
5354
)
5455
from feast.feast_object import FeastObject
5556
from feast.feature_view_utils import get_feature_view_from_feature_store
57+
from feast.filter_models import ComparisonFilter, CompoundFilter
5658
from feast.permissions.action import WRITE, AuthzedAction
5759
from feast.permissions.security_manager import assert_permissions
5860
from feast.permissions.server.rest import inject_user_details
@@ -110,7 +112,42 @@ class GetOnlineDocumentsRequest(BaseModel):
110112
top_k: Optional[int] = None
111113
query: Optional[List[float]] = None
112114
query_string: Optional[str] = None
115+
distance_metric: Optional[str] = None
113116
api_version: Optional[int] = 1
117+
filters: Optional[Union[ComparisonFilter, CompoundFilter]] = None
118+
119+
120+
class OpenAISearchMetadata(BaseModel):
    """Feast-specific extension metadata carried in an OpenAI-compatible search request."""

    # Subset of feature names to return; None means all features of the view.
    features_to_retrieve: Optional[List[str]] = None
    # Name of the feature whose value supplies the textual "content" of a result.
    # NOTE(review): content_field is not read anywhere in the visible server/store
    # code — confirm intended use before relying on it.
    content_field: Optional[str] = None
123+
124+
125+
class OpenAIComparisonFilter(BaseModel):
    """Single attribute comparison in OpenAI's vector-store filter format."""

    # Metadata/attribute name to compare.
    key: str
    # Comparison operator (e.g. "eq", "gt").
    # NOTE(review): not constrained here (plain str) — invalid operators are only
    # rejected downstream when converted to the typed internal filter.
    type: str
    # Value to compare against; lists support membership-style operators.
    value: Union[str, int, float, bool, List[Union[str, int]]]
129+
130+
131+
class OpenAICompoundFilter(BaseModel):
    """Logical combination of nested OpenAI-format filters."""

    # Logical operator; NOTE(review): not constrained to "and"/"or" at this layer.
    type: str
    # Nested filters; the string forward reference allows arbitrary nesting depth.
    filters: List[Union[OpenAIComparisonFilter, "OpenAICompoundFilter"]]


# Resolve the self-referencing "OpenAICompoundFilter" forward reference above.
OpenAICompoundFilter.model_rebuild()
137+
138+
139+
class OpenAIRankingOptions(BaseModel):
    """OpenAI-compatible ranking options, accepted for API parity.

    NOTE(review): neither field is applied by the visible server/store code.
    """

    # Ranker identifier.
    ranker: Optional[str] = None
    # Minimum similarity score a result must reach.
    score_threshold: Optional[float] = None
142+
143+
144+
class OpenAISearchRequest(BaseModel):
    """Request body for ``POST /v1/vector_stores/{vector_store_id}/search``."""

    # Natural-language query; a list of strings is joined before embedding.
    query: Union[str, List[str]]
    # Attribute filter tree used to narrow results.
    filters: Optional[Union[OpenAIComparisonFilter, OpenAICompoundFilter]] = None
    # Maximum number of results (top_k); the server falls back to 10 when None.
    max_num_results: Optional[int] = 10
    # Accepted for OpenAI parity; see OpenAIRankingOptions.
    ranking_options: Optional[OpenAIRankingOptions] = None
    # Whether to rewrite the query before search.
    rewrite_query: Optional[bool] = None
    # Feast-specific extensions (e.g. features_to_retrieve).
    metadata: Optional[OpenAISearchMetadata] = None
114151

115152

116153
class FeatureVectorResponse(BaseModel):
@@ -423,6 +460,10 @@ async def retrieve_online_documents(
423460
)
424461
if request.api_version == 2 and request.query_string is not None:
425462
read_params["query_string"] = request.query_string
463+
if request.api_version == 2 and request.distance_metric is not None:
464+
read_params["distance_metric"] = request.distance_metric
465+
if request.api_version == 2 and request.filters is not None:
466+
read_params["filters"] = request.filters
426467

427468
if request.api_version == 2:
428469
read_params["include_feature_view_version_metadata"] = (
@@ -444,6 +485,51 @@ async def retrieve_online_documents(
444485
)
445486
return response_dict
446487

488+
@app.post(
    "/v1/vector_stores/{vector_store_id}/search",
    dependencies=[Depends(inject_user_details)],
)
async def openai_vector_store_search(
    vector_store_id: str,
    request: OpenAISearchRequest,
) -> ORJSONResponse:
    """OpenAI-compatible vector store search endpoint.

    Delegates to ``FeatureStore.retrieve_online_documents_openai`` on a
    worker thread and returns its result (OpenAI's search-results page
    format). Produces an OpenAI-style 404 error payload when no feature
    view matches ``vector_store_id``.
    """
    with feast_metrics.track_request_latency(
        "/v1/vector_stores/{vector_store_id}/search"
    ):
        # Normalize optional pydantic sub-models to plain values up front so
        # the blocking store call below is a simple keyword invocation.
        filter_payload = request.filters.model_dump() if request.filters else None
        ranking_payload = (
            request.ranking_options.model_dump() if request.ranking_options else None
        )
        feature_subset = (
            request.metadata.features_to_retrieve if request.metadata else None
        )
        try:
            # run_in_threadpool forwards keyword arguments to the callable,
            # so no lambda wrapper is needed.
            result = await run_in_threadpool(
                store.retrieve_online_documents_openai,
                vector_store_id=vector_store_id,
                query=request.query,
                max_num_results=request.max_num_results or 10,
                filters=filter_payload,
                ranking_options=ranking_payload,
                rewrite_query=request.rewrite_query,
                features_to_retrieve=feature_subset,
            )
        except FeatureViewNotFoundException:
            return ORJSONResponse(
                status_code=404,
                content={
                    "error": {
                        "message": f"No vector store found with id '{vector_store_id}'",
                        "type": "not_found_error",
                    }
                },
            )
        return ORJSONResponse(content=result)
532+
447533
@app.post("/push", dependencies=[Depends(inject_user_details)])
448534
async def push(request: PushFeaturesRequest) -> Response:
449535
with feast_metrics.track_request_latency("/push"):

sdk/python/feast/feature_store.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
from feast.feast_object import FeastObject
7070
from feast.feature_service import FeatureService
7171
from feast.feature_view import DUMMY_ENTITY, DUMMY_ENTITY_NAME, FeatureView
72+
from feast.filter_models import ComparisonFilter, CompoundFilter, convert_dict_to_filter
7273
from feast.inference import (
7374
update_data_sources_with_inferred_event_timestamp_col,
7475
update_feature_views_with_inferred_features_and_entities,
@@ -2800,6 +2801,7 @@ def retrieve_online_documents_v2(
28002801
image_weight: float = 0.5,
28012802
combine_strategy: str = "weighted_sum",
28022803
include_feature_view_version_metadata: bool = False,
2804+
filters: Optional[Union[ComparisonFilter, CompoundFilter]] = None,
28032805
) -> OnlineResponse:
28042806
"""
28052807
Retrieves the top k closest document features. Note, embeddings are a subset of features.
@@ -2955,8 +2957,164 @@ def retrieve_online_documents_v2(
29552957
distance_metric,
29562958
query_string,
29572959
include_feature_view_version_metadata,
2960+
filters,
29582961
)
29592962

2963+
def retrieve_online_documents_openai(
    self,
    vector_store_id: str,
    query: Union[str, List[str]],
    max_num_results: int = 10,
    filters: Optional[Dict[str, Any]] = None,
    ranking_options: Optional[Dict[str, Any]] = None,
    rewrite_query: Optional[bool] = None,
    features_to_retrieve: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    OpenAI-compatible vector store search.

    Embeds the raw query string via LiteLLM using the ``embedding_model``
    section of feature_store.yaml, runs a vector similarity search through
    ``retrieve_online_documents_v2``, and returns results in OpenAI's
    ``vector_store.search_results.page`` format.

    Args:
        vector_store_id: Feature view name (maps to the OpenAI
            ``vector_store_id`` path parameter).
        query: Natural language query string, or list of strings
            (joined with spaces before embedding).
        max_num_results: Maximum number of results to return.
        filters: OpenAI-compatible filter dict; converted to typed
            filters and forwarded to the online store query.
        ranking_options: OpenAI-compatible ranking options (accepted
            for API parity but not yet applied).
        rewrite_query: Whether to rewrite the query (accepted for API
            parity but not yet applied).
        features_to_retrieve: Specific feature names to return.
            If None, all features from the feature view are used.

    Returns:
        Dict matching the OpenAI ``vector_store.search_results.page``
        schema.

    Raises:
        ValueError: If ``embedding_model`` is not configured in
            feature_store.yaml.
        ImportError: If litellm is not installed.

    Example::

        # feature_store.yaml has:
        # embedding_model:
        #   model: text-embedding-3-small
        result = store.retrieve_online_documents_openai(
            vector_store_id="product_embeddings",
            query="wireless audio device",
            max_num_results=3,
            features_to_retrieve=["name", "description"],
        )
    """
    # May raise if the feature view does not exist; the feature server
    # maps that to an OpenAI-style 404 response.
    feature_view = self.get_feature_view(vector_store_id)

    if features_to_retrieve:
        feature_names = features_to_retrieve
    else:
        feature_names = [f.name for f in feature_view.features]

    features = [f"{feature_view.name}:{name}" for name in feature_names]
    # OpenAI allows a list of query strings; collapse to one text for embedding.
    query_text = query if isinstance(query, str) else " ".join(query)

    embed_cfg = self.config.embedding_model
    if embed_cfg is None:
        raise ValueError(
            "embedding_model is not configured in feature_store.yaml. "
            "Add an 'embedding_model' section with at least a 'model' "
            "field to use retrieve_online_documents_openai.\n"
            "Example:\n"
            "  embedding_model:\n"
            "    model: text-embedding-3-small\n"
            "    api_key: sk-..."
        )

    # Imported lazily so litellm stays an optional dependency.
    try:
        from litellm import embedding as litellm_embedding
    except ImportError:
        raise ImportError(
            "litellm is required for query embedding. "
            "Install with: pip install litellm"
        )

    # Only forward optional provider settings that are actually configured.
    litellm_kwargs: Dict[str, Any] = {
        "model": embed_cfg.model,
        "input": [query_text],
    }
    if embed_cfg.api_key:
        litellm_kwargs["api_key"] = embed_cfg.api_key
    if embed_cfg.api_base:
        litellm_kwargs["api_base"] = embed_cfg.api_base
    if embed_cfg.api_version:
        litellm_kwargs["api_version"] = embed_cfg.api_version
    if embed_cfg.dimensions:
        litellm_kwargs["dimensions"] = embed_cfg.dimensions

    embed_response = litellm_embedding(**litellm_kwargs)
    query_embedding = embed_response.data[0]["embedding"]

    # Convert the raw OpenAI filter dict into the typed internal filter tree.
    typed_filters: Optional[Union[ComparisonFilter, CompoundFilter]] = None
    if filters is not None:
        typed_filters = convert_dict_to_filter(filters)

    response = self.retrieve_online_documents_v2(
        features=features,
        query=query_embedding,
        top_k=max_num_results,
        filters=typed_filters,
    )

    # Column-oriented dict: feature/metadata name -> list of per-row values.
    response_dict = response.to_dict()

    result_data = []
    if response_dict:
        # All columns share the same row count; measure it from the first one.
        first_key = next(iter(response_dict))
        num_rows = len(response_dict.get(first_key, []))
        for i in range(num_rows):
            score = 0.0
            attributes: Dict[str, Any] = {}
            content_parts: List[Dict[str, str]] = []

            for key, values in response_dict.items():
                val = values[i] if i < len(values) else None
                if key == "distance":
                    # The "distance" column becomes the OpenAI result score.
                    score = float(val) if val is not None else 0.0
                else:
                    attributes[key] = val
                    # String-valued features double as the textual content.
                    if isinstance(val, str):
                        content_parts.append({"type": "text", "text": val})

            result_data.append(
                {
                    # Synthesized id: there is no real file backing a row.
                    "file_id": f"{vector_store_id}_{i}",
                    "filename": vector_store_id,
                    "score": score,
                    "attributes": attributes,
                    # Fall back to a stringified attribute dump when no
                    # string-valued feature supplied content.
                    "content": content_parts
                    if content_parts
                    else [{"type": "text", "text": str(attributes)}],
                }
            )

    # OpenAI echoes the search query back as a list.
    search_query = query if isinstance(query, list) else [query]
    return {
        "object": "vector_store.search_results.page",
        "search_query": search_query,
        "data": result_data,
        "has_more": False,
        "next_page": None,
    }
3117+
29603118
def _retrieve_from_online_store(
29613119
self,
29623120
provider: Provider,
@@ -3019,6 +3177,7 @@ def _retrieve_from_online_store_v2(
30193177
top_k: int,
30203178
distance_metric: Optional[str],
30213179
query_string: Optional[str],
3180+
filters: Optional[Union[ComparisonFilter, CompoundFilter]] = None,
30223181
include_feature_view_version_metadata: bool = False,
30233182
) -> OnlineResponse:
30243183
"""
@@ -3036,6 +3195,7 @@ def _retrieve_from_online_store_v2(
30363195
top_k=top_k,
30373196
distance_metric=distance_metric,
30383197
query_string=query_string,
3198+
filters=filters,
30393199
include_feature_view_version_metadata=include_feature_view_version_metadata,
30403200
)
30413201

sdk/python/feast/filter_models.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Any, Dict, List, Literal, Optional, Union
2+
3+
from pydantic import BaseModel
4+
5+
6+
class ComparisonFilter(BaseModel):
    """A filter that compares a metadata field against a value.

    :param type: The comparison operator to apply ("eq", "ne", "gt",
        "gte", "lt", "lte", "in", "nin")
    :param key: The metadata field name to filter on
    :param value: The value to compare against
    """

    # Literal restricts the operator set at pydantic validation time.
    type: Literal["eq", "ne", "gt", "gte", "lt", "lte", "in", "nin"]
    key: str
    value: Any
17+
18+
19+
class CompoundFilter(BaseModel):
    """A filter that combines multiple filters with a logical operator.

    :param type: The logical operator ("and" requires all filters match,
        "or" requires any filter matches)
    :param filters: The list of filters to combine
    """

    type: Literal["and", "or"]
    # String forward reference to CompoundFilter itself allows arbitrary nesting.
    filters: List[Union[ComparisonFilter, "CompoundFilter"]]


# Resolve the self-referencing "CompoundFilter" forward reference above.
CompoundFilter.model_rebuild()

# Convenience alias for APIs that accept either filter shape, or none.
FilterType = Optional[Union[ComparisonFilter, CompoundFilter]]
35+
36+
def convert_dict_to_filter(
    filter_dict: Dict[str, Any],
) -> Union[ComparisonFilter, CompoundFilter]:
    """Convert a raw dict (e.g. from OpenAI-compatible JSON) into a typed filter object.

    Compound nodes (``{"type": "and"/"or", "filters": [...]}``) are
    converted recursively; every other node is treated as a leaf
    comparison filter.

    Args:
        filter_dict: Raw filter mapping with at least a "type" key.

    Returns:
        A CompoundFilter for "and"/"or" nodes, otherwise a
        ComparisonFilter.

    Raises:
        ValueError: If a required key is missing ("type"; "filters" for
            compound nodes; "key"/"value" for comparison nodes).
    """
    filter_type = filter_dict.get("type")
    if filter_type is None:
        # Previously this fell through to filter_dict["type"] and raised a
        # bare KeyError; fail with an explicit, actionable message instead.
        raise ValueError(
            f"Filter dict is missing required 'type' key: {filter_dict!r}"
        )
    if filter_type in ("and", "or"):
        if "filters" not in filter_dict:
            raise ValueError(
                f"Compound filter of type {filter_type!r} is missing 'filters' key"
            )
        return CompoundFilter(
            type=filter_type,
            filters=[convert_dict_to_filter(f) for f in filter_dict["filters"]],
        )
    missing = [k for k in ("key", "value") if k not in filter_dict]
    if missing:
        raise ValueError(
            f"Comparison filter is missing required key(s) {missing}: {filter_dict!r}"
        )
    return ComparisonFilter(
        type=filter_type,
        key=filter_dict["key"],
        value=filter_dict["value"],
    )

0 commit comments

Comments
 (0)