Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
d3c5344
squash commit
leeclemnet Apr 9, 2026
216b373
Show confidence hint 0.4 to match default
leeclemnet Apr 13, 2026
b67759d
dev dockerfiles overlay inference_models source build
leeclemnet Apr 13, 2026
2a39f9e
confidence filter debug logging
leeclemnet Apr 14, 2026
a6d348f
confidence filter lazy imports not needed
leeclemnet Apr 14, 2026
85af4a1
drop unneeded comment
leeclemnet Apr 14, 2026
27133cb
kwargs.get(recommended_parameters) -> Optional[RecommendedParameters]…
leeclemnet Apr 14, 2026
c669544
per-model default confidence, move to post_processing, inline per-ima…
leeclemnet Apr 14, 2026
fdc88e0
concrete class post_process confidence optional default None
leeclemnet Apr 14, 2026
0319e63
cleanup
leeclemnet Apr 14, 2026
6090871
simplify ConfidenceFilter and avoid double filtering
leeclemnet Apr 15, 2026
655fd9b
fix OOB bugs in yolov5/7 and rfdetr
leeclemnet Apr 15, 2026
b30215b
confidencefilter readability
leeclemnet Apr 15, 2026
f289b53
undo no-op diffs
leeclemnet Apr 15, 2026
8564ea6
revert workflow UI change for now
leeclemnet Apr 15, 2026
2f0b722
deeplabv3plus: drop double construction of SemanticSegmentationResult
leeclemnet Apr 15, 2026
7435547
Explicit 'best', 'default' or float confidence - easy opt-out
leeclemnet Apr 16, 2026
e72bb14
legacy inference ignore string valued confidence
leeclemnet Apr 16, 2026
27ca8ff
move Confidence to entities, validation throws ModelInputError, hint …
leeclemnet Apr 17, 2026
9abf606
use pydantic native ge/le validation instead of annotated_types
leeclemnet Apr 17, 2026
4b818da
keep 0.5 request default, default to 'default' in SDK instead of 'best'
leeclemnet Apr 17, 2026
5f8a36c
scalar threshold fast path
leeclemnet Apr 17, 2026
576ff5b
fix yolov10
leeclemnet Apr 17, 2026
cb5351b
update inference_sdk
leeclemnet Apr 17, 2026
8f5a853
drop pydantic validation on confidence
leeclemnet Apr 17, 2026
396952d
bump version to 0.25.0, update changelog, revert Dockerfile and CI ch…
leeclemnet Apr 17, 2026
0e87734
bump inference-models requirements to 0.25.0
leeclemnet Apr 17, 2026
87b17c1
fix roboflow-instant-hf scalar confidence path
leeclemnet Apr 17, 2026
0174623
bump inference-models version 0.25.1
leeclemnet Apr 17, 2026
00782d0
requirements back to 0.24.4
leeclemnet Apr 17, 2026
08fd135
tmp: force inference_models source build for dev and integration tests
leeclemnet Apr 17, 2026
9ecf1b3
add integration tests for per-class pathways
leeclemnet Apr 17, 2026
645ac89
0.25.1rc1
leeclemnet Apr 17, 2026
c15bf41
bump inference-models~=0.25.1rc1
leeclemnet Apr 18, 2026
e3b5be7
revert yolov7 IS to keep existing NMS behaviour including objectness col
leeclemnet Apr 20, 2026
efbcd0a
[tmp] install inference_models from git branch
leeclemnet Apr 20, 2026
e86cc33
0.25.1rc2
leeclemnet Apr 20, 2026
d4a51e6
requirements 0.25.1rc2
leeclemnet Apr 20, 2026
f9f14ef
try moving yolov8 test to avoid disturbing cache determinism tests
leeclemnet Apr 20, 2026
2a99a78
try removing added yolov8 trt test case
leeclemnet Apr 20, 2026
edfa5a3
0.25.1 for pypi release
leeclemnet Apr 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions inference/core/entities/requests/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pydantic import BaseModel, ConfigDict, Field, validator

from inference.core.entities.common import ApiKey, ModelID, ModelType
from inference_sdk.http.entities import Confidence


class BaseRequest(BaseModel):
Expand Down Expand Up @@ -145,10 +146,13 @@ class ObjectDetectionInferenceRequest(CVInferenceRequest):
examples=[["class-1", "class-2", "class-n"]],
description="If provided, only predictions for the listed classes will be returned",
)
confidence: Optional[float] = Field(
confidence: Confidence = Field(
default=0.4,
examples=[0.5],
description="The confidence threshold used to filter out predictions",
examples=[0.5, "best", "default"],
description=(
'Confidence threshold. "best" uses model-eval thresholds, '
'"default" uses the model built-in, or pass a float.'
),
)
fix_batch_size: Optional[bool] = Field(
default=False,
Expand Down Expand Up @@ -245,10 +249,13 @@ def __init__(self, **kwargs):
kwargs["model_type"] = "classification"
super().__init__(**kwargs)

confidence: Optional[float] = Field(
confidence: Confidence = Field(
default=0.4,
examples=[0.5],
description="The confidence threshold used to filter out predictions",
examples=[0.5, "best", "default"],
description=(
'Confidence threshold. "best" uses model-eval thresholds, '
'"default" uses the model built-in, or pass a float.'
),
)
visualization_stroke_width: Optional[int] = Field(
default=1,
Expand Down
11 changes: 10 additions & 1 deletion inference/core/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,16 @@ def infer_from_request(
is also included in the response.
"""
t1 = perf_counter()
responses = self.infer(**request.dict(), return_image_dims=False)
kwargs = request.dict()
confidence = kwargs.get("confidence")
if isinstance(confidence, str):
logger.warning(
"Legacy inference does not support confidence=%r, "
"using model default",
confidence,
)
kwargs.pop("confidence")
responses = self.infer(**kwargs, return_image_dims=False)
for response in responses:
response.time = perf_counter() - t1
logger.debug(f"model infer time: {response.time * 1000.0} ms")
Expand Down
55 changes: 33 additions & 22 deletions inference/core/models/inference_models_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,25 +677,29 @@ def postprocess(
List[ClassificationInferenceResponse],
]:
mapped_kwargs = self.map_inference_kwargs(kwargs)
post_processed_predictions = self._model.post_process(
predictions, **mapped_kwargs
)
if isinstance(post_processed_predictions, list):
# multi-label classification
return prepare_multi_label_classification_response(
post_processed_predictions,
image_sizes=returned_metadata,
class_names=self.class_names,
confidence_threshold=kwargs.get("confidence", 0.5),
if isinstance(self._model, MultiLabelClassificationModel):
post_processed_predictions = self._model.post_process(
predictions, **mapped_kwargs
)
else:
# single-label classification
return prepare_classification_response(
return prepare_multi_label_classification_response(
post_processed_predictions,
image_sizes=returned_metadata,
class_names=self.class_names,
confidence_threshold=kwargs.get("confidence", 0.5),
)
# Single-label classification: top-1 always wins regardless of
# confidence, so per-class refinement isn't meaningful here. The base
# class deliberately opts out of recommendedParameters entirely. The
# response builder still uses kwargs.get("confidence", 0.5) for the
# cutoff that decides which alternative classes show up.
post_processed_predictions = self._model.post_process(
predictions, **mapped_kwargs
)
return prepare_classification_response(
post_processed_predictions,
image_sizes=returned_metadata,
class_names=self.class_names,
confidence_threshold=kwargs.get("confidence") or 0.5,
)

def clear_cache(self, delete_from_disk: bool = True) -> None:
"""Clears any cache if necessary. TODO: Implement this to delete the cache from the experimental model.
Expand Down Expand Up @@ -747,20 +751,27 @@ def prepare_multi_label_classification_response(
post_processed_predictions: List[MultiLabelClassificationPrediction],
image_sizes: List[Tuple[int, int]],
class_names: List[str],
confidence_threshold: float,
) -> List[MultiLabelClassificationInferenceResponse]:
"""Build the API response from a model's post-processed predictions.

`prediction.class_ids` is the authoritative list of "passed" classes —
the model's `post_process` already applied the
full priority chain (user → per-class → global → default), so the
response builder doesn't re-threshold here. The full per-class score
vector is still emitted in `image_predictions_dict` for UI display.
"""
results = []
for prediction, image_size in zip(post_processed_predictions, image_sizes):
image_predictions_dict = dict()
predicted_classes = []
for class_id, confidence in enumerate(prediction.confidence.cpu().tolist()):
cls_name = class_names[class_id]
image_predictions_dict[cls_name] = {
image_predictions_dict = {
class_names[class_id]: {
"confidence": confidence,
"class_id": class_id,
}
if confidence > confidence_threshold:
predicted_classes.append(cls_name)
for class_id, confidence in enumerate(prediction.confidence.cpu().tolist())
}
predicted_classes = [
class_names[class_id] for class_id in prediction.class_ids.tolist()
]
results.append(
MultiLabelClassificationInferenceResponse(
predictions=image_predictions_dict,
Expand Down
19 changes: 19 additions & 0 deletions inference_models/docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# Changelog

## `0.25.1`

### Fixed

- Fix bug in roboflow_instant_hf confidence filter.

---

## `0.25.0`

### Added

- `post_process(...)` on object detection, instance segmentation, keypoint detection, classification, and semantic
segmentation models now accepts `confidence` as `"best"` (use per-class or global thresholds from
`RecommendedParameters` when available), `"default"` (model's built-in default), or a float override. Shared NMS
helpers accept a per-class `torch.Tensor` for single-pass per-class filtering.

---

## `0.24.4`

### Changed
Expand Down
2 changes: 1 addition & 1 deletion inference_models/inference_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
if os.environ.get("TOKENIZERS_PARALLELISM") is None:
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from inference_models.entities import ColorFormat
from inference_models.entities import ColorFormat, Confidence
from inference_models.model_pipelines.auto_loaders.core import AutoModelPipeline
from inference_models.models.auto_loaders.core import AutoModel
from inference_models.models.auto_loaders.entities import (
Expand Down
3 changes: 2 additions & 1 deletion inference_models/inference_models/entities.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections import namedtuple
from typing import Literal
from typing import Literal, Union

ImageDimensions = namedtuple("ImageDimensions", ["height", "width"])
ColorFormat = Literal["rgb", "bgr"]
Confidence = Union[float, Literal["best", "default"]]
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
TaskType,
)
from inference_models.utils.file_system import dump_json, read_json
from inference_models.weights_providers.entities import ModelDependency
from inference_models.weights_providers.entities import (
ModelDependency,
RecommendedParameters,
)


class AutoResolutionCacheEntry(BaseModel):
Expand All @@ -30,6 +33,7 @@ class AutoResolutionCacheEntry(BaseModel):
model_dependencies: Optional[List[ModelDependency]] = Field(default=None)
created_at: datetime
model_features: Optional[dict] = Field(default=None)
recommended_parameters: Optional[RecommendedParameters] = Field(default=None)


class AutoResolutionCache(ABC):
Expand Down
26 changes: 26 additions & 0 deletions inference_models/inference_models/models/auto_loaders/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
ModelDependency,
ModelPackageMetadata,
Quantization,
RecommendedParameters,
)

MODEL_TYPES_TO_LOAD_FROM_CHECKPOINT = {
Expand Down Expand Up @@ -926,6 +927,7 @@ def model_directory_pointer(model_dir: str) -> None:
model_dependencies=model_metadata.model_dependencies,
model_dependencies_instances=model_dependencies_instances,
model_dependencies_directories=model_dependencies_directories,
recommended_parameters=model_metadata.recommended_parameters,
max_package_loading_attempts=max_package_loading_attempts,
model_download_file_lock_acquire_timeout=model_download_file_lock_acquire_timeout,
verify_hash_while_download=verify_hash_while_download,
Expand Down Expand Up @@ -1078,6 +1080,10 @@ def attempt_loading_model_with_auto_load_cache(
package_id=cache_entry.model_package_id,
)
model_init_kwargs[MODEL_DEPENDENCIES_KEY] = model_dependencies_instances
# Cache stores the already-resolved (package-vs-model) value written
# in initialize_model — no need to re-run resolve_recommended_parameters.
if cache_entry.recommended_parameters is not None:
model_init_kwargs["recommended_parameters"] = cache_entry.recommended_parameters
model = model_class.from_pretrained(
model_package_cache_dir, **model_init_kwargs
)
Expand Down Expand Up @@ -1113,6 +1119,7 @@ def attempt_loading_matching_model_packages(
model_dependencies: Optional[List[ModelDependency]],
model_dependencies_instances: Dict[str, AnyModel],
model_dependencies_directories: Dict[str, str],
recommended_parameters: Optional[RecommendedParameters] = None,
max_package_loading_attempts: Optional[int] = None,
model_download_file_lock_acquire_timeout: int = FILE_LOCK_ACQUIRE_TIMEOUT,
verbose: bool = True,
Expand Down Expand Up @@ -1153,6 +1160,7 @@ def attempt_loading_matching_model_packages(
model_dependencies=model_dependencies,
model_dependencies_instances=model_dependencies_instances,
model_dependencies_directories=model_dependencies_directories,
recommended_parameters=recommended_parameters,
verify_hash_while_download=verify_hash_while_download,
download_files_without_hash=download_files_without_hash,
on_file_created=partial(
Expand Down Expand Up @@ -1218,6 +1226,7 @@ def initialize_model(
model_dependencies: Optional[List[ModelDependency]],
model_dependencies_instances: Dict[str, AnyModel],
model_dependencies_directories: Dict[str, str],
recommended_parameters: Optional[RecommendedParameters] = None,
model_download_file_lock_acquire_timeout: int = FILE_LOCK_ACQUIRE_TIMEOUT,
verify_hash_while_download: bool = True,
download_files_without_hash: bool = False,
Expand Down Expand Up @@ -1307,6 +1316,12 @@ def initialize_model(
)
resolved_files.update(dependencies_resolved_files)
model_init_kwargs[MODEL_DEPENDENCIES_KEY] = model_dependencies_instances
resolved_recommended_parameters = resolve_recommended_parameters(
package_level=model_package.recommended_parameters,
model_level=recommended_parameters,
)
if resolved_recommended_parameters is not None:
model_init_kwargs["recommended_parameters"] = resolved_recommended_parameters
model = model_class.from_pretrained(model_package_cache_dir, **model_init_kwargs)
dump_auto_resolution_cache(
use_auto_resolution_cache=use_auto_resolution_cache,
Expand All @@ -1320,6 +1335,7 @@ def initialize_model(
resolved_files=resolved_files,
model_dependencies=model_dependencies,
model_features=model_package.model_features,
recommended_parameters=resolved_recommended_parameters,
)
return model, model_package_cache_dir

Expand Down Expand Up @@ -1484,6 +1500,7 @@ def dump_auto_resolution_cache(
resolved_files: Set[str],
model_dependencies: Optional[List[ModelDependency]],
model_features: Optional[dict],
recommended_parameters: Optional[RecommendedParameters] = None,
) -> None:
if not use_auto_resolution_cache:
return None
Expand All @@ -1497,6 +1514,7 @@ def dump_auto_resolution_cache(
created_at=datetime.now(),
model_dependencies=model_dependencies,
model_features=model_features,
recommended_parameters=recommended_parameters,
)
auto_resolution_cache.register(
auto_negotiation_hash=auto_negotiation_hash, cache_entry=cache_content
Expand Down Expand Up @@ -1812,3 +1830,11 @@ def load_class_from_path(module_path: str, class_name: str) -> AnyModel:
help_url="https://inference-models.roboflow.com/errors/model-loading/#corruptedmodelpackageerror",
)
return getattr(module, class_name)


def resolve_recommended_parameters(
    package_level: Optional[RecommendedParameters],
    model_level: Optional[RecommendedParameters],
) -> Optional[RecommendedParameters]:
    """Pick the effective recommended parameters for a model.

    The package-level value wins whenever it is present; otherwise fall back
    to the model-level value (which may itself be ``None``).
    """
    if package_level is not None:
        return package_level
    return model_level
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ class ClassificationPrediction:

class ClassificationModel(ABC, Generic[PreprocessedInputs, RawPrediction]):

# Single-label classification deliberately opts out of recommendedParameters.
# Top-1 always wins regardless of confidence, so per-class refinement isn't
# a meaningful semantic for this task type. (Multi-label classification opts
# in below — that's where per-class thresholds actually filter the result.)

@classmethod
@abstractmethod
def from_pretrained(
Expand Down
Loading
Loading