Skip to content

Commit bf85d02

Browse files
Revert "Aggregate remote cold start data in workflow headers (#2209)" (#2222)
This reverts commit 8899554.
1 parent (8899554) — commit bf85d02

11 files changed

Lines changed: 40 additions & 678 deletions

File tree

inference/core/constants.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
KEYPOINTS_DETECTION_TASK = "keypoint-detection"
55
PROCESSING_TIME_HEADER = "X-Processing-Time"
66
MODEL_COLD_START_HEADER = "X-Model-Cold-Start"
7-
MODEL_COLD_START_COUNT_HEADER = "X-Model-Cold-Start-Count"
87
MODEL_LOAD_TIME_HEADER = "X-Model-Load-Time"
98
MODEL_LOAD_DETAILS_HEADER = "X-Model-Load-Details"
109
MODEL_ID_HEADER = "X-Model-Id"

inference/core/interfaces/http/http_api.py

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232

3333
from inference.core import logger
3434
from inference.core.constants import (
35-
MODEL_COLD_START_COUNT_HEADER,
3635
MODEL_COLD_START_HEADER,
3736
MODEL_ID_HEADER,
3837
MODEL_LOAD_DETAILS_HEADER,
@@ -230,12 +229,6 @@
230229
orjson_response,
231230
orjson_response_keeping_parent_id,
232231
)
233-
from inference.core.interfaces.http.request_metrics import (
234-
REMOTE_PROCESSING_TIME_HEADER,
235-
REMOTE_PROCESSING_TIMES_HEADER,
236-
GCPServerlessMiddleware,
237-
build_model_response_headers,
238-
)
239232
from inference.core.interfaces.stream_manager.api.entities import (
240233
CommandContext,
241234
CommandResponse,
@@ -323,9 +316,23 @@
323316
from inference.core.version import __version__
324317

325318
try:
326-
from inference_sdk.config import EXECUTION_ID_HEADER
319+
from inference_sdk.config import (
320+
EXECUTION_ID_HEADER,
321+
INTERNAL_REMOTE_EXEC_REQ_HEADER,
322+
INTERNAL_REMOTE_EXEC_REQ_VERIFIED_HEADER,
323+
RemoteProcessingTimeCollector,
324+
apply_duration_minimum,
325+
execution_id,
326+
remote_processing_times,
327+
)
327328
except ImportError:
329+
execution_id = None
330+
remote_processing_times = None
331+
RemoteProcessingTimeCollector = None
328332
EXECUTION_ID_HEADER = None
333+
INTERNAL_REMOTE_EXEC_REQ_HEADER = None
334+
INTERNAL_REMOTE_EXEC_REQ_VERIFIED_HEADER = None
335+
apply_duration_minimum = None
329336

330337

331338
def get_content_type(request: Request) -> str:
@@ -503,7 +510,6 @@ async def on_shutdown():
503510
REMOTE_PROCESSING_TIME_HEADER,
504511
REMOTE_PROCESSING_TIMES_HEADER,
505512
MODEL_COLD_START_HEADER,
506-
MODEL_COLD_START_COUNT_HEADER,
507513
MODEL_LOAD_TIME_HEADER,
508514
MODEL_LOAD_DETAILS_HEADER,
509515
MODEL_ID_HEADER,
@@ -814,35 +820,17 @@ async def track_model_load(request: Request, call_next):
814820
ids_collector = RequestModelIds()
815821
request_model_ids.set(ids_collector)
816822
response = await call_next(request)
817-
remote_processing_collector = getattr(
818-
request.state, "remote_processing_time_collector", None
819-
)
820-
if remote_processing_collector is not None:
821-
remote_model_ids = remote_processing_collector.snapshot_model_ids()
822-
remote_cold_start_entries = (
823-
remote_processing_collector.snapshot_cold_start_entries()
824-
)
825-
remote_cold_start_count = (
826-
remote_processing_collector.snapshot_cold_start_count()
827-
)
828-
remote_cold_start_total_load_time = (
829-
remote_processing_collector.snapshot_cold_start_total_load_time()
830-
)
823+
if load_collector.has_data():
824+
total, detail = load_collector.summarize()
825+
response.headers[MODEL_COLD_START_HEADER] = "true"
826+
response.headers[MODEL_LOAD_TIME_HEADER] = str(total)
827+
if detail is not None:
828+
response.headers[MODEL_LOAD_DETAILS_HEADER] = detail
831829
else:
832-
remote_model_ids = set()
833-
remote_cold_start_entries = []
834-
remote_cold_start_count = 0
835-
remote_cold_start_total_load_time = 0.0
836-
response.headers.update(
837-
build_model_response_headers(
838-
local_model_ids=ids_collector.get_ids(),
839-
local_cold_start_entries=load_collector.snapshot_entries(),
840-
remote_model_ids=remote_model_ids,
841-
remote_cold_start_entries=remote_cold_start_entries,
842-
remote_cold_start_count=remote_cold_start_count,
843-
remote_cold_start_total_load_time=remote_cold_start_total_load_time,
844-
)
845-
)
830+
response.headers[MODEL_COLD_START_HEADER] = "false"
831+
model_ids = ids_collector.get_ids()
832+
if model_ids:
833+
response.headers[MODEL_ID_HEADER] = ",".join(sorted(model_ids))
846834
wf_id = request_workflow_id.get(None)
847835
if wf_id:
848836
response.headers[WORKFLOW_ID_HEADER] = wf_id
@@ -868,7 +856,6 @@ async def structured_access_log(request: Request, call_next):
868856
"request_id": CORRELATION_ID_HEADER,
869857
"processing_time": PROCESSING_TIME_HEADER,
870858
"model_cold_start": MODEL_COLD_START_HEADER,
871-
"model_cold_start_count": MODEL_COLD_START_COUNT_HEADER,
872859
"model_load_time": MODEL_LOAD_TIME_HEADER,
873860
"model_id": MODEL_ID_HEADER,
874861
"workflow_id": WORKFLOW_ID_HEADER,

inference/core/interfaces/http/request_metrics.py

Lines changed: 0 additions & 134 deletions
This file was deleted.

inference/core/managers/model_load_collector.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,15 @@ def has_data(self) -> bool:
2525
with self._lock:
2626
return len(self._entries) > 0
2727

28-
def snapshot_entries(self) -> list:
29-
with self._lock:
30-
return list(self._entries)
31-
3228
def summarize(self, max_detail_bytes: int = 4096) -> Tuple[float, Optional[str]]:
3329
"""Return (total_load_time, entries_json_or_none).
3430
3531
Returns the total model load time and a JSON string of individual
3632
entries. If the JSON exceeds *max_detail_bytes*, the detail string
3733
is omitted (None).
3834
"""
39-
entries = self.snapshot_entries()
35+
with self._lock:
36+
entries = list(self._entries)
4037
total = sum(t for _, t in entries)
4138
detail = json.dumps([{"m": m, "t": t} for m, t in entries])
4239
if len(detail) > max_detail_bytes:

inference/core/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.2.2"
1+
__version__ = "1.2.1"
22

33

44
if __name__ == "__main__":

inference_sdk/config.py

Lines changed: 1 addition & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import json
33
import os
44
import threading
5-
from typing import Iterable, Optional, Tuple
5+
from typing import Optional, Tuple
66

77
from inference_sdk.utils.environment import str2bool
88

@@ -23,90 +23,23 @@ class RemoteProcessingTimeCollector:
2323

2424
def __init__(self):
2525
self._entries: list = [] # list of (model_id, time) tuples
26-
self._model_ids: set = set()
27-
self._cold_start_entries: list = [] # list of (model_id, load_time) tuples
28-
self._cold_start_total_load_time: float = 0.0
29-
self._cold_start_count: int = 0
3026
self._lock = threading.Lock()
3127

3228
def add(self, processing_time: float, model_id: str = "unknown") -> None:
3329
with self._lock:
3430
self._entries.append((model_id, processing_time))
3531

36-
def add_model_id(self, model_id: Optional[str]) -> None:
37-
if model_id in (None, "", "unknown"):
38-
return
39-
with self._lock:
40-
self._model_ids.add(model_id)
41-
42-
def add_model_ids(self, model_ids: Iterable[str]) -> None:
43-
filtered_ids = {
44-
model_id for model_id in model_ids if model_id not in (None, "", "unknown")
45-
}
46-
if not filtered_ids:
47-
return
48-
with self._lock:
49-
self._model_ids.update(filtered_ids)
50-
51-
def record_cold_start(
52-
self,
53-
load_time: float,
54-
model_id: Optional[str] = None,
55-
count: int = 1,
56-
) -> None:
57-
with self._lock:
58-
self._cold_start_total_load_time += load_time
59-
self._cold_start_count += count
60-
if model_id not in (None, "", "unknown"):
61-
self._cold_start_entries.append((model_id, load_time))
62-
self._model_ids.add(model_id)
63-
6432
def drain(self) -> list:
6533
"""Atomically return all entries and clear the internal list."""
6634
with self._lock:
6735
entries = self._entries
6836
self._entries = []
6937
return entries
7038

71-
def snapshot_entries(self) -> list:
72-
with self._lock:
73-
return list(self._entries)
74-
75-
def snapshot_model_ids(self) -> set:
76-
with self._lock:
77-
return set(self._model_ids)
78-
79-
def snapshot_cold_start_entries(self) -> list:
80-
with self._lock:
81-
return list(self._cold_start_entries)
82-
83-
def snapshot_cold_start_total_load_time(self) -> float:
84-
with self._lock:
85-
return self._cold_start_total_load_time
86-
87-
def snapshot_cold_start_count(self) -> int:
88-
with self._lock:
89-
return self._cold_start_count
90-
9139
def has_data(self) -> bool:
9240
with self._lock:
9341
return len(self._entries) > 0
9442

95-
def has_cold_start_data(self) -> bool:
96-
with self._lock:
97-
return self._cold_start_count > 0
98-
99-
def snapshot_summary(
100-
self, max_detail_bytes: int = 4096
101-
) -> Tuple[float, Optional[str]]:
102-
"""Return (total_time, entries_json_or_none) without clearing entries."""
103-
entries = self.snapshot_entries()
104-
total = sum(t for _, t in entries)
105-
detail = json.dumps([{"m": m, "t": t} for m, t in entries])
106-
if len(detail) > max_detail_bytes:
107-
detail = None
108-
return total, detail
109-
11043
def summarize(self, max_detail_bytes: int = 4096) -> Tuple[float, Optional[str]]:
11144
"""Atomically drain entries and return (total_time, entries_json_or_none).
11245

0 commit comments

Comments (0)