3232
3333from inference .core import logger
3434from inference .core .constants import (
35- MODEL_COLD_START_COUNT_HEADER ,
3635 MODEL_COLD_START_HEADER ,
3736 MODEL_ID_HEADER ,
3837 MODEL_LOAD_DETAILS_HEADER ,
230229 orjson_response ,
231230 orjson_response_keeping_parent_id ,
232231)
233- from inference .core .interfaces .http .request_metrics import (
234- REMOTE_PROCESSING_TIME_HEADER ,
235- REMOTE_PROCESSING_TIMES_HEADER ,
236- GCPServerlessMiddleware ,
237- build_model_response_headers ,
238- )
239232from inference .core .interfaces .stream_manager .api .entities import (
240233 CommandContext ,
241234 CommandResponse ,
323316from inference .core .version import __version__
324317
325318try :
326- from inference_sdk .config import EXECUTION_ID_HEADER
319+ from inference_sdk .config import (
320+ EXECUTION_ID_HEADER ,
321+ INTERNAL_REMOTE_EXEC_REQ_HEADER ,
322+ INTERNAL_REMOTE_EXEC_REQ_VERIFIED_HEADER ,
323+ RemoteProcessingTimeCollector ,
324+ apply_duration_minimum ,
325+ execution_id ,
326+ remote_processing_times ,
327+ )
327328except ImportError :
329+ execution_id = None
330+ remote_processing_times = None
331+ RemoteProcessingTimeCollector = None
328332 EXECUTION_ID_HEADER = None
333+ INTERNAL_REMOTE_EXEC_REQ_HEADER = None
334+ INTERNAL_REMOTE_EXEC_REQ_VERIFIED_HEADER = None
335+ apply_duration_minimum = None
329336
330337
331338def get_content_type (request : Request ) -> str :
@@ -503,7 +510,6 @@ async def on_shutdown():
503510 REMOTE_PROCESSING_TIME_HEADER ,
504511 REMOTE_PROCESSING_TIMES_HEADER ,
505512 MODEL_COLD_START_HEADER ,
506- MODEL_COLD_START_COUNT_HEADER ,
507513 MODEL_LOAD_TIME_HEADER ,
508514 MODEL_LOAD_DETAILS_HEADER ,
509515 MODEL_ID_HEADER ,
@@ -814,35 +820,17 @@ async def track_model_load(request: Request, call_next):
814820 ids_collector = RequestModelIds ()
815821 request_model_ids .set (ids_collector )
816822 response = await call_next (request )
817- remote_processing_collector = getattr (
818- request .state , "remote_processing_time_collector" , None
819- )
820- if remote_processing_collector is not None :
821- remote_model_ids = remote_processing_collector .snapshot_model_ids ()
822- remote_cold_start_entries = (
823- remote_processing_collector .snapshot_cold_start_entries ()
824- )
825- remote_cold_start_count = (
826- remote_processing_collector .snapshot_cold_start_count ()
827- )
828- remote_cold_start_total_load_time = (
829- remote_processing_collector .snapshot_cold_start_total_load_time ()
830- )
823+ if load_collector .has_data ():
824+ total , detail = load_collector .summarize ()
825+ response .headers [MODEL_COLD_START_HEADER ] = "true"
826+ response .headers [MODEL_LOAD_TIME_HEADER ] = str (total )
827+ if detail is not None :
828+ response .headers [MODEL_LOAD_DETAILS_HEADER ] = detail
831829 else :
832- remote_model_ids = set ()
833- remote_cold_start_entries = []
834- remote_cold_start_count = 0
835- remote_cold_start_total_load_time = 0.0
836- response .headers .update (
837- build_model_response_headers (
838- local_model_ids = ids_collector .get_ids (),
839- local_cold_start_entries = load_collector .snapshot_entries (),
840- remote_model_ids = remote_model_ids ,
841- remote_cold_start_entries = remote_cold_start_entries ,
842- remote_cold_start_count = remote_cold_start_count ,
843- remote_cold_start_total_load_time = remote_cold_start_total_load_time ,
844- )
845- )
830+ response .headers [MODEL_COLD_START_HEADER ] = "false"
831+ model_ids = ids_collector .get_ids ()
832+ if model_ids :
833+ response .headers [MODEL_ID_HEADER ] = "," .join (sorted (model_ids ))
846834 wf_id = request_workflow_id .get (None )
847835 if wf_id :
848836 response .headers [WORKFLOW_ID_HEADER ] = wf_id
@@ -868,7 +856,6 @@ async def structured_access_log(request: Request, call_next):
868856 "request_id" : CORRELATION_ID_HEADER ,
869857 "processing_time" : PROCESSING_TIME_HEADER ,
870858 "model_cold_start" : MODEL_COLD_START_HEADER ,
871- "model_cold_start_count" : MODEL_COLD_START_COUNT_HEADER ,
872859 "model_load_time" : MODEL_LOAD_TIME_HEADER ,
873860 "model_id" : MODEL_ID_HEADER ,
874861 "workflow_id" : WORKFLOW_ID_HEADER ,
0 commit comments