Skip to content

Commit 9d61a78

Browse files
committed
feat(inference): add serverless model deployment and ONNX predictions
- Add POST /jobs/{job_id}/deploy endpoint for one-click model deploy
- Add POST /predict/{job_id} endpoint with ONNX Runtime inference
- Add GET /predict/{job_id}/info endpoint for model metadata
- Add prediction playground UI on results page
- Cache ONNX models in Lambda memory for fast subsequent predictions
- Update ONNX Runtime to 1.20.1 for Docker compatibility
- Add privileged mode to docker-compose for local development

Cost: $0 idle vs ~$50-100/month SageMaker endpoint
1 parent c9c3994 commit 9d61a78

12 files changed

Lines changed: 800 additions & 8 deletions

File tree

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
## [Unreleased]
99

1010
### Added
11+
- **Serverless Model Inference** - Deploy and make predictions without SageMaker
12+
- One-click model deploy button on results page
13+
- `POST /jobs/{job_id}/deploy` endpoint to deploy/undeploy models
14+
- `POST /predict/{job_id}` endpoint for making predictions with ONNX Runtime
15+
- `GET /predict/{job_id}/info` endpoint for model metadata
16+
- ONNX model caching in Lambda memory for fast subsequent predictions
17+
- Prediction Playground UI with interactive feature input form
18+
- Real-time prediction results with confidence and probabilities
19+
- Cost comparison panel: Lambda ($0 idle) vs SageMaker (~$50-100/month)
20+
- ONNX Runtime 1.20.1 for serverless inference (compatible with Docker local dev)
21+
1122
- **Dark Mode Support** - Full dark/light/system theme support across all pages
1223
- Integrated `next-themes` for flicker-free theme switching
1324
- `ThemeToggle` component with 3-way cycling (Light → Dark → System)

backend/api/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from fastapi.middleware.cors import CORSMiddleware
33
from fastapi.responses import JSONResponse
44
from mangum import Mangum
5-
from .routers import upload, training, models, datasets
5+
from .routers import upload, training, models, datasets, predict
66
from .utils.helpers import get_settings
77

88
settings = get_settings()
@@ -30,6 +30,7 @@
3030
app.include_router(datasets.router)
3131
app.include_router(training.router)
3232
app.include_router(models.router)
33+
app.include_router(predict.router)
3334

3435

3536
@app.get("/")

backend/api/models/schemas.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,14 @@ class JobDetails(BaseModel):
119119
model_config = {"protected_namespaces": ()}
120120

121121

122+
class PreprocessingInfo(BaseModel):
    """Preprocessing information for inference.

    Attached to ``JobResponse.preprocessing_info`` so clients (e.g. the
    prediction UI) know which input features the deployed model expects.
    Every field is optional: jobs created before this feature existed
    carry no preprocessing metadata.
    """
    feature_columns: Optional[List[str]] = None  # names of the model's input features
    feature_count: Optional[int] = None          # number of input features
    dropped_columns: Optional[List[str]] = None  # columns removed during preprocessing
    dropped_count: Optional[int] = None          # number of dropped columns
128+
129+
122130
class JobResponse(BaseModel):
123131
job_id: str
124132
dataset_id: str
@@ -139,6 +147,8 @@ class JobResponse(BaseModel):
139147
error_message: Optional[str] = None
140148
tags: Optional[List[str]] = None # Custom labels for filtering
141149
notes: Optional[str] = None # User notes for experiment tracking
150+
deployed: bool = False # Whether the model is deployed for inference
151+
preprocessing_info: Optional[PreprocessingInfo] = None # Feature info for inference
142152

143153
model_config = {"protected_namespaces": ()}
144154

@@ -149,6 +159,35 @@ class JobUpdateRequest(BaseModel):
149159
notes: Optional[str] = Field(default=None, max_length=1000, description="User notes for experiment tracking")
150160

151161

162+
class DeployRequest(BaseModel):
    """Request schema for deploying/undeploying a model.

    Body of ``POST /jobs/{job_id}/deploy``; a single required flag.
    """
    deploy: bool = Field(..., description="True to deploy, False to undeploy")
165+
166+
167+
class DeployResponse(BaseModel):
    """Response schema for deploy/undeploy operations.

    Returned by ``POST /jobs/{job_id}/deploy``.
    """
    job_id: str     # job whose deployment flag was changed
    deployed: bool  # deployment state after the operation
    message: str    # human-readable confirmation (e.g. "Model successfully deployed")
172+
173+
174+
class PredictionInput(BaseModel):
    """Request schema for making predictions.

    Body of ``POST /predict/{job_id}``.
    """
    # Mapping of feature name -> raw value; values may be numeric or
    # string-valued, per the declared union type.
    features: Dict[str, float | int | str] = Field(..., description="Input features for prediction")
177+
178+
179+
class PredictionResponse(BaseModel):
    """Response schema for predictions.

    Returned by ``POST /predict/{job_id}``. ``probability`` and
    ``probabilities`` are optional — presumably only populated for
    classification models; confirm against the predict endpoint.
    """
    job_id: str                       # job whose model produced this prediction
    prediction: float | int | str     # model output (numeric value or class label)
    probability: Optional[float] = None               # confidence score, when available
    probabilities: Optional[Dict[str, float]] = None  # per-class probabilities, when available
    inference_time_ms: float          # inference duration in milliseconds
    model_type: str                   # type of the underlying model

    # Allow "model_"-prefixed field names (Pydantic reserves that namespace by default).
    model_config = {"protected_namespaces": ()}
189+
190+
152191
class JobListResponse(BaseModel):
153192
jobs: List[JobDetails]
154193
next_token: Optional[str] = None

backend/api/routers/models.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from fastapi import APIRouter, HTTPException, status, Query
22
from typing import Optional
3-
from ..models.schemas import JobListResponse, JobResponse, JobStatus, ProblemType, JobUpdateRequest
3+
from ..models.schemas import (
4+
JobListResponse, JobResponse, JobStatus, ProblemType, JobUpdateRequest,
5+
DeployRequest, DeployResponse, PreprocessingInfo
6+
)
47
from ..services.dynamo_service import dynamodb_service
58
from ..services.s3_service import s3_service
69
from ..utils.helpers import get_settings
@@ -22,6 +25,16 @@ async def get_job_status(job_id: str):
2225
detail="Job not found"
2326
)
2427

28+
# Build preprocessing_info if available
29+
preprocessing_info = None
30+
if job.get('preprocessing_info'):
31+
preprocessing_info = PreprocessingInfo(
32+
feature_columns=job['preprocessing_info'].get('feature_columns'),
33+
feature_count=job['preprocessing_info'].get('feature_count'),
34+
dropped_columns=job['preprocessing_info'].get('dropped_columns'),
35+
dropped_count=job['preprocessing_info'].get('dropped_count')
36+
)
37+
2538
response = JobResponse(
2639
job_id=job['job_id'],
2740
dataset_id=job['dataset_id'],
@@ -36,7 +49,9 @@ async def get_job_status(job_id: str):
3649
metrics=job.get('metrics'),
3750
error_message=job.get('error_message'),
3851
tags=job.get('tags'),
39-
notes=job.get('notes')
52+
notes=job.get('notes'),
53+
deployed=job.get('deployed', False),
54+
preprocessing_info=preprocessing_info
4055
)
4156

4257
# Generate download URLs if job is completed
@@ -234,6 +249,54 @@ async def update_job_metadata(job_id: str, request: JobUpdateRequest):
234249
)
235250

236251

252+
@router.post("/{job_id}/deploy", response_model=DeployResponse)
async def deploy_model(job_id: str, request: DeployRequest):
    """
    Deploy or undeploy a trained model for inference.

    Only completed jobs with ONNX models can be deployed. Validation
    failures surface as 400/404; anything unexpected becomes a 500.
    """
    try:
        record = dynamodb_service.get_job(job_id)

        # The job must exist at all.
        if not record:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Job not found"
            )

        # Only finished training runs are eligible (applies to deploy and undeploy).
        if record['status'] != JobStatus.COMPLETED.value:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"Cannot deploy job with status '{record['status']}'. Only completed jobs can be deployed."
            )

        # Deploying additionally requires an exported ONNX artifact.
        if request.deploy and not record.get('onnx_model_path'):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="No ONNX model available for this job. Only jobs with ONNX export can be deployed."
            )

        # Persist the new flag, then report which action was taken.
        dynamodb_service.update_job_deployed(job_id, request.deploy)
        verb = "deployed" if request.deploy else "undeployed"
        return DeployResponse(
            job_id=job_id,
            deployed=request.deploy,
            message=f"Model successfully {verb}"
        )

    except HTTPException:
        # Re-raise our own validation errors untouched.
        raise
    except Exception as e:
        # Wrap anything unexpected in a 500 instead of leaking a traceback.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error deploying model: {str(e)}"
        )
298+
299+
237300
@router.get("", response_model=JobListResponse)
238301
async def list_jobs(
239302
limit: int = Query(default=20, ge=1, le=100),

0 commit comments

Comments
 (0)