atharvakarval-dev
diff --git a/‎form-flow-backend/chroma_db/chroma.sqlite3‎
64 KB b/‎form-flow-backend/chroma_db/chroma.sqlite3‎
64 KB
diff --git a/‎form-flow-backend/config/settings.py‎
Lines changed: 4 additions & 0 deletions b/‎form-flow-backend/config/settings.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎form-flow-backend/requirements.txt‎
Lines changed: 1 addition & 1 deletion b/‎form-flow-backend/requirements.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎form-flow-backend/routers/forms.py‎
Lines changed: 37 additions & 13 deletions b/‎form-flow-backend/routers/forms.py‎
Lines changed: 37 additions & 13 deletions
diff --git a/‎form-flow-backend/scripts/clear_cache.py‎
Lines changed: 31 additions & 0 deletions b/‎form-flow-backend/scripts/clear_cache.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎form-flow-backend/services/ai/profile/suggestions.py‎
Lines changed: 119 additions & 3 deletions b/‎form-flow-backend/services/ai/profile/suggestions.py‎
Lines changed: 119 additions & 3 deletions
diff --git a/‎form-flow-backend/services/ai/session_manager.py‎
Lines changed: 21 additions & 9 deletions b/‎form-flow-backend/services/ai/session_manager.py‎
Lines changed: 21 additions & 9 deletions
@@ -141,6 +141,10 @@ def cors_origins_list(self) -> list:
     # ==========================================================================
     # Smart Question Engine Configuration
     # ==========================================================================
+    ENABLE_AI: bool = Field(
+        default=True,
+        description="Enable AI features (disable for dev/testing to save quota)"
+    )
     SMART_GROUPING_ENABLED: bool = Field(
         default=True,
         description="Enable Smart Question Grouping (reduces 159 fields to ~30 groups)"
 
@@ -20,7 +20,7 @@ psutil
 py-cpuinfo
 seaborn>=0.11.0
 elevenlabs==1.6.1
-vosk>=0.3.45
+vosk>=0.3.44
 sqlalchemy
 asyncpg
 passlib[bcrypt]
 
@@ -43,6 +43,7 @@ class FormSubmitRequest(BaseModel):
     form_data: Dict[str, Any]
     form_schema: List[Dict[str, Any]]
     use_cdp: bool = False  # If True, connect to user's browser via Chrome DevTools Protocol
+    human_like: bool = False  # If True, use anti-detection human behavior
 
 class ConversationalFlowRequest(BaseModel):
     extracted_fields: Dict[str, str]
@@ -319,21 +320,25 @@ async def scrape_form(
         print(f"⏱️  Scrape + process: {t2 - t1:.2f}s")
 
         # ━━━ MAGIC FILL (non-blocking — runs in background) ━━━
         auth_header = request.headers.get('Authorization')
-        if auth_header and auth_header.startswith('Bearer '):
-            # Fire-and-forget: run Magic Fill in background so /scrape returns instantly
-            background_tasks.add_task(
-                _run_magic_fill_background,
-                url, auth_header, processed_data['form_schema'], db, gemini_service
-            )
+        # ENABLE_AI gates only the background task; auth_header stays bound for the status field below.
+        if settings.ENABLE_AI:
+            if auth_header and auth_header.startswith('Bearer '):
+                # Fire-and-forget: run Magic Fill in background so /scrape returns instantly
+                background_tasks.add_task(
+                    _run_magic_fill_background,
+                    url, auth_header, processed_data['form_schema'], db, gemini_service
+                )
+        else:
+            print("ℹ️ Magic Fill skipped (ENABLE_AI=False)")
 
         # ━━━ BUILD RESPONSE ━━━
         response_data = {
             "message": "Form scraped and analyzed successfully",
             **processed_data,
-            "gemini_ready": gemini_service is not None,
+            "gemini_ready": gemini_service is not None and settings.ENABLE_AI,
             "magic_fill_data": None,  # Will be available via /magic-fill-result endpoint
-            "magic_fill_status": "processing" if auth_header and auth_header.startswith('Bearer ') else "skipped"
+            "magic_fill_status": "processing" if settings.ENABLE_AI and auth_header and auth_header.startswith('Bearer ') else "skipped"
         }
 
         # ━━━ CACHE RESULT (30 min TTL) ━━━
@@ -393,17 +398,21 @@ async def comprehensive_form_setup(
 
         # Step 3: Generate initial conversational flow if requested
         conversational_flow = None
-        if data.auto_generate_flow and gemini_service:
+        # Flow generation requires the caller's opt-in, a configured Gemini service, and ENABLE_AI.
+        if data.auto_generate_flow and gemini_service and settings.ENABLE_AI:
             flow_result = gemini_service.generate_conversational_flow({}, processed_data["form_schema"])
             if flow_result["success"]:
                 conversational_flow = flow_result["conversational_flow"]
+        elif not settings.ENABLE_AI:
+            print("ℹ️ Conversational flow generation skipped (ENABLE_AI=False)")
 
         return {
             "message": "Form setup completed successfully",
             **processed_data,
             "conversational_flow": conversational_flow,
             "ready_for_interaction": True,
-            "gemini_ready": gemini_service is not None
+            # AI is reported ready only when the service exists AND the feature flag is on.
+            "gemini_ready": gemini_service is not None and settings.ENABLE_AI
         }
 
     except Exception as e:
@@ -452,6 +461,9 @@ async def generate_conversational_flow(
 ):
     """Generate conversational flow based on extracted fields using Gemini API."""
     try:
+        if not settings.ENABLE_AI:
+            raise HTTPException(status_code=400, detail="AI features are disabled")
+            
         if not gemini_service:
             raise HTTPException(status_code=500, detail="Gemini API not configured")
 
@@ -607,6 +619,16 @@ async def magic_fill(
                 "summary": "Please sign in to use Magic Fill"
             }
 
+        # Check if AI is enabled
+        if not settings.ENABLE_AI:
+            return {
+                "success": False,
+                "error": "AI features are disabled",
+                "filled": {},
+                "unfilled": [],
+                "summary": "AI features are currently disabled"
+            }
+        
         # 2. Call Smart Form Filler Chain
         if not gemini_service:
             raise HTTPException(status_code=500, detail="Gemini service not available")
@@ -659,14 +681,16 @@ async def submit_form(
                 url=data.url,
                 form_data=formatted_data,
                 form_schema=data.form_schema,
-                use_cdp=data.use_cdp
+                use_cdp=data.use_cdp,
+                human_like=data.human_like
             )
         else:
             result = await form_submitter.submit_form_data(
                 url=data.url,
                 form_data=data.form_data,
                 form_schema=data.form_schema,
-                use_cdp=data.use_cdp
+                use_cdp=data.use_cdp,
+                human_like=data.human_like
             )
 
         # --- History Tracking ---
 
@@ -0,0 +1,43 @@
+"""Clear the cached form schema for a single URL.
+
+Usage:
+    python scripts/clear_cache.py [URL]
+
+When URL is omitted, DEFAULT_TARGET_URL is used.
+"""
+import asyncio
+import logging
+import os
+import sys
+
+# Setup path to include backend root
+root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, root_dir)
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
+# Imported after the sys.path tweak above so that `utils` resolves from the backend root.
+from utils.api_cache import invalidate_form_cache  # noqa: E402
+
+DEFAULT_TARGET_URL = "https://www.zensar.com/contact-us"
+
+
+async def main(target_url: str = DEFAULT_TARGET_URL) -> None:
+    """Invalidate the cached form schema for ``target_url`` and report progress."""
+    print(f"🧹 Clearing cache for: {target_url}")
+
+    # 1. Clear form schema cache
+    await invalidate_form_cache(target_url)
+
+    # 2. Also check if there are other related keys (e.g. smart prompts)
+    # The prefix for form schema is "form_schema:"
+    # We rely on invalidate_form_cache logic
+
+    print("✅ Cache cleared successfully.")
+    print("Please refresh the frontend to re-scrape.")
+
+
+if __name__ == "__main__":
+    # An optional first CLI argument overrides the default URL.
+    asyncio.run(main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_TARGET_URL))
@@ -91,12 +91,13 @@ async def get_suggestions(
                 return await self._tier1_profile_based(profile, field_context, form_context, previous_answers, form_intent)
             else:
-                # STRICT: No profile = No suggestions.
-                logger.warning("⛔ [Lifecycle] No Profile found. Skipping Tier 3 fallback (returning empty).")
-                return []
+                # No profile yet: fall back to Tier 0 cold-start suggestions.
+                logger.info("🌱 [Lifecycle] No Profile found. Attempting Tier 0: Cold-Start suggestions.")
+                return await self._tier0_cold_start(field_context, form_context, previous_answers, form_intent)
 
         except Exception as e:
             logger.error(f"❌ [Lifecycle] CRITICAL ERROR: {str(e)}", exc_info=True)
             return []
+
 
     async def _tier1_profile_based(
         self,
@@ -121,6 +122,28 @@ async def _tier1_profile_based(
         except Exception as e:
             logger.error(f"❌ [Lifecycle] Tier 1: LLM Failed ({str(e)})")
             return [] # STRICT: Return empty instead of fallback
+
+    def _format_profile_for_prompt(self, profile: Any) -> str:
+        """Extract and structure profile data for better LLM consumption."""
+        profile_text = getattr(profile, 'profile_text', None)
+        # No stored text at all: fall back to the object's own string form.
+        if not profile_text:
+            return str(profile)
+
+        try:
+            parsed = json.loads(profile_text) if isinstance(profile_text, str) else profile_text
+        except (json.JSONDecodeError, TypeError):
+            # Not valid JSON: pass the raw text through unchanged.
+            return str(profile_text)
+
+        if not isinstance(parsed, dict):
+            return str(profile_text)
+
+        # Structured profile: render one "- Label: value" bullet per top-level key.
+        return "\n".join(
+            f"- {str(key).replace('_', ' ').title()}: {value}"
+            for key, value in parsed.items()
+        )
 
     async def _generate_llm_suggestions(
         self,
@@ -142,7 +165,18 @@ async def _generate_llm_suggestions(
             return None
 
         # Extract profile text safely
-        profile_text = getattr(profile, 'profile_text', str(profile))
+        profile_text = self._format_profile_for_prompt(profile)
+
+        # Profile maturity signals for the prompt; tolerate missing or malformed profile fields.
+        form_count = 1 if getattr(profile, 'form_count', None) is None else profile.form_count
+        try:
+            metadata = json.loads(getattr(profile, 'metadata_json', '{}') or '{}')
+        except (TypeError, ValueError):
+            metadata = {}
+        forms_history = metadata.get('forms_analyzed', []) if isinstance(metadata, dict) else []
+        recent_forms = forms_history[-5:] if isinstance(forms_history, list) else []
+        history_str = ", ".join(str(name) for name in recent_forms) or "None"
+        maturity_hint = "mature — trust it heavily" if form_count >= 5 else "early stage — use as a hint, stay flexible"
 
         # Context extraction
         field_name = field_context.get("name", "unknown")
@@ -189,6 +223,8 @@ async def _generate_llm_suggestions(
 4.  **Guardrail:** NEVER describe the user in the third person (e.g., "User exhibits...") unless the form_type is explicitly 'diagnostic_report'.
 
 5.  **Output:** Return a JSON object with a list of 1-3 suggestions and your reasoning. The reasoning MUST mention the detected Form Intent.
+6.  **Profile Maturity:** The user has filled {form_count} forms — profile is {maturity_hint}. Weight suggestions accordingly.
+7.  **Past Forms:** They've previously filled: {forms_history}. Use this to infer domain or recurring needs.
 
 FORMAT:
 {{
@@ -212,6 +248,9 @@ async def _generate_llm_suggestions(
                 "field_name": field_name,
                 "persona": persona,
                 "previous_answers_context": previous_answers_str,
+                "form_count": form_count,
+                "maturity_hint": maturity_hint,
+                "forms_history": history_str,
             })
 
             duration = (datetime.now() - start_time).total_seconds()
@@ -259,6 +298,87 @@ def _tier3_pattern_only(
         # DISABLED as per request
         logger.info("🧩 [Lifecycle] Tier 3 requested but DISABLED.")
         return []
+
+    async def _tier0_cold_start(
+        self,
+        field_context: Dict[str, Any],
+        form_context: Dict[str, Any],
+        previous_answers: Dict[str, str],
+        form_intent: Optional[FormIntent]
+    ) -> List[IntelligentSuggestion]:
+        """
+        Tier 0: Cold-start suggestions for users with no profile.
+        Uses only form intent + field semantics to generate contextual placeholders.
+
+        Returns an empty list when no LLM is available, when the LLM call
+        fails, or when it yields no suggestions.
+        """
+        gemini = get_gemini_service()
+        if not gemini or not gemini.llm:
+            return []
+
+        field_label = field_context.get("label", field_context.get("name", "unknown"))
+        form_purpose = form_intent.intent if form_intent else form_context.get("purpose", "General Form")
+        persona = form_intent.persona if form_intent else "Customer"
+
+        # Only answers with a non-empty value are shown to the model; fall back
+        # to "None" when nothing usable has been filled in yet.
+        filled_answers = [f"- {k}: {v}" for k, v in (previous_answers or {}).items() if v]
+        previous_answers_str = "\n".join(filled_answers) if filled_answers else "None"
+
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are a smart form-filling assistant helping a first-time user.
+        You have NO prior information about this user. Generate helpful, realistic example suggestions 
+        for the field based ONLY on the form's purpose and previously filled fields.
+
+        CONTEXT:
+        - Form Intent: {form_intent}
+        - Persona: {persona}
+        - Field: "{field_label}"
+        - Previously Filled Fields:
+        {previous_answers_context}
+
+        INSTRUCTIONS:
+        1. Generate 2-3 realistic, generic-but-useful example values a typical {persona} would enter.
+        2. Use the form intent to tailor suggestions (e.g., for "Job Application" + "Skills" field → "Python, FastAPI, SQL").
+        3. Use previous answers to stay consistent (e.g., if Role = "Designer", suggest design-related skills).
+        4. Keep suggestions short, realistic, and immediately usable.
+        5. Do NOT say "example" or "placeholder" - write as if the user would actually submit this.
+
+        FORMAT:
+        {{
+        "suggestions": ["Value 1", "Value 2"],
+        "reasoning": "Based on the form intent '{form_intent}', these are typical values a {persona} would provide."
+        }}
+        """)
+        ])
+
+        parser = JsonOutputParser(pydantic_object=SuggestionResponse)
+        chain = prompt | gemini.llm | parser
+
+        try:
+            result = await chain.ainvoke({
+                "form_intent": form_purpose,
+                "persona": persona,
+                "field_label": field_label,
+                "previous_answers_context": previous_answers_str,
+            })
+
+            if result and result.get("suggestions"):
+                return [
+                    IntelligentSuggestion(
+                        value=val,
+                        confidence=0.55,  # Lower confidence - no profile backing
+                        tier=SuggestionTier.PATTERN_ONLY,
+                        reasoning=result.get("reasoning", "Cold-start suggestion based on form intent"),
+                        behavioral_match="cold_start_intent"
+                    )
+                    for val in result["suggestions"]
+                ]
+        except Exception as e:
+            logger.error(f"❌ [Lifecycle] Tier 0 Cold Start Failed: {str(e)}")
+
+        return []
 
 
 # Singleton instance
 
@@ -186,17 +186,29 @@ async def cleanup_local_cache(self) -> int:
 
         return len(expired)
 
-    def _serialize_session(self, session: Dict[str, Any]) -> Dict[str, Any]:
-        """Serialize session data for storage."""
+    def _serialize_session(self, session: Dict[str, Any], _depth: int = 0) -> Dict[str, Any]:
+        """Serialize session data for storage with depth limit to prevent infinite recursion."""
+        MAX_DEPTH = 10
+        
+        if _depth > MAX_DEPTH:
+            logger.warning(f"Max serialization depth {MAX_DEPTH} reached, truncating")
+            return {}
+        
         serialized = {}
         for key, value in session.items():
-            if isinstance(value, datetime):
-                serialized[key] = {'__datetime__': value.isoformat()}
-            elif hasattr(value, '__dict__'):
-                # Handle dataclass objects
-                serialized[key] = self._serialize_session(value.__dict__)
-            else:
-                serialized[key] = value
+            try:
+                if isinstance(value, datetime):
+                    serialized[key] = {'__datetime__': value.isoformat()}
+                elif hasattr(value, '__dict__') and not isinstance(value, (str, int, float, bool, type(None))):
+                    # Expand via __dict__ one level deeper; primitives and their subclasses are stored as-is below
+                    serialized[key] = self._serialize_session(value.__dict__, _depth + 1)
+                else:
+                    serialized[key] = value
+            except Exception as e:
+                logger.warning(f"Failed to serialize field '{key}': {e}")
+                # Fallback to string representation
+                serialized[key] = str(value)
+        
         return serialized
 
     def _deserialize_session(self, data: Dict[str, Any]) -> Dict[str, Any]: