Skip to content

Commit 3ef94a5

Browse files
Merge pull request #9 from atharvakarval-dev/feature/suggestion-engine
Feature/suggestion engine
2 parents 431f1f7 + 60e0376 commit 3ef94a5

20 files changed

Lines changed: 1933 additions & 334 deletions

File tree

64 KB
Binary file not shown.

form-flow-backend/config/settings.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ def cors_origins_list(self) -> list:
141141
# ==========================================================================
142142
# Smart Question Engine Configuration
143143
# ==========================================================================
144+
ENABLE_AI: bool = Field(
145+
default=True,
146+
description="Enable AI features (disable for dev/testing to save quota)"
147+
)
144148
SMART_GROUPING_ENABLED: bool = Field(
145149
default=True,
146150
description="Enable Smart Question Grouping (reduces 159 fields to ~30 groups)"

form-flow-backend/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ psutil
2020
py-cpuinfo
2121
seaborn>=0.11.0
2222
elevenlabs==1.6.1
23-
vosk>=0.3.45
23+
vosk>=0.3.44
2424
sqlalchemy
2525
asyncpg
2626
passlib[bcrypt]

form-flow-backend/routers/forms.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class FormSubmitRequest(BaseModel):
4343
form_data: Dict[str, Any]
4444
form_schema: List[Dict[str, Any]]
4545
use_cdp: bool = False # If True, connect to user's browser via Chrome DevTools Protocol
46+
human_like: bool = False # If True, use anti-detection human behavior
4647

4748
class ConversationalFlowRequest(BaseModel):
4849
extracted_fields: Dict[str, str]
@@ -319,21 +320,25 @@ async def scrape_form(
319320
print(f"⏱️ Scrape + process: {t2 - t1:.2f}s")
320321

321322
# ━━━ MAGIC FILL (non-blocking — runs in background) ━━━
322-
auth_header = request.headers.get('Authorization')
323-
if auth_header and auth_header.startswith('Bearer '):
324-
# Fire-and-forget: run Magic Fill in background so /scrape returns instantly
325-
background_tasks.add_task(
326-
_run_magic_fill_background,
327-
url, auth_header, processed_data['form_schema'], db, gemini_service
328-
)
323+
# Magic Fill is scheduled only when AI features are enabled (settings.ENABLE_AI)
324+
if settings.ENABLE_AI:
325+
auth_header = request.headers.get('Authorization')
326+
if auth_header and auth_header.startswith('Bearer '):
327+
# Fire-and-forget: run Magic Fill in background so /scrape returns instantly
328+
background_tasks.add_task(
329+
_run_magic_fill_background,
330+
url, auth_header, processed_data['form_schema'], db, gemini_service
331+
)
332+
else:
333+
print("ℹ️ Magic Fill skipped (ENABLE_AI=False)")
329334

330335
# ━━━ BUILD RESPONSE ━━━
331336
response_data = {
332337
"message": "Form scraped and analyzed successfully",
333338
**processed_data,
334-
"gemini_ready": gemini_service is not None,
339+
"gemini_ready": gemini_service is not None and settings.ENABLE_AI,
335340
"magic_fill_data": None, # Will be available via /magic-fill-result endpoint
336-
"magic_fill_status": "processing" if auth_header and auth_header.startswith('Bearer ') else "skipped"
341+
"magic_fill_status": "processing" if settings.ENABLE_AI and auth_header and auth_header.startswith('Bearer ') else "skipped"
337342
}
338343

339344
# ━━━ CACHE RESULT (30 min TTL) ━━━
@@ -393,17 +398,21 @@ async def comprehensive_form_setup(
393398

394399
# Step 3: Generate initial conversational flow if requested
395400
conversational_flow = None
396-
if data.auto_generate_flow and gemini_service:
401+
if data.auto_generate_flow and gemini_service and settings.ENABLE_AI:
397402
flow_result = gemini_service.generate_conversational_flow({}, processed_data["form_schema"])
398403
if flow_result["success"]:
399404
conversational_flow = flow_result["conversational_flow"]
405+
elif not settings.ENABLE_AI:
406+
print("ℹ️ Conversational flow generation skipped (ENABLE_AI=False)")
400407

401408
return {
402409
"message": "Form setup completed successfully",
403410
**processed_data,
404411
"conversational_flow": conversational_flow,
405412
"ready_for_interaction": True,
406-
"gemini_ready": gemini_service is not None
413+
"conversational_flow": conversational_flow,
414+
"ready_for_interaction": True,
415+
"gemini_ready": gemini_service is not None and settings.ENABLE_AI
407416
}
408417

409418
except Exception as e:
@@ -452,6 +461,9 @@ async def generate_conversational_flow(
452461
):
453462
"""Generate conversational flow based on extracted fields using Gemini API."""
454463
try:
464+
if not settings.ENABLE_AI:
465+
raise HTTPException(status_code=400, detail="AI features are disabled")
466+
455467
if not gemini_service:
456468
raise HTTPException(status_code=500, detail="Gemini API not configured")
457469

@@ -607,6 +619,16 @@ async def magic_fill(
607619
"summary": "Please sign in to use Magic Fill"
608620
}
609621

622+
# Check if AI is enabled
623+
if not settings.ENABLE_AI:
624+
return {
625+
"success": False,
626+
"error": "AI features are disabled",
627+
"filled": {},
628+
"unfilled": [],
629+
"summary": "AI features are currently disabled"
630+
}
631+
610632
# 2. Call Smart Form Filler Chain
611633
if not gemini_service:
612634
raise HTTPException(status_code=500, detail="Gemini service not available")
@@ -659,14 +681,16 @@ async def submit_form(
659681
url=data.url,
660682
form_data=formatted_data,
661683
form_schema=data.form_schema,
662-
use_cdp=data.use_cdp
684+
use_cdp=data.use_cdp,
685+
human_like=data.human_like
663686
)
664687
else:
665688
result = await form_submitter.submit_form_data(
666689
url=data.url,
667690
form_data=data.form_data,
668691
form_schema=data.form_schema,
669-
use_cdp=data.use_cdp
692+
use_cdp=data.use_cdp,
693+
human_like=data.human_like
670694
)
671695

672696
# --- History Tracking ---
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import sys
2+
import os
3+
import asyncio
4+
import logging
5+
6+
# Setup path to include backend root
7+
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
8+
sys.path.insert(0, root_dir)
9+
10+
# Configure logging
11+
logging.basicConfig(level=logging.INFO)
12+
13+
from utils.api_cache import invalidate_form_cache
14+
15+
async def main(target_url: str = "https://www.zensar.com/contact-us") -> None:
    """Invalidate the cached form schema for ``target_url``.

    Args:
        target_url: URL of the form whose cached scrape result should be
            dropped. Defaults to the URL this script was originally written
            for, so calling ``main()`` with no arguments behaves as before.
    """
    print(f"🧹 Clearing cache for: {target_url}")

    # Only the form-schema entry is targeted here. Whether related keys
    # (e.g. smart prompts) are also removed depends on invalidate_form_cache
    # itself -- NOTE(review): confirm against utils.api_cache.
    await invalidate_form_cache(target_url)

    print("✅ Cache cleared successfully.")
    print("Please refresh the frontend to re-scrape.")


if __name__ == "__main__":
    # An optional first CLI argument overrides the default target URL.
    if len(sys.argv) > 1:
        asyncio.run(main(sys.argv[1]))
    else:
        asyncio.run(main())

form-flow-backend/services/ai/profile/suggestions.py

Lines changed: 119 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,13 @@ async def get_suggestions(
9191
return await self._tier1_profile_based(profile, field_context, form_context, previous_answers, form_intent)
9292
else:
9393
# No profile: fall back to Tier 0 cold-start suggestions (form intent + field semantics only).
94-
logger.warning("⛔ [Lifecycle] No Profile found. Skipping Tier 3 fallback (returning empty).")
95-
return []
94+
logger.info("🌱 [Lifecycle] No Profile found. Attempting Tier 0: Cold-Start suggestions.")
95+
return await self._tier0_cold_start(field_context, form_context, previous_answers, form_intent)
9696

9797
except Exception as e:
9898
logger.error(f"❌ [Lifecycle] CRITICAL ERROR: {str(e)}", exc_info=True)
9999
return []
100+
100101

101102
async def _tier1_profile_based(
102103
self,
@@ -121,6 +122,28 @@ async def _tier1_profile_based(
121122
except Exception as e:
122123
logger.error(f"❌ [Lifecycle] Tier 1: LLM Failed ({str(e)})")
123124
return [] # STRICT: Return empty instead of fallback
125+
126+
def _format_profile_for_prompt(self, profile: Any) -> str:
127+
"""Extract and structure profile data for better LLM consumption."""
128+
profile_text = getattr(profile, 'profile_text', None)
129+
130+
if not profile_text:
131+
return str(profile)
132+
133+
try:
134+
parsed = json.loads(profile_text) if isinstance(profile_text, str) else profile_text
135+
136+
# If it's already structured JSON, format it clearly
137+
if isinstance(parsed, dict):
138+
sections = []
139+
for key, value in parsed.items():
140+
label = key.replace("_", " ").title()
141+
sections.append(f"- {label}: {value}")
142+
return "\n".join(sections)
143+
except (json.JSONDecodeError, TypeError):
144+
pass
145+
146+
return str(profile_text)
124147

125148
async def _generate_llm_suggestions(
126149
self,
@@ -142,7 +165,18 @@ async def _generate_llm_suggestions(
142165
return None
143166

144167
# Extract profile text safely
145-
profile_text = getattr(profile, 'profile_text', str(profile))
168+
profile_text = self._format_profile_for_prompt(profile)
169+
170+
# Profile maturity signals: number of forms filled and recently analyzed forms
171+
form_count = getattr(profile, 'form_count', 1)
172+
try:
173+
metadata = json.loads(getattr(profile, 'metadata_json', '{}') or '{}')
174+
except Exception:
175+
metadata = {}
176+
forms_history = metadata.get('forms_analyzed', [])
177+
history_str = ", ".join(forms_history[-5:]) if forms_history else "None"
178+
maturity_hint = "mature — trust it heavily" if form_count >= 5 else "early stage — use as a hint, stay flexible"
179+
146180

147181
# Context extraction
148182
field_name = field_context.get("name", "unknown")
@@ -189,6 +223,8 @@ async def _generate_llm_suggestions(
189223
4. **Guardrail:** NEVER describe the user in the third person (e.g., "User exhibits...") unless the form_type is explicitly 'diagnostic_report'.
190224
191225
5. **Output:** Return a JSON object with a list of 1-3 suggestions and your reasoning. The reasoning MUST mention the detected Form Intent.
226+
6. **Profile Maturity:** The user has filled {form_count} forms — profile is {maturity_hint}. Weight suggestions accordingly.
227+
7. **Past Forms:** They've previously filled: {forms_history}. Use this to infer domain or recurring needs.
192228
193229
FORMAT:
194230
{{
@@ -212,6 +248,9 @@ async def _generate_llm_suggestions(
212248
"field_name": field_name,
213249
"persona": persona,
214250
"previous_answers_context": previous_answers_str,
251+
"form_count": form_count,
252+
"maturity_hint": maturity_hint,
253+
"forms_history": history_str,
215254
})
216255

217256
duration = (datetime.now() - start_time).total_seconds()
@@ -259,6 +298,83 @@ def _tier3_pattern_only(
259298
# DISABLED as per request
260299
logger.info("🧩 [Lifecycle] Tier 3 requested but DISABLED.")
261300
return []
301+
async def _tier0_cold_start(
    self,
    field_context: Dict[str, Any],
    form_context: Dict[str, Any],
    previous_answers: Dict[str, str],
    form_intent: Optional[FormIntent]
) -> List[IntelligentSuggestion]:
    """
    Tier 0: Cold-start suggestions for users with no profile.

    Uses only form intent + field semantics (plus answers already given in
    this form) to ask the LLM for realistic values.

    Args:
        field_context: Field descriptor; ``label`` is preferred, then ``name``.
        form_context: Form descriptor; ``purpose`` is used when no intent is given.
        previous_answers: Answers already entered, keyed by field.
        form_intent: Detected intent/persona, or None.

    Returns:
        Suggestions with a fixed confidence of 0.55, or an empty list when
        the LLM is unavailable, fails, or returns nothing usable.
    """
    gemini = get_gemini_service()
    if not gemini or not gemini.llm:
        return []

    # `or` chain instead of a .get() default: a label that is present but
    # empty/None must still fall through to the field name.
    field_label = field_context.get("label") or field_context.get("name") or "unknown"
    form_purpose = form_intent.intent if form_intent else form_context.get("purpose", "General Form")
    persona = form_intent.persona if form_intent else "Customer"

    # Blank answers are dropped; if nothing remains the prompt shows "None"
    # rather than an empty section.
    answered_lines = [f"- {k}: {v}" for k, v in (previous_answers or {}).items() if v]
    previous_answers_str = "\n".join(answered_lines) if answered_lines else "None"

    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a smart form-filling assistant helping a first-time user.
You have NO prior information about this user. Generate helpful, realistic example suggestions
for the field based ONLY on the form's purpose and previously filled fields.

CONTEXT:
- Form Intent: {form_intent}
- Persona: {persona}
- Field: "{field_label}"
- Previously Filled Fields:
{previous_answers_context}

INSTRUCTIONS:
1. Generate 2-3 realistic, generic-but-useful example values a typical {persona} would enter.
2. Use the form intent to tailor suggestions (e.g., for "Job Application" + "Skills" field → "Python, FastAPI, SQL").
3. Use previous answers to stay consistent (e.g., if Role = "Designer", suggest design-related skills).
4. Keep suggestions short, realistic, and immediately usable.
5. Do NOT say "example" or "placeholder" - write as if the user would actually submit this.

FORMAT:
{{
"suggestions": ["Value 1", "Value 2"],
"reasoning": "Based on the form intent '{form_intent}', these are typical values a {persona} would provide."
}}
""")
    ])

    parser = JsonOutputParser(pydantic_object=SuggestionResponse)
    chain = prompt | gemini.llm | parser

    try:
        result = await chain.ainvoke({
            "form_intent": form_purpose,
            "persona": persona,
            "field_label": field_label,
            "previous_answers_context": previous_answers_str,
        })

        # The parser returns whatever JSON the model produced; only an object
        # with a non-empty "suggestions" list is usable.
        if not isinstance(result, dict):
            return []

        reasoning = result.get("reasoning") or "Cold-start suggestion based on form intent"
        usable_values = [
            val for val in (result.get("suggestions") or [])
            if val is not None and str(val).strip()
        ]

        return [
            IntelligentSuggestion(
                value=val,
                confidence=0.55,  # Lower confidence - no profile backing
                tier=SuggestionTier.PATTERN_ONLY,
                reasoning=reasoning,
                behavioral_match="cold_start_intent"
            )
            for val in usable_values
        ]
    except Exception as e:
        # Best-effort tier: any LLM/parsing failure degrades to "no suggestions".
        logger.error(f"❌ [Lifecycle] Tier 0 Cold Start Failed: {str(e)}")

    return []
377+
262378

263379

264380
# Singleton instance

form-flow-backend/services/ai/session_manager.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,17 +186,29 @@ async def cleanup_local_cache(self) -> int:
186186

187187
return len(expired)
188188

189-
def _serialize_session(self, session: Dict[str, Any]) -> Dict[str, Any]:
190-
"""Serialize session data for storage."""
189+
def _serialize_session(self, session: Dict[str, Any], _depth: int = 0) -> Dict[str, Any]:
190+
"""Serialize session data for storage with depth limit to prevent infinite recursion."""
191+
MAX_DEPTH = 10
192+
193+
if _depth > MAX_DEPTH:
194+
logger.warning(f"Max serialization depth {MAX_DEPTH} reached, truncating")
195+
return {}
196+
191197
serialized = {}
192198
for key, value in session.items():
193-
if isinstance(value, datetime):
194-
serialized[key] = {'__datetime__': value.isoformat()}
195-
elif hasattr(value, '__dict__'):
196-
# Handle dataclass objects
197-
serialized[key] = self._serialize_session(value.__dict__)
198-
else:
199-
serialized[key] = value
199+
try:
200+
if isinstance(value, datetime):
201+
serialized[key] = {'__datetime__': value.isoformat()}
202+
elif hasattr(value, '__dict__') and not isinstance(value, (str, int, float, bool, type(None))):
203+
# Recursively serialize with depth tracking
204+
serialized[key] = self._serialize_session(value.__dict__, _depth + 1)
205+
else:
206+
serialized[key] = value
207+
except Exception as e:
208+
logger.warning(f"Failed to serialize field '{key}': {e}")
209+
# Fallback to string representation
210+
serialized[key] = str(value)
211+
200212
return serialized
201213

202214
def _deserialize_session(self, data: Dict[str, Any]) -> Dict[str, Any]:

0 commit comments

Comments
 (0)