Skip to content

Commit 05daa68

Browse files
committed
Merge branch 'main' and resolve conflicts in submitter.py, parser.py and api.js
1 parent 3ef94a5 commit 05daa68

15 files changed

Lines changed: 1423 additions & 1134 deletions

File tree

form-flow-backend/main.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,20 @@
1-
"""
2-
Form Flow AI - Backend Application
3-
4-
FastAPI application for voice-powered form automation.
5-
Provides endpoints for form scraping, voice processing, and form submission.
6-
7-
Features:
8-
- Form URL scraping with Playwright
9-
- Voice-to-text with Vosk
10-
- Text-to-speech with ElevenLabs
11-
- AI-powered form field understanding with Gemini
12-
- Automated form submission
13-
14-
Run:
15-
python main.py
16-
# or
17-
uvicorn main:app --reload
18-
"""
19-
20-
21-
import warnings
221
import sys
232
import asyncio
243

254
# Fix for Playwright on Windows - ProactorEventLoop required for subprocess
26-
if sys.platform == 'win32' and 'pytest' not in sys.modules:
5+
if sys.platform == 'win32':
276
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
7+
# Explicitly confirm policy application
8+
print(f"🔧 [Windows] Event loop policy set to: {type(asyncio.get_event_loop_policy()).__name__}")
9+
10+
import warnings
2811

2912
# Suppress Pydantic V1 warning from LangChain
3013
warnings.filterwarnings("ignore", message=".*Core Pydantic V1 functionality.*")
3114

3215
from contextlib import asynccontextmanager
3316
from fastapi import FastAPI, Request
17+
from fastapi.exceptions import RequestValidationError
3418
from fastapi.middleware.cors import CORSMiddleware
3519
from fastapi.responses import JSONResponse
3620
from slowapi.errors import RateLimitExceeded
@@ -67,6 +51,15 @@ async def lifespan(app: FastAPI):
6751
logger.info(f"Starting {settings.APP_NAME} v{settings.APP_VERSION}")
6852
logger.info(f"Debug mode: {settings.DEBUG}")
6953

54+
# Log loop type for debugging
55+
try:
56+
current_loop = asyncio.get_running_loop()
57+
logger.info(f"Running on event loop: {type(current_loop).__name__}")
58+
if sys.platform == 'win32' and 'Proactor' not in type(current_loop).__name__:
59+
logger.warning("❌ WARNING: Not using ProactorEventLoop on Windows. Subprocesses (Playwright) will fail!")
60+
except Exception as e:
61+
logger.warning(f"Could not determine running loop type: {e}")
62+
7063
# Create database tables
7164
async with database.engine.begin() as conn:
7265
await conn.run_sync(models.Base.metadata.create_all)
@@ -174,6 +167,15 @@ async def formflow_exception_handler(request: Request, exc: FormFlowError):
174167
# Rate limit exceeded handler
175168
app.add_exception_handler(RateLimitExceeded, rate_limit_exceeded_handler)
176169

170+
# Log Pydantic/validation errors with full detail to diagnose 422s quickly
171+
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Log request-validation failures in full and return a 422 response.

    Args:
        request: The incoming request whose payload failed validation.
        exc: The validation error raised by FastAPI for that request.

    Returns:
        A 422 ``JSONResponse`` whose ``detail`` key holds the list of errors.
    """
    # Imported locally so this handler does not depend on a top-of-file import.
    from fastapi.encoders import jsonable_encoder

    errors = exc.errors()
    # NOTE(review): the raw request body is written to the log; confirm that no
    # endpoint covered by this handler accepts secrets (passwords, tokens).
    logger.error(f"❌ Validation error at {request.url.path}: {errors} | body={getattr(exc, 'body', 'n/a')}")
    return JSONResponse(
        status_code=422,
        # Error entries may carry non-JSON-serializable objects (for example an
        # exception instance inside ``ctx``); encoding them first prevents the
        # handler itself from failing while building the response.
        content={"detail": jsonable_encoder(errors)},
    )
178+
177179

178180
# =============================================================================
179181
# Routers
@@ -332,5 +334,6 @@ async def metrics_dashboard():
332334
host="0.0.0.0",
333335
port=8001,
334336
reload=settings.DEBUG,
337+
reload_excludes=["models/**", "**/__pycache__/**", ".venv/**", "chroma_db/**", "storage/**"],
335338
log_level="debug" if settings.DEBUG else "info"
336339
)

form-flow-backend/requirements.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,6 @@ python-docx>=1.1.0
7676
# Voice Processing
7777
numpy>=1.24.0
7878
webrtcvad>=2.0.10
79-
edge-tts>=7.0.0 # Free Microsoft TTS fallback
80-
81-
# Logging
79+
edge-tts>=7.0.0
8280
structlog>=23.1.0
8381

form-flow-backend/routers/forms.py

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@
2323
from config.settings import settings
2424
from sqlalchemy.future import select
2525
from services.ai.profile.service import generate_profile_background
26-
from utils.api_cache import get_cached_form_schema, cache_form_schema
26+
from utils.api_cache import get_cached_form_schema, cache_form_schema, invalidate_form_cache
27+
28+
SUBMIT_TIMEOUT_SECONDS = 30
2729

2830
# --- Pydantic Models ---
2931
class ScrapeRequest(BaseModel):
3032
url: str
33+
refresh: bool = False # Cache-busting: set True to force re-scrape
3134

3235
class VoiceProcessRequest(BaseModel):
3336
transcript: str
@@ -178,7 +181,7 @@ async def _run_magic_fill_background(
178181
if gemini_service:
179182
t_mf = _time.time()
180183
print("✨ [Background] Magic Fill starting...")
181-
filler = SmartFormFillerChain(gemini_service.llm)
184+
filler = SmartFormFillerChain(gemini_service)
182185
magic_result = await filler.fill(
183186
user_profile=user_profile,
184187
form_schema=form_schema,
@@ -298,20 +301,29 @@ async def scrape_form(
298301

299302
print(f"Normalized URL: {url}")
300303

301-
# ━━━ CACHE CHECK ━━━
302-
try:
303-
cached = await get_cached_form_schema(url)
304-
if cached:
305-
print(f"✅ Cache HIT for {url} — returning instantly")
306-
return {
307-
"message": "Form loaded from cache",
308-
**cached,
309-
"cached": True,
310-
"gemini_ready": gemini_service is not None,
311-
"timing": {"total": round(_time.time() - t0, 2)}
312-
}
313-
except Exception as e:
314-
print(f"⚠️ Cache lookup failed (proceeding without cache): {e}")
304+
# ━━━ CACHE BUSTING ━━━
305+
if data.refresh:
306+
print(f"🔄 Cache bust requested for {url}")
307+
try:
308+
await invalidate_form_cache(url)
309+
except Exception as e:
310+
print(f"⚠️ Cache invalidation failed: {e}")
311+
312+
# ━━━ CACHE CHECK (with validation guard) ━━━
313+
if not data.refresh:
314+
try:
315+
cached = await get_cached_form_schema(url)
316+
if cached:
317+
print(f"✅ Cache HIT for {url} — returning instantly")
318+
return {
319+
"message": "Form loaded from cache",
320+
**cached,
321+
"cached": True,
322+
"gemini_ready": gemini_service is not None,
323+
"timing": {"total": round(_time.time() - t0, 2)}
324+
}
325+
except Exception as e:
326+
print(f"⚠️ Cache lookup failed (proceeding without cache): {e}")
315327

316328
# ━━━ SCRAPE + PROCESS (parallel smart prompts + hybrid TTS) ━━━
317329
t1 = _time.time()
@@ -341,9 +353,9 @@ async def scrape_form(
341353
"magic_fill_status": "processing" if settings.ENABLE_AI and auth_header and auth_header.startswith('Bearer ') else "skipped"
342354
}
343355

344-
# ━━━ CACHE RESULT (30 min TTL) ━━━
356+
# ━━━ CACHE RESULT (30 min TTL, validated) ━━━
345357
try:
346-
# Cache processed data (excluding magic fill — that's user-specific)
358+
# cache_form_schema validates internally — won't cache 0-field schemas
347359
await cache_form_schema(url, processed_data, ttl=1800)
348360
except Exception as e:
349361
print(f"⚠️ Failed to cache result: {e}")
@@ -633,7 +645,7 @@ async def magic_fill(
633645
if not gemini_service:
634646
raise HTTPException(status_code=500, detail="Gemini service not available")
635647

636-
filler = SmartFormFillerChain(gemini_service.llm)
648+
filler = SmartFormFillerChain(gemini_service)
637649
result = await filler.fill(
638650
user_profile=user_profile,
639651
form_schema=data.form_schema,
@@ -666,6 +678,8 @@ async def submit_form(
666678
schema = get_schema(data.url, form_data=data.form_schema)
667679
formatted_data = data.form_data
668680

681+
submit_coro = None
682+
669683
if schema:
670684
formatted_data = schema.format_all(data.form_data)
671685
valid, errors = schema.validate_all(formatted_data)
@@ -677,21 +691,28 @@ async def submit_form(
677691
"submitted_data": formatted_data
678692
}
679693

680-
result = await form_submitter.submit_form_data(
694+
submit_coro = form_submitter.submit_form_data(
681695
url=data.url,
682696
form_data=formatted_data,
683697
form_schema=data.form_schema,
684698
use_cdp=data.use_cdp,
685699
human_like=data.human_like
686700
)
687701
else:
688-
result = await form_submitter.submit_form_data(
702+
submit_coro = form_submitter.submit_form_data(
689703
url=data.url,
690704
form_data=data.form_data,
691705
form_schema=data.form_schema,
692706
use_cdp=data.use_cdp,
693707
human_like=data.human_like
694708
)
709+
formatted_data = data.form_data
710+
711+
try:
712+
result = await asyncio.wait_for(submit_coro, timeout=SUBMIT_TIMEOUT_SECONDS)
713+
except asyncio.TimeoutError:
714+
print(f"⏱️ Submit action timed out after {SUBMIT_TIMEOUT_SECONDS}s — aborting to keep UX responsive.")
715+
raise HTTPException(status_code=504, detail=f"Form submission timed out after {SUBMIT_TIMEOUT_SECONDS} seconds")
695716

696717
# --- History Tracking ---
697718
try:

form-flow-backend/routers/suggestions.py

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from fastapi import APIRouter, HTTPException, Depends, Request, BackgroundTasks
2-
from pydantic import BaseModel
2+
from fastapi.exceptions import RequestValidationError
3+
from fastapi.responses import JSONResponse
4+
from pydantic import BaseModel, Field, validator
35
from typing import Optional, List, Dict, Any
6+
from difflib import SequenceMatcher
47
import logging
58
import uuid
69

@@ -41,9 +44,19 @@ class SuggestionRequest(BaseModel):
4144
field_name: str
4245
field_label: Optional[str] = None
4346
field_type: Optional[str] = "text"
47+
field_options: Optional[List[Dict[str, Any]]] = None
48+
is_dropdown: Optional[bool] = None
4449
current_value: Optional[str] = None
4550
n_results: int = 5
4651

52+
@validator('field_type', pre=True, always=True)
def normalize_field_type(cls, v):
    """Coerce ``field_type`` to a known lowercase input type.

    ``None``, blank strings and any unrecognised type all become ``"text"``.
    """
    known_types = {
        "text", "email", "tel", "phone", "number", "select",
        "dropdown", "textarea", "url", "date", "radio", "checkbox",
    }
    if v is None:
        return "text"
    raw = str(v)
    if raw.strip() == "":
        return "text"
    # Only case is normalised; surrounding whitespace is intentionally left in
    # place, so a padded value falls through to the "text" default.
    lowered = raw.lower()
    if lowered in known_types:
        return lowered
    return "text"
59+
4760

4861
class SuggestionResponse(BaseModel):
4962
suggestions: List[str]
@@ -54,12 +67,23 @@ class IntelligentSuggestionRequest(BaseModel):
5467
"""Request for profile-based intelligent suggestions."""
5568
field_name: str
5669
field_label: Optional[str] = None
57-
field_type: Optional[str] = "text"
70+
field_type: Optional[str] = Field(default=None, description="Field input type (text, email, select, textarea, etc.)")
5871
form_purpose: Optional[str] = "General"
59-
previous_answers: Optional[Dict[str, str]] = None
72+
previous_answers: Optional[Dict[str, Any]] = None
6073
form_url: Optional[str] = None
6174
all_field_labels: Optional[List[str]] = None
6275
session_id: Optional[str] = None
76+
field_options: Optional[List[Dict[str, Any]]] = Field(default=None, description="Dropdown options for this field")
77+
is_dropdown: Optional[bool] = Field(default=None, description="True if field has a constrained options list")
78+
79+
@validator('field_type', pre=True, always=True)
def normalize_field_type(cls, v):
    """Coerce ``field_type`` to a known lowercase input type.

    ``None``, blank strings and any unrecognised type all become ``"text"``.
    The accepted set matches the validator on ``SuggestionRequest`` so that
    ``radio`` and ``checkbox`` fields are not rewritten to ``"text"`` on this
    endpoint only.
    """
    if v is None or str(v).strip() == "":
        return "text"
    v = str(v).lower()
    allowed = {
        "text", "email", "tel", "phone", "number", "select",
        "dropdown", "textarea", "url", "date", "radio", "checkbox",
    }
    return v if v in allowed else "text"
6387

6488

6589
class IntelligentSuggestionItem(BaseModel):
@@ -104,6 +128,8 @@ async def get_suggestions(
104128
"name": data.field_name,
105129
"label": data.field_label or data.field_name,
106130
"type": data.field_type or "text",
131+
"options": data.field_options or [],
132+
"is_dropdown": data.is_dropdown if data.is_dropdown is not None else bool(data.field_options),
107133
}
108134

109135
# Detect patterns from current value if provided
@@ -125,6 +151,12 @@ async def get_suggestions(
125151

126152
# Extract suggestion values
127153
suggestion_values = [s.suggested_value for s in suggestions if s.target_field == data.field_name]
154+
155+
if data.is_dropdown or (data.field_options and len(data.field_options) > 0):
156+
filtered_values = _filter_to_field_options(suggestion_values, data.field_options)
157+
if len(filtered_values) != len(suggestion_values):
158+
logger.info(f"🎛️ Dropdown guardrail filtered {len(suggestion_values) - len(filtered_values)} invalid pattern suggestions for {data.field_name}")
159+
suggestion_values = filtered_values
128160

129161
return SuggestionResponse(
130162
suggestions=suggestion_values[:data.n_results],
@@ -199,6 +231,7 @@ async def get_smart_suggestions(
199231
"name": data.field_name,
200232
"label": data.field_label or data.field_name,
201233
"type": data.field_type or "text",
234+
"options": data.field_options or [],
202235
}
203236

204237
form_context = {
@@ -225,6 +258,16 @@ async def get_smart_suggestions(
225258

226259
# Determine tier used
227260
tier_used = suggestions[0].tier.value if suggestions else "pattern_only"
261+
262+
# Guardrail: for dropdowns, only allow values that match an existing option
263+
filtered = suggestions
264+
if data.is_dropdown or (data.field_options and len(data.field_options) > 0):
265+
allowed_values = _filter_to_field_options([s.value for s in suggestions], data.field_options)
266+
allowed_set = {str(v).strip().lower() for v in allowed_values}
267+
filtered = [s for s in suggestions if str(s.value).strip().lower() in allowed_set]
268+
if len(filtered) != len(suggestions):
269+
logger.info(f"🎛️ Dropdown guardrail filtered {len(suggestions) - len(filtered)} invalid suggestions for {data.field_name}")
270+
suggestions = filtered
228271

229272
return IntelligentSuggestionResponse(
230273
suggestions=[
@@ -258,6 +301,13 @@ async def get_smart_suggestions(
258301
# Helper Functions
259302
# =============================================================================
260303

304+
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Log full validation details and return them in a 422 response.

    Args:
        request: The incoming request whose payload failed validation.
        exc: The validation error raised by FastAPI for that request.

    Returns:
        A 422 ``JSONResponse`` with the error list under ``detail`` and the
        offending request body under ``body``.
    """
    # NOTE(review): nothing visible in this module registers this function as
    # an exception handler; confirm it is attached to the FastAPI app.
    # Imported locally so this helper does not depend on a top-of-file import.
    from fastapi.encoders import jsonable_encoder

    body = getattr(exc, "body", None)
    errors = exc.errors()
    logger.error(f"❌ Validation error on {request.url.path}: {errors} | body={body}")
    # Both the error list and the captured body can contain values that the
    # JSON encoder cannot serialize directly (bytes, exception objects in
    # ``ctx``); encode first so the handler cannot fail while responding.
    return JSONResponse(
        status_code=422,
        content=jsonable_encoder({"detail": errors, "body": body}),
    )
309+
310+
261311
def _infer_field_pattern(field_name: str, field_label: str, field_type: str) -> str:
262312
"""
263313
Infer the field pattern/category for RAG lookup.
@@ -294,3 +344,41 @@ def _infer_field_pattern(field_name: str, field_label: str, field_type: str) ->
294344
return "website"
295345
else:
296346
return field_name.lower()
347+
348+
349+
def _filter_to_field_options(values: List[str], options: Optional[List[Dict[str, Any]]]) -> List[str]:
350+
"""
351+
Restrict suggestions to provided options using case-insensitive fuzzy match.
352+
Returns canonical option labels/values when matched.
353+
"""
354+
if not options:
355+
return values
356+
357+
option_labels = []
358+
for opt in options:
359+
label = opt.get("label")
360+
value = opt.get("value")
361+
if label:
362+
option_labels.append(str(label).strip())
363+
if value and value != label:
364+
option_labels.append(str(value).strip())
365+
366+
if not option_labels:
367+
return values
368+
369+
filtered: List[str] = []
370+
for val in values:
371+
sval = str(val or "").strip()
372+
sval_lower = sval.lower()
373+
for opt in option_labels:
374+
opt_lower = opt.lower()
375+
ratio = SequenceMatcher(None, sval_lower, opt_lower).ratio()
376+
if (
377+
sval_lower == opt_lower
378+
or sval_lower in opt_lower
379+
or opt_lower in sval_lower
380+
or ratio >= 0.82
381+
):
382+
filtered.append(opt)
383+
break
384+
return filtered

0 commit comments

Comments
 (0)