Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions form-flow-backend/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from typing import Optional, List

from .database import Base
from .vocabulary_model import VocabularyCorrection


class User(Base):
Expand Down
23 changes: 23 additions & 0 deletions form-flow-backend/core/vocabulary_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Vocabulary Correction Model

Database model for storing user-specific vocabulary corrections
"""

from sqlalchemy import Column, Integer, String, DateTime, Text
from sqlalchemy.sql import func
from core.database import Base


class VocabularyCorrection(Base):
    """Correction rule mapping a mis-heard STT phrase to its intended text.

    Each row pairs the text the speech-to-text engine produced (``heard``)
    with the replacement to use instead (``correct``), plus optional hint
    columns and usage bookkeeping.
    """

    __tablename__ = "vocabulary_corrections"

    id = Column(Integer, primary_key=True, index=True)
    # Phrase as produced by the STT engine.
    # NOTE(review): scripts/migrate_vocabulary.py creates a UNIQUE index on
    # this column, while the model only declares a plain index -- confirm
    # which one is intended before relying on metadata-created tables.
    heard = Column(String(255), nullable=False, index=True)
    # Replacement text for ``heard``.
    correct = Column(String(255), nullable=False)
    # Optional free-form hints stored alongside the rule.
    context = Column(String(255), nullable=True)
    phonetic = Column(String(255), nullable=True)
    # ``server_default`` mirrors the ``DEFAULT 0`` used by the migration DDL,
    # so rows inserted outside the ORM never end up with a NULL counter.
    usage_count = Column(Integer, default=0, server_default="0")
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    # Stays NULL until a value is written by application code.
    last_used = Column(DateTime(timezone=True), nullable=True)
3 changes: 2 additions & 1 deletion form-flow-backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from utils.rate_limit import limiter, rate_limit_exceeded_handler

# Import Routers
from routers import auth, forms, speech, conversation, advanced_voice, analytics, websocket, local_llm, pdf, suggestions, docx, profile, snippets, plugins, attachments
from routers import auth, forms, speech, conversation, advanced_voice, analytics, websocket, local_llm, pdf, suggestions, docx, profile, snippets, plugins, attachments, vocabulary

# Initialize logging
setup_logging()
Expand Down Expand Up @@ -194,6 +194,7 @@ async def formflow_exception_handler(request: Request, exc: FormFlowError):
app.include_router(snippets.router)
app.include_router(plugins.router)
app.include_router(attachments.router)
app.include_router(vocabulary.router)


# =============================================================================
Expand Down
1 change: 1 addition & 0 deletions form-flow-backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pytest-asyncio>=0.21.0
pytest-cov>=4.1.0
httpx>=0.25.0
aiosqlite>=0.19.0
aiofiles>=23.2.0

# Redis (for caching and rate limiting)
redis>=5.0.0
Expand Down
28 changes: 4 additions & 24 deletions form-flow-backend/routers/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,37 +92,17 @@ async def fill_docx(docx_id: str, data: Dict[str, str]) -> Dict[str, Any]:
)

try:
from docx import Document
from services.docx.docx_filler import fill_docx_template

# Load original document
content = _docx_storage[docx_id]
doc = Document(io.BytesIO(content))

# Replace placeholders in paragraphs
for paragraph in doc.paragraphs:
for field_name, value in data.items():
# Replace bracket placeholders
import re
# Match both [Name] and [name] style
pattern = re.compile(rf'\[{re.escape(field_name)}\]', re.IGNORECASE)
if pattern.search(paragraph.text):
for run in paragraph.runs:
run.text = pattern.sub(value, run.text)

# Also try display name variations
display_pattern = re.compile(rf'\[{field_name.replace("_", " ")}\]', re.IGNORECASE)
if display_pattern.search(paragraph.text):
for run in paragraph.runs:
run.text = display_pattern.sub(value, run.text)

# Save filled document
output = io.BytesIO()
doc.save(output)
output.seek(0)
# Fill document using service
filled_content, filled_count = fill_docx_template(content, data)

# Generate download ID
download_id = str(uuid.uuid4())
_docx_storage[f"filled_{download_id}"] = output.getvalue()
_docx_storage[f"filled_{download_id}"] = filled_content

return {
"success": True,
Expand Down
84 changes: 84 additions & 0 deletions form-flow-backend/routers/vocabulary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
Vocabulary Correction Router

Vocabulary correction endpoints using VocabularyService.
"""

from fastapi import APIRouter, HTTPException, Depends
from pydantic import BaseModel, Field
from typing import List, Optional, Any, Dict
from datetime import datetime
from sqlalchemy.orm import Session

from core.database import get_db
from services.voice.vocabulary import get_vocabulary_service

router = APIRouter(tags=["Vocabulary"])
vocab_service = get_vocabulary_service()


class VocabularyCorrectionRequest(BaseModel):
    """Request body for creating a vocabulary correction rule.

    The length limits mirror the ``String(255)`` columns of the
    ``vocabulary_corrections`` table, so oversized input is rejected with a
    validation error instead of failing later at the database layer.
    ``heard`` must be non-empty.
    """

    heard: str = Field(
        ...,
        min_length=1,
        max_length=255,
        description="What Whisper/STT heard",
    )
    correct: str = Field(
        ...,
        max_length=255,
        description="What it should be",
    )
    context: Optional[str] = Field(
        None,
        max_length=255,
        description="Context (field name, form type)",
    )


class CorrectionResponse(BaseModel):
    """Serialized form of a single vocabulary correction.

    Used as the ``response_model`` for the create and list endpoints of
    this router; the fields correspond one-to-one to the columns of the
    ``vocabulary_corrections`` table.
    """

    # Identity and the correction pair itself.
    id: int
    heard: str
    correct: str

    # Optional hint supplied when the rule was created.
    context: Optional[str]

    # Usage bookkeeping.
    usage_count: int
    created_at: datetime
    last_used: Optional[datetime]

    # Optional phonetic value stored with the rule.
    phonetic: Optional[str]


@router.post("/vocabulary/correction", response_model=CorrectionResponse)
async def add_correction(correction: VocabularyCorrectionRequest, db: Session = Depends(get_db)):
    """Create a vocabulary correction from the request payload.

    The heard/correct/context values are handed to the vocabulary service
    together with the database session; the service's result is returned
    and serialized through ``CorrectionResponse``.
    """
    heard, correct, context = correction.heard, correction.correct, correction.context
    created = await vocab_service.add_correction(db, heard, correct, context)
    return created


@router.get("/vocabulary/corrections", response_model=List[CorrectionResponse])
async def get_corrections(db: Session = Depends(get_db)):
    """List the vocabulary corrections returned by the vocabulary service."""
    corrections = await vocab_service.get_corrections(db)
    return corrections


@router.post("/vocabulary/apply")
async def apply_corrections(text: str, db: Session = Depends(get_db)):
    """Run the stored corrections over ``text`` and report what changed.

    Returns a dict with the original text, the corrected text, the number
    of corrections applied and the per-correction details reported by the
    vocabulary service.
    """
    # The service is initialized lazily: do it on the first request that
    # finds it uninitialized.
    if not vocab_service._initialized:
        await vocab_service.initialize(db)

    outcome = vocab_service.apply_corrections(text)

    response = {
        "original": outcome["original"],
        "corrected": outcome["corrected"],
    }
    applied = outcome["applied"]
    response["corrections_applied"] = len(applied)
    response["details"] = applied
    return response


@router.delete("/vocabulary/correction/{correction_id}")
async def delete_correction(correction_id: int, db: Session = Depends(get_db)):
    """Delete the correction identified by ``correction_id``.

    Raises:
        HTTPException: 404 when the service reports that nothing was deleted.
    """
    deleted = await vocab_service.delete_correction(db, correction_id)
    if deleted:
        return {"success": True, "message": "Correction deleted"}

    raise HTTPException(status_code=404, detail="Correction not found")


@router.get("/vocabulary/analytics")
async def get_analytics(db: Session = Depends(get_db)):
    """Return the analytics payload produced by the vocabulary service."""
    analytics = await vocab_service.get_analytics(db)
    return analytics

181 changes: 181 additions & 0 deletions form-flow-backend/scripts/migrate_vocabulary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""
Database Migration: Add Vocabulary Corrections Table

Run this script to create the vocabulary_corrections table
for the self-learning vocabulary correction system.

Usage:
python scripts/migrate_vocabulary.py
"""

import asyncio
import sys
from pathlib import Path

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from sqlalchemy import text
from core.database import engine
from utils.logging import get_logger

logger = get_logger(__name__)


async def migrate():
    """Create the vocabulary_corrections table and its indexes.

    The DDL is PostgreSQL-flavoured (``SERIAL``, ``DELETE ... USING``).
    Statements are executed one at a time inside a single transaction:
    drivers that prepare statements (for example asyncpg) reject a string
    containing several commands, so sending the whole script in one
    ``execute()`` call fails on them.

    Returns:
        True when the table is found in ``information_schema.tables`` after
        the statements ran; False when it is not found or any step raised.
    """
    statements = (
        # Timestamp columns are timezone-aware to match the ORM model,
        # which declares them as DateTime(timezone=True).
        """
        CREATE TABLE IF NOT EXISTS vocabulary_corrections (
            id SERIAL PRIMARY KEY,
            heard VARCHAR(255) NOT NULL,
            correct VARCHAR(255) NOT NULL,
            context VARCHAR(255),
            phonetic VARCHAR(255),
            usage_count INTEGER DEFAULT 0,
            created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
            last_used TIMESTAMP WITH TIME ZONE
        )
        """,
        # Remove duplicate rows before creating the unique index; for each
        # "heard" value only the row with the highest id survives.
        """
        DELETE FROM vocabulary_corrections a USING vocabulary_corrections b
        WHERE a.id < b.id AND a.heard = b.heard
        """,
        """
        CREATE UNIQUE INDEX IF NOT EXISTS idx_heard_unique
        ON vocabulary_corrections(heard)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_usage_count
        ON vocabulary_corrections(usage_count DESC)
        """,
    )

    verify_sql = """
        SELECT COUNT(*) FROM information_schema.tables
        WHERE table_name = 'vocabulary_corrections'
    """

    try:
        async with engine.begin() as conn:
            logger.info("Creating vocabulary_corrections table...")
            for statement in statements:
                await conn.execute(text(statement))
            logger.info("✓ Table created successfully")

            # Verify table exists
            result = await conn.execute(text(verify_sql))
            count = result.scalar()

            if count > 0:
                logger.info("✓ Migration verified")
                return True

            logger.error("✗ Table not found after creation")
            return False

    except Exception as e:
        # Top-level boundary of the script: report and signal failure to
        # the caller instead of propagating.
        logger.error(f"Migration failed: {e}")
        return False


async def rollback():
    """Drop the vocabulary_corrections table.

    Returns:
        True when the DROP statement ran without raising, False otherwise.
    """
    statement = text("""
    DROP TABLE IF EXISTS vocabulary_corrections CASCADE;
    """)

    try:
        async with engine.begin() as conn:
            logger.info("Dropping vocabulary_corrections table...")
            await conn.execute(statement)
            logger.info("✓ Table dropped successfully")
    except Exception as e:
        logger.error(f"Rollback failed: {e}")
        return False

    return True


async def seed_sample_data():
    """Insert a handful of example corrections for testing.

    Rows that conflict with existing data are skipped by the
    ``ON CONFLICT DO NOTHING`` clause.

    Returns:
        True when every insert ran without raising, False otherwise.
    """
    rows = [
        {"heard": "karval", "correct": "Karwal", "context": "name"},
        {"heard": "john dough", "correct": "John Doe", "context": "name"},
        {"heard": "acme corp", "correct": "ACME Corporation", "context": "company"},
        {"heard": "gmail dot com", "correct": "gmail.com", "context": "email"},
        {"heard": "triple w", "correct": "www", "context": "url"},
    ]

    statement = text("""
    INSERT INTO vocabulary_corrections (heard, correct, context, usage_count)
    VALUES (:heard, :correct, :context, 0)
    ON CONFLICT DO NOTHING
    """)

    try:
        async with engine.begin() as conn:
            logger.info("Seeding sample data...")

            # One execute per row, all inside the same transaction.
            for params in rows:
                await conn.execute(statement, params)

            logger.info(f"✓ Seeded {len(rows)} sample corrections")
    except Exception as e:
        logger.error(f"Seeding failed: {e}")
        return False

    return True


async def main():
    """Interactive entry point: show the menu and run the chosen action.

    Prompts on stdin for one of: migrate (optionally followed by seeding),
    rollback (after an explicit "yes" confirmation), seed sample data, or
    exit. A closing banner line is printed in every case.
    """
    banner = "=" * 60

    print("\n" + banner)
    print("FormFlow AI - Vocabulary Corrections Migration")
    print(banner + "\n")

    menu = (
        "Options:",
        "1. Migrate (create table)",
        "2. Rollback (drop table)",
        "3. Seed sample data",
        "4. Exit",
    )
    for entry in menu:
        print(entry)

    choice = input("\nEnter choice (1-4): ").strip()

    if choice == "1":
        if await migrate():
            print("\n✓ Migration completed successfully!")

            # Seeding is offered only after a successful migration.
            answer = input("\nSeed sample data? (y/n): ").strip().lower()
            if answer == 'y':
                await seed_sample_data()
        else:
            print("\n✗ Migration failed. Check logs for details.")

    elif choice == "2":
        # Dropping the table is destructive, so require a literal "yes".
        answer = input("\nAre you sure? This will delete all corrections. (yes/no): ").strip().lower()
        if answer != "yes":
            print("\nRollback cancelled.")
        elif await rollback():
            print("\n✓ Rollback completed successfully!")
        else:
            print("\n✗ Rollback failed. Check logs for details.")

    elif choice == "3":
        if await seed_sample_data():
            print("\n✓ Sample data seeded successfully!")
        else:
            print("\n✗ Seeding failed. Check logs for details.")

    elif choice == "4":
        print("\nExiting...")

    else:
        print("\nInvalid choice.")

    print("\n" + banner + "\n")


if __name__ == "__main__":
    asyncio.run(main())
Loading
Loading