feat(library): add file library with audio-transcription correlation

Br3n0k · Br3n0k · commit dce6e284658e · 2026-03-17T20:58:25.000-03:00
- Create new library router with file management and deletion endpoints
- Add library template with table view showing correlated files
- Implement smart file correlation between uploads and transcriptions
- Centralize template configuration in core.templates module
- Add comprehensive tests for library functionality
- Update navigation to include library link
- Improve README documentation with new feature description
diff --git a/.env b/.env
@@ -14,7 +14,7 @@ DEBUG=true
 FFMPEG_AUTO_SETUP=true  # If true, attempts to auto-install FFmpeg via imageio-ffmpeg on startup
 
 # Defines the device to use for transcription: auto (prefer GPU), cuda (force GPU), cpu (force CPU)
-TRANSCRIPTION_DEVICE=auto 
+TRANSCRIPTION_DEVICE=cuda 
 
 # Whisper Configuration
 # WHISPER_MODEL=base  # Options: tiny, base, small, medium, large-v2, large-v3
diff --git a/README.md b/README.md
@@ -22,6 +22,8 @@ Built with **FastAPI**, **WebSockets**, **Alpine.js**, and **Tailwind CSS** for
 
 * 🎥 **YouTube & Local Support:** Transcribe directly from YouTube URLs or upload `.mp3`, `.mp4`, `.wav`, and more.
 * 🚀 **Real-Time Progress:** Watch the transcription status live via **WebSockets** — no more guessing when it finishes.
+* 📚 **Smart Library:** Manage your media and transcriptions in one place. Auto-correlates audio files with their transcripts and allows one-click cleanup of paired files.
+* 🛡️ **Filename Sanitization:** Automatic handling of special characters, accents, and spaces in filenames for robust cross-platform compatibility.
 * 🤖 **Dual AI Engine:**
   * **openai-whisper**: High accuracy (default when FFmpeg is available).
   * **faster-whisper**: Blazing fast inference with seamless fallback.
@@ -30,7 +32,7 @@ Built with **FastAPI**, **WebSockets**, **Alpine.js**, and **Tailwind CSS** for
   * **Auto-FFmpeg:** Automatically detects or installs FFmpeg locally (Windows/Linux/Mac).
   * **yt-dlp Python API:** Robust media downloading without external binary dependencies.
 * 🐳 **Production Ready:** Optimized **Docker** image (multi-stage build, non-root user, secure).
-* 🌓 **Modern UI:** Dark/light mode, responsive design, and history management.
+* 🌓 **Modern UI:** Dark/light mode, responsive design, history management, and file library.
 
 ---
 
@@ -44,6 +46,7 @@ Built with **FastAPI**, **WebSockets**, **Alpine.js**, and **Tailwind CSS** for
   * `imageio-ffmpeg` (Auto-setup)
   * `torch` (PyTorch with CUDA 12.4 support)
 * **DevOps:** Docker (Multi-stage), GitHub Actions (CI), Pytest
+* **Storage:** Local filesystem with smart correlation (Library)
 
 ---
 
@@ -59,12 +62,13 @@ app/
  │   ├─ home.py            # UI: Homepage
  │   ├─ upload.py          # API: Handle file/YouTube uploads
  │   ├─ websocket.py       # API: Real-time progress updates
+ │   ├─ library.py         # API: Manage audio/transcription files
  │   └─ history.py         # UI: Transcription history
  ├─ services/
  │   ├─ progress.py        # Task state management
  │   ├─ youtube.py         # yt-dlp integration
  │   ├─ transcriber.py     # Whisper engine & FFmpeg logic
- │   └─ file_manager.py    # File I/O operations
+ │   └─ file_manager.py    # File I/O operations & Sanitization
  ├─ scripts/
  │   └─ setup_ffmpeg.py    # Auto-installer for FFmpeg
  ├─ templates/             # Jinja2 + Alpine.js templates
diff --git a/app/core/templates.py b/app/core/templates.py
@@ -0,0 +1,19 @@
+from fastapi.templating import Jinja2Templates
+from datetime import datetime
+from .config import settings
+from .theme import default_theme
+
+# Create a single Jinja2Templates instance
+templates = Jinja2Templates(directory=str(settings.templates_dir))
+
+# Register Filters -- timestamp_to_date: epoch seconds -> 'dd/mm/YYYY HH:MM' (naive local time)
+def timestamp_to_date(timestamp: float) -> str:
+    try:
+        return datetime.fromtimestamp(timestamp).strftime('%d/%m/%Y %H:%M')
+    except Exception:  # any conversion failure yields an empty string
+        return ""
+
+templates.env.filters["timestamp_to_date"] = timestamp_to_date
+
+# Register Globals
+templates.env.globals["theme"] = default_theme()
diff --git a/app/main.py b/app/main.py
@@ -1,9 +1,8 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
 from .core.config import settings
-from .core.theme import default_theme
 from .scripts.setup_ffmpeg import setup_ffmpeg
+from .core.templates import templates  # Importa templates configurados
 
 # Executar setup do FFmpeg se configurado
 if settings.ffmpeg_auto_setup:
@@ -14,9 +13,6 @@
 # Mount static files
 app.mount("/static", StaticFiles(directory=settings.static_dir), name="static")
 
-# Templates
-templates = Jinja2Templates(directory=str(settings.templates_dir))
-
 from fastapi.responses import FileResponse  # noqa: E402
 
 
@@ -26,15 +22,13 @@ async def download(filename: str):
     return FileResponse(path=str(file_path), filename=filename, media_type="text/plain")
 
 # Include routers (to be implemented)
-from .routers import home, upload, history, websocket  # noqa: E402
+from .routers import home, upload, history, websocket, library  # noqa: E402
 
 app.include_router(home.router)
 app.include_router(upload.router)
 app.include_router(history.router)
 app.include_router(websocket.router)
-
-# Add template globals
-templates.env.globals["theme"] = default_theme()
+app.include_router(library.router)
 
 
 @app.get("/health")
diff --git a/app/routers/history.py b/app/routers/history.py
@@ -1,17 +1,10 @@
 from fastapi import APIRouter, Request
 from fastapi.responses import HTMLResponse
-from fastapi.templating import Jinja2Templates
-from ..core.config import settings
 from ..services.file_manager import list_transcriptions
-from ..core.theme import default_theme
+from ..core.templates import templates
 
 router = APIRouter(prefix="/history")
 
-templates = Jinja2Templates(directory=str(settings.templates_dir))
-# Registrar tema padrão como global para os templates
-templates.env.globals["theme"] = default_theme()
-
-
 @router.get("/", response_class=HTMLResponse)
 async def history(request: Request):
     files = list_transcriptions()
diff --git a/app/routers/home.py b/app/routers/home.py
@@ -1,16 +1,9 @@
 from fastapi import APIRouter, Request
 from fastapi.responses import HTMLResponse
-from fastapi.templating import Jinja2Templates
-from ..core.config import settings
-from ..core.theme import default_theme
+from ..core.templates import templates
 
 router = APIRouter()
 
-templates = Jinja2Templates(directory=str(settings.templates_dir))
-# Registrar tema padrão como global para os templates
-templates.env.globals["theme"] = default_theme()
-
-
 @router.get("/", response_class=HTMLResponse)
 async def index(request: Request):
     return templates.TemplateResponse(request=request, name="index.html")
diff --git a/app/routers/library.py b/app/routers/library.py
@@ -0,0 +1,127 @@
+from __future__ import annotations
+from pathlib import Path
+import logging
+from typing import Optional
+from fastapi import APIRouter, Request, HTTPException
+from fastapi.responses import HTMLResponse, RedirectResponse
+from ..core.config import settings
+from ..core.templates import templates
+import logging
+from pathlib import Path
+from typing import Optional
+
+router = APIRouter(prefix="/library")
+
+logger = logging.getLogger(__name__)
+
+def _claim_audio(stem: str, audio_files: list[Path], claimed: set[Path]) -> Optional[Path]:
+    """Return the first unclaimed upload correlated with *stem* and mark it claimed.
+
+    An exact stem match ("video.mp4" for "video.txt") wins over the suffix
+    match ("uuid_video.mp4" for "video.txt"); returns None when neither exists.
+    """
+    # Uploads are stored as "{task_id}_{filename}", hence the "_{stem}" suffix rule.
+    # NOTE: the suffix rule is a heuristic; stem "a" also matches "uuid_b_a", so
+    # a transcription handled earlier can claim the audio that belongs to "b_a".
+    available = [a for a in audio_files if a not in claimed]
+    for matches in (lambda a: a.stem == stem, lambda a: a.stem.endswith(f"_{stem}")):
+        found = next((a for a in available if matches(a)), None)
+        if found is not None:
+            claimed.add(found)
+            return found
+    return None
+
+
+def get_library_items() -> list[dict]:
+    """Build the library listing: every transcription plus uploads without one.
+
+    Each ``*.txt`` file in the transcriptions directory is paired with at most
+    one file from the uploads directory; uploads left unpaired get their own
+    entry. Both directories are created if missing.
+
+    Returns:
+        A list of dicts with the keys ``transcription``, ``transcription_path``,
+        ``audio``, ``audio_path``, ``date`` (mtime) and ``size`` (bytes), sorted
+        newest first. ``date``/``size`` describe the transcription when there
+        is one, otherwise the audio file.
+    """
+    transcriptions_dir = settings.storage_transcriptions
+    uploads_dir = settings.storage_uploads
+
+    # exist_ok closes the window where another worker creates the directory
+    # between an exists() check and mkdir(), which raised FileExistsError.
+    transcriptions_dir.mkdir(parents=True, exist_ok=True)
+    uploads_dir.mkdir(parents=True, exist_ok=True)
+
+    # Stat every file once and reuse the result for ordering, date and size
+    # instead of issuing a new stat() call per field.
+    transcriptions = sorted(
+        ((p, p.stat()) for p in transcriptions_dir.glob("*.txt") if p.is_file()),
+        key=lambda pair: pair[1].st_mtime,
+        reverse=True,
+    )
+    audio_files = [p for p in uploads_dir.glob("*") if p.is_file() and p.name != ".keep"]
+
+    items = []
+    claimed: set[Path] = set()
+
+    # Newest transcriptions pick their audio first.
+    for t_file, t_stat in transcriptions:
+        related_audio = _claim_audio(t_file.stem, audio_files, claimed)
+        items.append({
+            "transcription": t_file.name,
+            "transcription_path": str(t_file),
+            "audio": related_audio.name if related_audio else None,
+            "audio_path": str(related_audio) if related_audio else None,
+            "date": t_stat.st_mtime,
+            "size": t_stat.st_size,
+        })
+
+    # Add orphaned audio files
+    for a_file in audio_files:
+        if a_file in claimed:
+            continue
+        a_stat = a_file.stat()
+        items.append({
+            "transcription": None,
+            "transcription_path": None,
+            "audio": a_file.name,
+            "audio_path": str(a_file),
+            "date": a_stat.st_mtime,
+            "size": a_stat.st_size,
+        })
+
+    # Sort by date descending
+    items.sort(key=lambda item: item["date"], reverse=True)
+    return items
+
+@router.get("/", response_class=HTMLResponse)
+async def library_view(request: Request):
+    """Render ``library.html`` with the current library listing exposed as ``items``."""
+    return templates.TemplateResponse(request=request, name="library.html", context={"items": get_library_items()})
+
+@router.post("/delete")
+async def delete_item(request: Request):
+    """Delete the files named by the ``transcription_path``/``audio_path`` form fields.
+
+    Best-effort: bad or out-of-directory paths are logged and skipped; always answers 303 to /library.
+    """
+    def safe_delete(path_str: Optional[str], allowed_dir: Path):
+        if not isinstance(path_str, str) or not path_str:  # missing field, empty value or file upload
+            return
+        try:
+            p = Path(path_str).resolve()
+            # Security check: resolve() first, so ".." segments and symlinks cannot escape allowed_dir
+            if not p.is_relative_to(allowed_dir.resolve()):
+                logger.warning("Tentativa de deletar arquivo fora do diretório permitido: %s", p)
+                return
+            if p.is_file():
+                p.unlink(missing_ok=True)  # the file may vanish between the check and the unlink
+                logger.info("Arquivo deletado: %s", p)
+        except Exception as e:
+            logger.exception("Erro ao deletar arquivo %s: %s", path_str, e)
+
+    form = await request.form()
+    safe_delete(form.get("transcription_path"), settings.storage_transcriptions)
+    safe_delete(form.get("audio_path"), settings.storage_uploads)
+    return RedirectResponse(url="/library", status_code=303)
diff --git a/app/routers/upload.py b/app/routers/upload.py
@@ -1,24 +1,19 @@
 from __future__ import annotations
 from pathlib import Path
-import os
 import logging
 import uuid
 import asyncio
 from fastapi import APIRouter, File, Form, HTTPException, UploadFile, Request, BackgroundTasks
-from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
-from fastapi.templating import Jinja2Templates
+from fastapi.responses import HTMLResponse
 from ..core.config import settings
 from ..services.youtube import download_from_youtube
 from ..services.transcriber import transcribe_file
 from ..services.file_manager import save_upload, save_transcription, get_unique_stem, sanitize_filename
 from ..services.progress import progress_manager
-from ..core.theme import default_theme
+from ..core.templates import templates
 
 router = APIRouter(prefix="/transcribe")
 
-templates = Jinja2Templates(directory=str(settings.templates_dir))
-templates.env.globals["theme"] = default_theme()
-
 logger = logging.getLogger(__name__)
 
 async def process_transcription(task_id: str, media_path: Path, original_filename: str):
diff --git a/app/templates/base.html b/app/templates/base.html
@@ -21,6 +21,7 @@
     <div class="max-w-5xl mx-auto px-4 py-4 flex items-center justify-between">
       <a href="/" class="text-xl font-semibold text-primary">Transcriber</a>
       <div class="flex items-center gap-4">
+        <a href="/library" class="text-sm hover:underline">Biblioteca</a>
         <a href="/history" class="text-sm hover:underline">Histórico</a>
         <button class="px-3 py-1 rounded border border-gray-300 dark:border-gray-700 text-sm" @click="dark = !dark">
           <span x-show="!dark">Dark</span>
diff --git a/app/templates/library.html b/app/templates/library.html
@@ -0,0 +1,93 @@
+{% extends 'base.html' %}
+
+{% block content %}
+{# Library page: one table row per entry of the `items` context list (transcription, audio, date, actions). #}
+<div class="max-w-6xl mx-auto px-4 py-8">
+    <div class="flex justify-between items-center mb-6">
+        <h2 class="text-2xl font-bold text-gray-800 dark:text-gray-100">Biblioteca de Arquivos</h2>
+        <span class="text-sm text-gray-500 dark:text-gray-400">Gerencie suas transcrições e áudios</span>
+    </div>
+
+    <div class="bg-white dark:bg-gray-800 shadow rounded-lg overflow-hidden">
+        <table class="min-w-full divide-y divide-gray-200 dark:divide-gray-700">
+            <thead class="bg-gray-50 dark:bg-gray-700">
+                <tr>
+                    <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-300 uppercase tracking-wider">
+                        Transcrição
+                    </th>
+                    <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-300 uppercase tracking-wider">
+                        Áudio Associado
+                    </th>
+                    <th scope="col" class="px-6 py-3 text-left text-xs font-medium text-gray-500 dark:text-gray-300 uppercase tracking-wider">
+                        Data
+                    </th>
+                    <th scope="col" class="px-6 py-3 text-right text-xs font-medium text-gray-500 dark:text-gray-300 uppercase tracking-wider">
+                        Ações
+                    </th>
+                </tr>
+            </thead>
+            <tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-700">
+                {% for item in items %}
+                <tr class="hover:bg-gray-50 dark:hover:bg-gray-700 transition-colors">
+                    <td class="px-6 py-4 whitespace-nowrap">
+                        {% if item.transcription %}
+                        <div class="flex items-center">
+                            <div class="flex-shrink-0 h-8 w-8 flex items-center justify-center rounded bg-blue-100 text-blue-500">
+                                <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12h6m-6 4h6m2 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z"></path></svg>
+                            </div>
+                            <div class="ml-4">
+                                <div class="text-sm font-medium text-gray-900 dark:text-gray-100">{{ item.transcription }}</div>
+                                <div class="text-xs text-gray-500 dark:text-gray-400">{{ (item.size / 1024)|round(1) }} KB</div>
+                            </div>
+                        </div>
+                        {% else %}
+                        <span class="text-xs text-gray-400 italic">Sem transcrição</span>
+                        {% endif %}
+                    </td>
+                    <td class="px-6 py-4 whitespace-nowrap">
+                        {% if item.audio %}
+                        <div class="flex items-center">
+                            <div class="flex-shrink-0 h-8 w-8 flex items-center justify-center rounded bg-purple-100 text-purple-500">
+                                <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15.536 8.464a5 5 0 010 7.072m2.828-9.9a9 9 0 010 12.728M5.586 15H4a1 1 0 01-1-1v-4a1 1 0 011-1h1.586l4.707-4.707C10.923 3.663 12 4.109 12 5v14c0 .891-1.077 1.337-1.707.707L5.586 15z"></path></svg>
+                            </div>
+                            <div class="ml-4">
+                                <div class="text-sm text-gray-900 dark:text-gray-100">{{ item.audio }}</div>
+                            </div>
+                        </div>
+                        {% else %}
+                        <span class="text-xs text-gray-400 italic">Áudio removido</span>
+                        {% endif %}
+                    </td>
+                    <td class="px-6 py-4 whitespace-nowrap text-sm text-gray-500 dark:text-gray-400">
+                        {{ item.date | timestamp_to_date }}
+                    </td>
+                    <td class="px-6 py-4 whitespace-nowrap text-right text-sm font-medium">
+                        <div class="flex justify-end space-x-3">
+                            {% if item.transcription %}
+                            {# urlencode keeps '#', '?' and '%' in a filename from being parsed as URL syntax #}
+                            <a href="/download/{{ item.transcription | urlencode }}" class="text-indigo-600 hover:text-indigo-900 dark:text-indigo-400 dark:hover:text-indigo-300" title="Baixar Transcrição">
+                                <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16v1a3 3 0 003 3h10a3 3 0 003-3v-1m-4-4l-4 4m0 0l-4-4m4 4V4"></path></svg>
+                            </a>
+                            {% endif %}
+
+                            {# NOTE(review): the hidden fields send item.*_path back to the server unchanged; confirm exposing these paths to the client is acceptable #}
+                            <form action="/library/delete" method="POST" onsubmit="return confirm('Tem certeza que deseja excluir este item? Esta ação removerá a transcrição e o áudio associado.');" class="inline">
+                                <input type="hidden" name="transcription_path" value="{{ item.transcription_path or '' }}">
+                                <input type="hidden" name="audio_path" value="{{ item.audio_path or '' }}">
+                                <button type="submit" class="text-red-600 hover:text-red-900 dark:text-red-400 dark:hover:text-red-300" title="Excluir">
+                                    <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"></path></svg>
+                                </button>
+                            </form>
+                        </div>
+                    </td>
+                </tr>
+                {% else %}
+                <tr>
+                    <td colspan="4" class="px-6 py-8 text-center text-sm text-gray-500 dark:text-gray-400">Nenhum arquivo na biblioteca.</td>
+                </tr>
+                {% endfor %}
+            </tbody>
+        </table>
+    </div>
+</div>
+{% endblock %}
diff --git a/tests/test_library.py b/tests/test_library.py
diff --git a/tests/test_library_integration.py b/tests/test_library_integration.py