chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line endings), AGENTS.md (entry points, stack, discovery order, baseline checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion, failures, rollback, scaling notes), .env.prod.example with rotated credential placeholders, and dev-only warnings on .env.example. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
63
app/api/routes_ingestion.py
Normal file
63
app/api/routes_ingestion.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Ingestion endpoints."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.api.schemas import IngestFolderRequest, IngestFolderResponse
|
||||
from app.logging_config import get_logger
|
||||
|
||||
# Module-level logger, obtained from the project's logging helper and keyed by
# this module's import name.
logger = get_logger(__name__)
|
||||
|
||||
# Router for the ingestion endpoints: every route registered on it is mounted
# under the "/ingest" path prefix and tagged "ingestion".
router = APIRouter(prefix="/ingest", tags=["ingestion"])
|
||||
|
||||
|
||||
@router.post("/folder", response_model=IngestFolderResponse)
def ingest_folder(req: IngestFolderRequest) -> IngestFolderResponse:
    """Scan ``req.path`` for documents and enqueue each accepted one.

    Only the discovery pass runs inside the request. Every accepted record is
    handed to ``process_document.delay`` and the response is returned without
    waiting for that work; the per-document OCR / extraction / indexing is
    performed asynchronously by Celery workers.

    Args:
        req: Request carrying the folder ``path`` plus the ``recursive`` and
            ``force`` flags, which are forwarded to the scanner.

    Returns:
        An ``IngestFolderResponse`` with the generated run id and the
        discovered / queued / skipped-duplicate / invalid counts.

    Raises:
        HTTPException: Status 400 when ``req.path`` does not exist or is not
            a directory.
    """
    target_dir = Path(req.path)
    if not (target_dir.exists() and target_dir.is_dir()):
        raise HTTPException(status_code=400, detail=f"Folder not found: {req.path}")

    # Deferred on purpose: importing the scanner and the worker tasks here
    # keeps loading this module cheap.
    from app.ingestion.scanner import discover_documents
    from app.workers.tasks import process_document

    run_id = uuid.uuid4()
    seen = enqueued = duplicate_count = invalid_count = 0

    records = discover_documents(target_dir, recursive=req.recursive, force=req.force)
    for rec in records:
        seen += 1
        if rec.duplicate and not req.force:
            # Known document and no forced re-ingest requested: skip it.
            duplicate_count += 1
        elif not rec.document_id:
            # No document id on the record, so there is nothing to enqueue.
            invalid_count += 1
        else:
            process_document.delay(str(rec.document_id), str(run_id))
            enqueued += 1

    logger.info(
        "ingest.folder.queued",
        path=str(target_dir),
        discovered=seen,
        queued=enqueued,
        skipped_duplicates=duplicate_count,
        invalid=invalid_count,
        run_id=str(run_id),
    )

    return IngestFolderResponse(
        run_id=run_id,
        discovered=seen,
        queued=enqueued,
        skipped_duplicates=duplicate_count,
        invalid_files=invalid_count,
    )
|
||||
Reference in New Issue
Block a user