chore: bootstrap repository with governance docs

Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:41:50 +03:00
commit 7f72171572
157 changed files with 11298 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
"""Ingestion endpoints."""
from __future__ import annotations
import uuid
from pathlib import Path
from fastapi import APIRouter, HTTPException
from app.api.schemas import IngestFolderRequest, IngestFolderResponse
from app.logging_config import get_logger
# Module-level logger, keyed by this module's dotted name.
logger = get_logger(__name__)

# Every route registered on this router lives under the /ingest prefix.
router = APIRouter(
    prefix="/ingest",
    tags=["ingestion"],
)
@router.post("/folder", response_model=IngestFolderResponse)
def ingest_folder(req: IngestFolderRequest) -> IngestFolderResponse:
    """Scan the folder at ``req.path`` for PDFs and enqueue them for processing.

    Only the discovery pass runs inside the request. The per-document work
    (OCR / extraction / indexing) is handed to Celery workers, so the
    response goes out as soon as scanning has finished.

    Args:
        req: Carries the folder ``path`` plus the ``recursive`` and ``force``
            flags. Duplicates are skipped unless ``force`` is set.

    Returns:
        The id of this ingestion run together with the number of files
        discovered, queued, skipped as duplicates, and rejected as invalid.

    Raises:
        HTTPException: Status 400 when ``req.path`` does not exist or is not
            a directory.
    """
    folder = Path(req.path)
    if not (folder.exists() and folder.is_dir()):
        raise HTTPException(status_code=400, detail=f"Folder not found: {req.path}")

    # Imported here rather than at module top so loading this module stays light.
    from app.ingestion.scanner import discover_documents
    from app.workers.tasks import process_document

    run_id = uuid.uuid4()
    run_id_text = str(run_id)

    seen = 0
    enqueued = 0
    duplicates = 0
    rejected = 0

    records = discover_documents(folder, recursive=req.recursive, force=req.force)
    for doc in records:
        seen += 1
        if doc.duplicate and not req.force:
            # Already known and the caller did not ask to reprocess it.
            duplicates += 1
        elif not doc.document_id:
            # No document id on the record, so there is nothing to hand to a worker.
            rejected += 1
        else:
            process_document.delay(str(doc.document_id), run_id_text)
            enqueued += 1

    logger.info(
        "ingest.folder.queued",
        path=str(folder),
        discovered=seen,
        queued=enqueued,
        skipped_duplicates=duplicates,
        invalid=rejected,
        run_id=run_id_text,
    )

    return IngestFolderResponse(
        run_id=run_id,
        discovered=seen,
        queued=enqueued,
        skipped_duplicates=duplicates,
        invalid_files=rejected,
    )