chore: bootstrap repository with governance docs

Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:41:50 +03:00
commit 7f72171572
157 changed files with 11298 additions and 0 deletions

60
scripts/ingest_folder.py Normal file
View File

@@ -0,0 +1,60 @@
"""Synchronous CLI ingestion: discover -> queue -> process inline.
Use ``--mode celery`` to push tasks to Celery instead of running inline (the
default ``--mode inline`` is convenient for ad-hoc runs without a worker
container).
"""
from __future__ import annotations
import sys
import uuid
from pathlib import Path
import click
from app.ingestion.scanner import discover_documents
from app.logging_config import configure_logging, get_logger
# Set up logging at import time, before the module-level logger is created.
configure_logging()
# Module-level logger named after this module; the CLI command uses it for
# its per-document "processed" / "failed" events.
logger = get_logger(__name__)
@click.command()
@click.option(
    "--path",
    required=True,
    type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path),
)
@click.option("--recursive/--no-recursive", default=True)
@click.option("--force", is_flag=True, default=False, help="Re-process even if SHA already exists")
@click.option(
    "--mode",
    type=click.Choice(["inline", "celery"]),
    default="inline",
    help="inline: process each document in this process; celery: enqueue it as a Celery task",
)
def main(path: Path, recursive: bool, force: bool, mode: str) -> None:
    """Discover documents under PATH and process or enqueue each one.

    Prints a one-line summary of the run counters when finished.
    \f

    Everything after the form feed above is hidden from ``--help`` by click.

    Args:
        path: Existing file or directory handed to ``discover_documents``.
        recursive: Forwarded to ``discover_documents``.
        force: Forwarded to ``discover_documents``; also disables the
            duplicate skip so records flagged ``duplicate`` are still handled.
        mode: ``"inline"`` calls ``process_document_id`` in this process;
            ``"celery"`` enqueues ``process_document`` via ``.delay``.

    Counters in the summary:
        discovered: every record yielded by ``discover_documents``.
        duplicates: records skipped because ``duplicate`` is set and
            ``force`` is off.
        invalid: records without a ``document_id``, plus inline runs that
            raised (those are logged as ``ingest.cli.failed``).
        queued: records enqueued (celery) or processed without error (inline).
    """
    # One id shared by every document handled in this invocation.
    run_id = uuid.uuid4()

    # Resolve the backend once instead of re-running the import statement for
    # every record. The imports stay lazy per mode so that an inline run never
    # has to import the Celery task module (and vice versa); an import error
    # now surfaces before discovery starts rather than on the first record.
    if mode == "celery":
        from app.workers.tasks import process_document
    else:
        from app.ingestion.pipeline import process_document_id

    discovered = queued = dups = invalid = 0
    for record in discover_documents(path, recursive=recursive, force=force):
        discovered += 1

        if record.duplicate and not force:
            dups += 1
            continue

        if not record.document_id:
            invalid += 1
            continue

        if mode == "celery":
            # Ids are passed as strings to the task.
            process_document.delay(str(record.document_id), str(run_id))
        else:
            try:
                result = process_document_id(record.document_id, run_id)
                logger.info("ingest.cli.processed", path=str(record.path), result=result)
            except Exception as exc:  # noqa: BLE001
                # Best effort: one failing document must not abort the whole
                # run; it is logged with its traceback and counted as invalid.
                logger.exception("ingest.cli.failed", path=str(record.path), error=str(exc))
                invalid += 1
                continue

        queued += 1

    click.echo(
        f"discovered={discovered} queued={queued} duplicates={dups} invalid={invalid} run={run_id}"
    )
if __name__ == "__main__":
    # Run the click command explicitly in standalone mode; if the call ever
    # returns, a falsy return value is mapped to exit status 0.
    exit_status = main(standalone_mode=True) or 0
    sys.exit(exit_status)