chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line endings), AGENTS.md (entry points, stack, discovery order, baseline checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion, failures, rollback, scaling notes), .env.prod.example with rotated credential placeholders, and dev-only warnings on .env.example. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
60
scripts/ingest_folder.py
Normal file
60
scripts/ingest_folder.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Synchronous CLI ingestion: discover -> queue -> process inline.
|
||||
|
||||
Use ``--mode celery`` to push tasks to Celery instead of running inline (default
|
||||
inline mode is convenient for ad-hoc runs without a worker container).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from app.ingestion.scanner import discover_documents
|
||||
from app.logging_config import configure_logging, get_logger
|
||||
|
||||
# Logging is configured at import time, before the module-level logger is
# obtained on the following statement.
configure_logging()
|
||||
# Module-level logger named after this module; used by ``main`` for the
# per-document "processed" / "failed" events.
logger = get_logger(__name__)
|
||||
|
||||
|
||||
# CLI entry point: walk ``--path`` with ``discover_documents`` and, for each
# record, either enqueue a Celery task (``--mode celery``) or process it in
# this process (``--mode inline``, the default). A one-line summary of the
# counters is echoed at the end.
#
# NOTE: this is deliberately documented with comments rather than a function
# docstring -- click would surface a docstring as the command's ``--help``
# text, which would change the program's output.
@click.command()
@click.option("--path", required=True, type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path))
@click.option("--recursive/--no-recursive", default=True)
@click.option("--force", is_flag=True, default=False, help="Re-process even if SHA already exists")
@click.option("--mode", type=click.Choice(["inline", "celery"]), default="inline")
def main(path: Path, recursive: bool, force: bool, mode: str) -> None:
    # One run id is shared by every document handled in this invocation.
    run_id = uuid.uuid4()

    # Counters reported in the final summary line.
    seen = 0
    handled = 0
    skipped = 0
    rejected = 0

    use_celery = mode == "celery"

    for doc in discover_documents(path, recursive=recursive, force=force):
        seen += 1

        # Duplicates are skipped unless the caller asked for --force.
        if doc.duplicate and not force:
            skipped += 1
            continue

        # A record without a document id cannot be processed.
        if not doc.document_id:
            rejected += 1
            continue

        if use_celery:
            # Imported lazily so only the selected backend is loaded
            # (presumably to keep inline runs free of the worker stack).
            from app.workers.tasks import process_document

            process_document.delay(str(doc.document_id), str(run_id))
            handled += 1
            continue

        from app.ingestion.pipeline import process_document_id

        try:
            outcome = process_document_id(doc.document_id, run_id)
            logger.info("ingest.cli.processed", path=str(doc.path), result=outcome)
        except Exception as exc:  # noqa: BLE001
            # Best-effort: log the failure, count it as invalid, and move on
            # to the next record instead of aborting the whole run.
            logger.exception("ingest.cli.failed", path=str(doc.path), error=str(exc))
            rejected += 1
        else:
            handled += 1

    click.echo(
        f"discovered={seen} queued={handled} duplicates={skipped} invalid={rejected} run={run_id}"
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the click command when executed as a script. Whatever ``main``
    # returns is used as the exit status, with a falsy result mapped to 0.
    exit_status = main(standalone_mode=True)
    sys.exit(exit_status or 0)
|
||||
Reference in New Issue
Block a user