"""Synchronous CLI ingestion: discover -> queue -> process inline. Use ``--async`` to push tasks to Celery instead of running inline (default inline mode is convenient for ad-hoc runs without a worker container). """ from __future__ import annotations import sys import uuid from pathlib import Path import click from app.ingestion.scanner import discover_documents from app.logging_config import configure_logging, get_logger configure_logging() logger = get_logger(__name__) @click.command() @click.option("--path", required=True, type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path)) @click.option("--recursive/--no-recursive", default=True) @click.option("--force", is_flag=True, default=False, help="Re-process even if SHA already exists") @click.option("--mode", type=click.Choice(["inline", "celery"]), default="inline") def main(path: Path, recursive: bool, force: bool, mode: str) -> None: run_id = uuid.uuid4() discovered = queued = dups = invalid = 0 for record in discover_documents(path, recursive=recursive, force=force): discovered += 1 if record.duplicate and not force: dups += 1 continue if not record.document_id: invalid += 1 continue if mode == "celery": from app.workers.tasks import process_document process_document.delay(str(record.document_id), str(run_id)) else: from app.ingestion.pipeline import process_document_id try: result = process_document_id(record.document_id, run_id) logger.info("ingest.cli.processed", path=str(record.path), result=result) except Exception as exc: # noqa: BLE001 logger.exception("ingest.cli.failed", path=str(record.path), error=str(exc)) invalid += 1 continue queued += 1 click.echo( f"discovered={discovered} queued={queued} duplicates={dups} invalid={invalid} run={run_id}" ) if __name__ == "__main__": sys.exit(main(standalone_mode=True) or 0)