chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line endings), AGENTS.md (entry points, stack, discovery order, baseline checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion, failures, rollback, scaling notes), .env.prod.example with rotated credential placeholders, and dev-only warnings on .env.example. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
0
scripts/__init__.py
Normal file
0
scripts/__init__.py
Normal file
60
scripts/ingest_folder.py
Normal file
60
scripts/ingest_folder.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Synchronous CLI ingestion: discover -> queue -> process inline.
|
||||
|
||||
Use ``--mode celery`` to push tasks to Celery instead of running inline (default
|
||||
inline mode is convenient for ad-hoc runs without a worker container).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from app.ingestion.scanner import discover_documents
|
||||
from app.logging_config import configure_logging, get_logger
|
||||
|
||||
configure_logging()
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@click.command()
@click.option(
    "--path",
    required=True,
    type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path),
)
@click.option("--recursive/--no-recursive", default=True)
@click.option("--force", is_flag=True, default=False, help="Re-process even if SHA already exists")
@click.option("--mode", type=click.Choice(["inline", "celery"]), default="inline")
def main(path: Path, recursive: bool, force: bool, mode: str) -> None:
    """Discover documents under ``path`` and process or enqueue each one.

    Every record yielded by ``discover_documents`` is counted as discovered.
    Records flagged as duplicates are skipped unless ``force`` is set, and
    records without a ``document_id`` are counted as invalid. Remaining records
    are either handed to the Celery task (``mode == "celery"``) or processed
    inline; an inline failure is logged and counted as invalid rather than
    aborting the run. A one-line summary is echoed at the end.

    Args:
        path: Existing file or directory to scan.
        recursive: Forwarded to ``discover_documents``.
        force: Forwarded to ``discover_documents``; also disables the
            duplicate skip.
        mode: ``"inline"`` or ``"celery"``.
    """
    run_id = uuid.uuid4()
    discovered, queued, dups, invalid = 0, 0, 0, 0
    dispatch_to_celery = mode == "celery"

    for doc in discover_documents(path, recursive=recursive, force=force):
        discovered += 1

        if doc.duplicate and not force:
            dups += 1
            continue
        if not doc.document_id:
            invalid += 1
            continue

        if dispatch_to_celery:
            # Imported lazily so inline runs never need the worker module.
            from app.workers.tasks import process_document

            process_document.delay(str(doc.document_id), str(run_id))
            queued += 1
            continue

        # Imported lazily so Celery-only runs never load the pipeline.
        from app.ingestion.pipeline import process_document_id

        try:
            result = process_document_id(doc.document_id, run_id)
            logger.info("ingest.cli.processed", path=str(doc.path), result=result)
        except Exception as exc:  # noqa: BLE001
            # One bad document must not stop the rest of the batch.
            logger.exception("ingest.cli.failed", path=str(doc.path), error=str(exc))
            invalid += 1
        else:
            queued += 1

    summary = f"discovered={discovered} queued={queued} duplicates={dups} invalid={invalid} run={run_id}"
    click.echo(summary)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # In standalone mode click normally terminates the interpreter itself;
    # the ``or 0`` fallback only matters if the command ever returns.
    exit_code = main(standalone_mode=True) or 0
    sys.exit(exit_code)
|
||||
25
scripts/init_db.py
Normal file
25
scripts/init_db.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Apply Alembic migrations against the configured Postgres."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from alembic import command
|
||||
from alembic.config import Config
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
def main() -> int:
    """Upgrade the configured database to the latest Alembic revision.

    Loads ``alembic.ini`` from the directory one level above this script's
    folder, points Alembic at ``app/db/migrations`` and at
    ``settings.database_url``, then runs ``upgrade head``.

    Returns:
        ``0`` after the upgrade call returns. Errors raised by Alembic
        propagate to the caller.
    """
    root = Path(__file__).resolve().parents[1]
    cfg = Config(str(root / "alembic.ini"))

    # Values given to set_main_option pass through ConfigParser interpolation,
    # so a literal "%" (e.g. a URL-encoded password) must be doubled or the
    # call fails with an "invalid interpolation syntax" error.
    script_location = str(root / "app" / "db" / "migrations").replace("%", "%%")
    database_url = settings.database_url.replace("%", "%%")

    cfg.set_main_option("script_location", script_location)
    cfg.set_main_option("sqlalchemy.url", database_url)
    command.upgrade(cfg, "head")
    print("alembic upgrade head: ok")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
17
scripts/init_opensearch.py
Normal file
17
scripts/init_opensearch.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Bootstrap the OpenSearch chunk index."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from app.indexing.opensearch_client import ensure_index
|
||||
|
||||
|
||||
def main() -> int:
    """Bootstrap the OpenSearch chunk index via ``ensure_index`` and report it.

    Returns:
        ``0`` after ``ensure_index`` returns; any exception it raises
        propagates to the caller.
    """
    exit_code = 0
    ensure_index()
    print("opensearch index ensured")
    return exit_code
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
17
scripts/init_qdrant.py
Normal file
17
scripts/init_qdrant.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""Bootstrap the Qdrant chunk collection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from app.indexing.qdrant_client import ensure_collection
|
||||
|
||||
|
||||
def main() -> int:
    """Bootstrap the Qdrant chunk collection via ``ensure_collection`` and report it.

    Returns:
        ``0`` after ``ensure_collection`` returns; any exception it raises
        propagates to the caller.
    """
    exit_code = 0
    ensure_collection()
    print("qdrant collection ensured")
    return exit_code
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
24
scripts/reindex_document.py
Normal file
24
scripts/reindex_document.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Re-run the pipeline for a single document by ID."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
import click
|
||||
|
||||
from app.ingestion.pipeline import process_document_id
|
||||
from app.logging_config import configure_logging
|
||||
|
||||
configure_logging()
|
||||
|
||||
|
||||
@click.command()
@click.option("--document-id", required=True, type=str)
def main(document_id: str) -> None:
    """Re-run the pipeline for one document and echo the result.

    Args:
        document_id: Textual UUID of the document, as given to
            ``--document-id``.

    Raises:
        click.BadParameter: If ``document_id`` is not a valid UUID, so click
            reports a usage error instead of a raw ``ValueError`` traceback.
    """
    try:
        parsed_id = uuid.UUID(document_id)
    except ValueError as exc:
        raise click.BadParameter(
            f"{document_id!r} is not a valid UUID",
            param_hint="--document-id",
        ) from exc

    # NOTE(review): scripts/ingest_folder.py passes a run id as a second
    # argument to process_document_id; presumably it is optional - confirm.
    result = process_document_id(parsed_id)
    click.echo(result)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # In standalone mode click normally terminates the interpreter itself;
    # the ``or 0`` fallback only matters if the command ever returns.
    exit_code = main(standalone_mode=True) or 0
    sys.exit(exit_code)
|
||||
74
scripts/smoke_test.py
Normal file
74
scripts/smoke_test.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""Smoke test - verify all infrastructure is reachable and indices are present.
|
||||
|
||||
Runs every check, then exits non-zero if any of them failed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.db.session import get_engine
|
||||
from app.indexing.opensearch_client import ensure_index, get_opensearch
|
||||
from app.indexing.qdrant_client import ensure_collection, get_qdrant
|
||||
from app.logging_config import configure_logging, get_logger
|
||||
from app.storage.minio_client import get_storage
|
||||
|
||||
configure_logging()
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _check_postgres() -> None:
    """Open a database connection and run ``SELECT 1``."""
    with get_engine().connect() as conn:
        conn.execute(text("SELECT 1"))
        print("[ok] postgres")


def _check_minio() -> None:
    """Ensure the storage buckets and require a health status of ``"ok"``."""
    storage = get_storage()
    storage.ensure_buckets()
    info = storage.health()
    if info.get("status") != "ok":
        raise RuntimeError(info)
    print("[ok] minio:", info.get("buckets"))


def _check_opensearch() -> None:
    """Ensure the index and print the reported cluster health status."""
    ensure_index()
    info = get_opensearch().cluster.health()
    print("[ok] opensearch:", info.get("status"))


def _check_qdrant() -> None:
    """Ensure the collection and print the names of all collections."""
    ensure_collection()
    cols = [c.name for c in get_qdrant().get_collections().collections]
    print("[ok] qdrant collections:", cols)


def main() -> int:
    """Run every infrastructure check and summarise the outcome.

    The checks run in a fixed order (postgres, minio, opensearch, qdrant). A
    failing check is recorded and reported, and the remaining checks still
    run, so one invocation shows every broken dependency.

    Returns:
        ``1`` if at least one check raised, otherwise ``0``.
    """
    checks = (
        ("postgres", _check_postgres),
        ("minio", _check_minio),
        ("opensearch", _check_opensearch),
        ("qdrant", _check_qdrant),
    )

    failures: list[str] = []
    for name, check in checks:
        try:
            check()
        except Exception as exc:  # noqa: BLE001
            # Deliberately broad: any error means the service is unusable.
            failures.append(f"{name}: {exc}")
            print(f"[err] {name}: {exc}")

    if failures:
        print("\nSMOKE FAIL:")
        for failure in failures:
            print(" -", failure)
        return 1

    print("\nSMOKE OK")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user