chore: bootstrap repository with governance docs

Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:41:50 +03:00
commit 7f72171572
157 changed files with 11298 additions and 0 deletions

82
.env.example Normal file
View File

@@ -0,0 +1,82 @@
# ---- DEVELOPMENT TEMPLATE ----
# Copy this file to .env. The values below are intentionally weak defaults for
# local Docker Compose. NEVER use them in production — see .env.prod.example.
# ==== PostgreSQL ====
POSTGRES_HOST=postgres
POSTGRES_PORT=5432
POSTGRES_DB=legacyhub
POSTGRES_USER=legacyhub
POSTGRES_PASSWORD=legacyhub
# ==== MinIO ====
MINIO_ENDPOINT=minio:9000
MINIO_ACCESS_KEY=legacyhub
MINIO_SECRET_KEY=legacyhub-secret
MINIO_BUCKET_ORIGINALS=legacyhub-originals
MINIO_BUCKET_DERIVED=legacyhub-derived
MINIO_SECURE=false
MINIO_REGION=us-east-1
# ==== OpenSearch ====
OPENSEARCH_HOST=opensearch
OPENSEARCH_PORT=9200
OPENSEARCH_USE_SSL=false
OPENSEARCH_VERIFY_CERTS=false
OPENSEARCH_USER=
OPENSEARCH_PASSWORD=
OPENSEARCH_INDEX_CHUNKS=legacy_chunks
# ==== Qdrant ====
QDRANT_HOST=qdrant
QDRANT_PORT=6333
QDRANT_API_KEY=
QDRANT_COLLECTION_CHUNKS=legacy_chunks
# ==== Redis ====
REDIS_URL=redis://redis:6379/0
# ==== OCR ====
OCR_LANGUAGES=rus+eng
OCR_ENABLED=true
DOCLING_OCR_ENABLED=false
MAX_DOCUMENT_TIMEOUT_SECONDS=180
OCR_DESKEW=true
OCR_CLEAN=true
OCR_OPTIMIZE=1
# ==== Embeddings / Reranker ====
EMBEDDING_MODEL=BAAI/bge-m3
EMBEDDING_DIM=1024
EMBEDDING_DEVICE=cpu
EMBEDDING_BATCH_SIZE=8
EMBEDDING_NORMALIZE=true
RERANKER_MODEL=BAAI/bge-reranker-v2-m3
RERANKER_DEVICE=cpu
RERANKER_ENABLED=true
RERANKER_BATCH_SIZE=8
# ==== Chunking ====
CHUNK_TARGET_TOKENS=700
CHUNK_MIN_TOKENS=120
CHUNK_MAX_TOKENS=900
CHUNK_OVERLAP_TOKENS=100
# ==== Search ====
HYBRID_OPENSEARCH_TOP_K=50
HYBRID_QDRANT_TOP_K=50
HYBRID_RRF_K=60
RERANK_CANDIDATES=40
# ==== App ====
APP_LOG_LEVEL=INFO
APP_HOST=0.0.0.0
APP_PORT=8000
APP_INPUT_DIR=/data/input
APP_WORK_DIR=/data/work
APP_API_PREFIX=/api/v1
# Comma-separated list of origins allowed to make cross-origin (CORS) requests
# from the browser. List specific origins in production; the wildcard * is
# accepted only for local development.
CORS_ALLOWED_ORIGINS=http://localhost:5173,http://localhost:5273,http://localhost:4173