chore: bootstrap repository with governance docs

Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:41:50 +03:00
commit 7f72171572
157 changed files with 11298 additions and 0 deletions

186
docker-compose.yml Normal file
View File

@@ -0,0 +1,186 @@
# Compose project name.
name: legacyhub

# Environment shared by the application containers; merged into a service with
# `<<: *common-env`. Each `${VAR:-default}` is resolved by Compose from the
# shell environment / .env file, falling back to the default shown here.
x-common-env: &common-env
  # PostgreSQL (the default host is the `postgres` service name).
  POSTGRES_HOST: "${POSTGRES_HOST:-postgres}"
  POSTGRES_PORT: "${POSTGRES_PORT:-5432}"
  POSTGRES_DB: "${POSTGRES_DB:-legacyhub}"
  POSTGRES_USER: "${POSTGRES_USER:-legacyhub}"
  POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-legacyhub}"

  # MinIO object storage.
  MINIO_ENDPOINT: "${MINIO_ENDPOINT:-minio:9000}"
  MINIO_ACCESS_KEY: "${MINIO_ACCESS_KEY:-legacyhub}"
  MINIO_SECRET_KEY: "${MINIO_SECRET_KEY:-legacyhub-secret}"
  MINIO_BUCKET_ORIGINALS: "${MINIO_BUCKET_ORIGINALS:-legacyhub-originals}"
  MINIO_BUCKET_DERIVED: "${MINIO_BUCKET_DERIVED:-legacyhub-derived}"
  MINIO_SECURE: "${MINIO_SECURE:-false}"

  # OpenSearch.
  OPENSEARCH_HOST: "${OPENSEARCH_HOST:-opensearch}"
  OPENSEARCH_PORT: "${OPENSEARCH_PORT:-9200}"
  OPENSEARCH_USE_SSL: "${OPENSEARCH_USE_SSL:-false}"
  OPENSEARCH_VERIFY_CERTS: "${OPENSEARCH_VERIFY_CERTS:-false}"
  OPENSEARCH_INDEX_CHUNKS: "${OPENSEARCH_INDEX_CHUNKS:-legacy_chunks}"

  # Qdrant.
  QDRANT_HOST: "${QDRANT_HOST:-qdrant}"
  QDRANT_PORT: "${QDRANT_PORT:-6333}"
  QDRANT_COLLECTION_CHUNKS: "${QDRANT_COLLECTION_CHUNKS:-legacy_chunks}"

  # Redis.
  REDIS_URL: "${REDIS_URL:-redis://redis:6379/0}"

  # OCR / document processing.
  OCR_LANGUAGES: "${OCR_LANGUAGES:-rus+eng}"
  OCR_ENABLED: "${OCR_ENABLED:-true}"
  DOCLING_OCR_ENABLED: "${DOCLING_OCR_ENABLED:-false}"
  MAX_DOCUMENT_TIMEOUT_SECONDS: "${MAX_DOCUMENT_TIMEOUT_SECONDS:-180}"

  # Embedding and reranker models.
  EMBEDDING_MODEL: "${EMBEDDING_MODEL:-BAAI/bge-m3}"
  EMBEDDING_DEVICE: "${EMBEDDING_DEVICE:-cpu}"
  RERANKER_MODEL: "${RERANKER_MODEL:-BAAI/bge-reranker-v2-m3}"
  RERANKER_DEVICE: "${RERANKER_DEVICE:-cpu}"

  # Application settings. The two directories are fixed (no interpolation) and
  # correspond to the bind-mount targets declared on the api and worker
  # services.
  APP_LOG_LEVEL: "${APP_LOG_LEVEL:-INFO}"
  APP_INPUT_DIR: /data/input
  APP_WORK_DIR: /data/work
# Service definitions: five backing services (postgres, minio, opensearch,
# qdrant, redis) plus the two application containers (api, worker) that wait
# for all of them to report healthy.
#
# NOTE: every `ports` entry below publishes on all host interfaces, and the
# credentials fall back to well-known defaults when the corresponding
# variables are unset.
services:
  postgres:
    image: postgres:16-alpine
    restart: unless-stopped
    environment:
      POSTGRES_DB: "${POSTGRES_DB:-legacyhub}"
      POSTGRES_USER: "${POSTGRES_USER:-legacyhub}"
      POSTGRES_PASSWORD: "${POSTGRES_PASSWORD:-legacyhub}"
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      # `${...}` is expanded by Compose when the file is parsed, so the probe
      # always targets the same user/database the container was created with.
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-legacyhub} -d ${POSTGRES_DB:-legacyhub}"]
      interval: 10s
      timeout: 5s
      retries: 10

  minio:
    image: minio/minio:RELEASE.2024-08-29T01-40-52Z
    restart: unless-stopped
    # Serves /data; the web console is bound to port 9001.
    command: server /data --console-address ":9001"
    environment:
      # Root credentials reuse the access/secret key handed to the app.
      MINIO_ROOT_USER: "${MINIO_ACCESS_KEY:-legacyhub}"
      MINIO_ROOT_PASSWORD: "${MINIO_SECRET_KEY:-legacyhub-secret}"
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - minio_data:/data
    healthcheck:
      # Probe with the bundled `mc` client instead of curl: curl is not
      # reliably present in recent MinIO images, and a missing probe binary
      # leaves this service permanently unhealthy, which blocks api/worker.
      # NOTE(review): confirm `mc ready local` against the pinned image tag.
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 10

  opensearch:
    image: opensearchproject/opensearch:2.15.0
    restart: unless-stopped
    environment:
      - discovery.type=single-node
      # Requires the unlimited memlock ulimit set below.
      - bootstrap.memory_lock=true
      - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g"
      # Security plugin is off, so the probe below uses plain HTTP without
      # credentials.
      - DISABLE_SECURITY_PLUGIN=true
      - DISABLE_INSTALL_DEMO_CONFIG=true
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
    ports:
      - "9200:9200"
      - "9600:9600"
    volumes:
      - opensearch_data:/usr/share/opensearch/data
    healthcheck:
      # Healthy once the cluster health response reports green or yellow.
      test: ["CMD-SHELL", "curl -fsS http://localhost:9200/_cluster/health | grep -q '\"status\":\"\\(green\\|yellow\\)\"'"]
      interval: 15s
      timeout: 10s
      retries: 20

  qdrant:
    image: qdrant/qdrant:v1.11.3
    restart: unless-stopped
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      - qdrant_data:/qdrant/storage
    healthcheck:
      # Only checks that TCP port 6333 accepts a connection (bash /dev/tcp);
      # presumably chosen because the image lacks an HTTP client -- confirm.
      test: ["CMD-SHELL", "bash -c '</dev/tcp/127.0.0.1/6333'"]
      interval: 15s
      timeout: 5s
      retries: 10

  redis:
    image: redis:7-alpine
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 10

  # HTTP API: uvicorn serving `app.main:app` on port 8000.
  api:
    build:
      context: .
      dockerfile: docker/Dockerfile
    image: legacyhub/api:latest
    restart: unless-stopped
    environment:
      <<: *common-env
      APP_HOST: "0.0.0.0"
      APP_PORT: "8000"
    command:
      - uvicorn
      - app.main:app
      - --host
      - "0.0.0.0"
      - --port
      - "8000"
    ports:
      - "8000:8000"
    # Start only after every backing service reports healthy.
    depends_on:
      postgres:
        condition: service_healthy
      minio:
        condition: service_healthy
      opensearch:
        condition: service_healthy
      qdrant:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ./data/input:/data/input
      - ./data/work:/data/work
      # Named volume, also mounted by the worker; presumably keeps downloaded
      # Hugging Face models across container recreation -- confirm.
      - hf_cache:/root/.cache/huggingface

  # Celery worker built from the same Dockerfile and image tag as the api.
  worker:
    build:
      context: .
      dockerfile: docker/Dockerfile
    image: legacyhub/api:latest
    restart: unless-stopped
    environment:
      <<: *common-env
    command:
      - celery
      - -A
      - app.workers.celery_app
      - worker
      - --loglevel=INFO
      - --concurrency=2
    # Same startup gate as the api: all backing services must be healthy.
    depends_on:
      postgres:
        condition: service_healthy
      minio:
        condition: service_healthy
      opensearch:
        condition: service_healthy
      qdrant:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ./data/input:/data/input
      - ./data/work:/data/work
      - hf_cache:/root/.cache/huggingface
# Named volumes, all with default settings (empty configuration).
volumes:
  # Data directories of the backing services.
  postgres_data: {}
  minio_data: {}
  opensearch_data: {}
  qdrant_data: {}
  redis_data: {}
  # Mounted at /root/.cache/huggingface in the api and worker containers.
  hf_cache: {}