chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line endings), AGENTS.md (entry points, stack, discovery order, baseline checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion, failures, rollback, scaling notes), .env.prod.example with rotated credential placeholders, and dev-only warnings on .env.example. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
111
app/config.py
Normal file
111
app/config.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""Centralized typed configuration loaded from environment variables.
|
||||
|
||||
All other modules import :data:`settings` and never touch ``os.environ`` directly.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import lru_cache
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Typed application settings populated from the environment.

    Values are read from process environment variables and from a ``.env``
    file (UTF-8). Variable names are matched case-insensitively and unknown
    variables are ignored. Every field declares its environment variable name
    explicitly through ``alias``; the literal passed as the first ``Field``
    argument is the default used when the variable is absent.

    NOTE(review): the default hostnames and credentials look like values for a
    local development stack — confirm they are overridden in production.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ---------------- App ----------------
    app_log_level: str = Field("INFO", alias="APP_LOG_LEVEL")
    app_host: str = Field("0.0.0.0", alias="APP_HOST")
    app_port: int = Field(8000, alias="APP_PORT")
    app_input_dir: str = Field("/data/input", alias="APP_INPUT_DIR")
    app_work_dir: str = Field("/data/work", alias="APP_WORK_DIR")
    app_api_prefix: str = Field("/api/v1", alias="APP_API_PREFIX")

    # ---------------- Postgres ----------------
    postgres_host: str = Field("postgres", alias="POSTGRES_HOST")
    postgres_port: int = Field(5432, alias="POSTGRES_PORT")
    postgres_db: str = Field("legacyhub", alias="POSTGRES_DB")
    postgres_user: str = Field("legacyhub", alias="POSTGRES_USER")
    postgres_password: str = Field("legacyhub", alias="POSTGRES_PASSWORD")

    @property
    def database_url(self) -> str:
        """Return the ``postgresql+psycopg`` connection URL for these settings.

        The user name and password are percent-encoded so that credentials
        containing URL-reserved characters (``@``, ``:``, ``/``, ``#``, ``%``,
        spaces, ...) cannot corrupt the URL structure. Values made only of
        unreserved characters are emitted unchanged.

        NOTE: supply the raw (not pre-encoded) credentials in the environment;
        an already percent-encoded value would be encoded a second time.
        """
        # Local import keeps this block self-contained without touching the
        # module's import section.
        from urllib.parse import quote

        # safe="" also encodes "/" (which quote() leaves alone by default).
        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+psycopg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    # ---------------- MinIO ----------------
    minio_endpoint: str = Field("minio:9000", alias="MINIO_ENDPOINT")
    minio_access_key: str = Field("legacyhub", alias="MINIO_ACCESS_KEY")
    minio_secret_key: str = Field("legacyhub-secret", alias="MINIO_SECRET_KEY")
    minio_bucket_originals: str = Field("legacyhub-originals", alias="MINIO_BUCKET_ORIGINALS")
    minio_bucket_derived: str = Field("legacyhub-derived", alias="MINIO_BUCKET_DERIVED")
    minio_secure: bool = Field(False, alias="MINIO_SECURE")
    minio_region: str = Field("us-east-1", alias="MINIO_REGION")

    # ---------------- OpenSearch ----------------
    opensearch_host: str = Field("opensearch", alias="OPENSEARCH_HOST")
    opensearch_port: int = Field(9200, alias="OPENSEARCH_PORT")
    opensearch_use_ssl: bool = Field(False, alias="OPENSEARCH_USE_SSL")
    opensearch_verify_certs: bool = Field(False, alias="OPENSEARCH_VERIFY_CERTS")
    opensearch_user: str = Field("", alias="OPENSEARCH_USER")
    opensearch_password: str = Field("", alias="OPENSEARCH_PASSWORD")
    opensearch_index_chunks: str = Field("legacy_chunks", alias="OPENSEARCH_INDEX_CHUNKS")

    # ---------------- Qdrant ----------------
    qdrant_host: str = Field("qdrant", alias="QDRANT_HOST")
    qdrant_port: int = Field(6333, alias="QDRANT_PORT")
    qdrant_api_key: str = Field("", alias="QDRANT_API_KEY")
    qdrant_collection_chunks: str = Field("legacy_chunks", alias="QDRANT_COLLECTION_CHUNKS")

    # ---------------- Redis ----------------
    redis_url: str = Field("redis://redis:6379/0", alias="REDIS_URL")

    # ---------------- OCR ----------------
    ocr_languages: str = Field("rus+eng", alias="OCR_LANGUAGES")
    ocr_enabled: bool = Field(True, alias="OCR_ENABLED")
    docling_ocr_enabled: bool = Field(False, alias="DOCLING_OCR_ENABLED")
    max_document_timeout_seconds: int = Field(180, alias="MAX_DOCUMENT_TIMEOUT_SECONDS")
    ocr_deskew: bool = Field(True, alias="OCR_DESKEW")
    ocr_clean: bool = Field(True, alias="OCR_CLEAN")
    ocr_optimize: int = Field(1, alias="OCR_OPTIMIZE")

    # ---------------- Embeddings / Reranker ----------------
    embedding_model: str = Field("BAAI/bge-m3", alias="EMBEDDING_MODEL")
    embedding_dim: int = Field(1024, alias="EMBEDDING_DIM")
    embedding_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="EMBEDDING_DEVICE")
    embedding_batch_size: int = Field(8, alias="EMBEDDING_BATCH_SIZE")
    embedding_normalize: bool = Field(True, alias="EMBEDDING_NORMALIZE")

    reranker_model: str = Field("BAAI/bge-reranker-v2-m3", alias="RERANKER_MODEL")
    reranker_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="RERANKER_DEVICE")
    reranker_enabled: bool = Field(True, alias="RERANKER_ENABLED")
    reranker_batch_size: int = Field(8, alias="RERANKER_BATCH_SIZE")

    # ---------------- Chunking ----------------
    chunk_target_tokens: int = Field(700, alias="CHUNK_TARGET_TOKENS")
    chunk_min_tokens: int = Field(120, alias="CHUNK_MIN_TOKENS")
    chunk_max_tokens: int = Field(900, alias="CHUNK_MAX_TOKENS")
    chunk_overlap_tokens: int = Field(100, alias="CHUNK_OVERLAP_TOKENS")

    # ---------------- Hybrid search ----------------
    hybrid_opensearch_top_k: int = Field(50, alias="HYBRID_OPENSEARCH_TOP_K")
    hybrid_qdrant_top_k: int = Field(50, alias="HYBRID_QDRANT_TOP_K")
    hybrid_rrf_k: int = Field(60, alias="HYBRID_RRF_K")
    rerank_candidates: int = Field(40, alias="RERANK_CANDIDATES")
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the :class:`Settings` object on first call and reuse it afterwards.

    The ``lru_cache`` decorator memoizes this zero-argument call, so every
    caller receives the same instance.
    """
    # Required values come from the environment rather than constructor
    # arguments, hence the call-arg suppression for the type checker.
    loaded = Settings()  # type: ignore[call-arg]
    return loaded
|
||||
|
||||
|
||||
# Shared module-level instance, created at import time through the cached
# ``get_settings()`` factory; other modules import this name.
settings = get_settings()
|
||||
Reference in New Issue
Block a user