"""Centralized typed configuration loaded from environment variables. All other modules import :data:`settings` and never touch ``os.environ`` directly. """ from __future__ import annotations from functools import lru_cache from typing import Literal from pydantic import Field from pydantic_settings import BaseSettings, SettingsConfigDict class Settings(BaseSettings): model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore", ) # ---------------- App ---------------- app_log_level: str = Field("INFO", alias="APP_LOG_LEVEL") app_host: str = Field("0.0.0.0", alias="APP_HOST") app_port: int = Field(8000, alias="APP_PORT") app_input_dir: str = Field("/data/input", alias="APP_INPUT_DIR") app_work_dir: str = Field("/data/work", alias="APP_WORK_DIR") app_api_prefix: str = Field("/api/v1", alias="APP_API_PREFIX") # ---------------- Postgres ---------------- postgres_host: str = Field("postgres", alias="POSTGRES_HOST") postgres_port: int = Field(5432, alias="POSTGRES_PORT") postgres_db: str = Field("legacyhub", alias="POSTGRES_DB") postgres_user: str = Field("legacyhub", alias="POSTGRES_USER") postgres_password: str = Field("legacyhub", alias="POSTGRES_PASSWORD") @property def database_url(self) -> str: return ( f"postgresql+psycopg://{self.postgres_user}:{self.postgres_password}" f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}" ) # ---------------- MinIO ---------------- minio_endpoint: str = Field("minio:9000", alias="MINIO_ENDPOINT") minio_access_key: str = Field("legacyhub", alias="MINIO_ACCESS_KEY") minio_secret_key: str = Field("legacyhub-secret", alias="MINIO_SECRET_KEY") minio_bucket_originals: str = Field("legacyhub-originals", alias="MINIO_BUCKET_ORIGINALS") minio_bucket_derived: str = Field("legacyhub-derived", alias="MINIO_BUCKET_DERIVED") minio_secure: bool = Field(False, alias="MINIO_SECURE") minio_region: str = Field("us-east-1", alias="MINIO_REGION") # ---------------- OpenSearch ---------------- opensearch_host: str = Field("opensearch", alias="OPENSEARCH_HOST") opensearch_port: int = Field(9200, alias="OPENSEARCH_PORT") opensearch_use_ssl: bool = Field(False, alias="OPENSEARCH_USE_SSL") opensearch_verify_certs: bool = Field(False, alias="OPENSEARCH_VERIFY_CERTS") opensearch_user: str = Field("", alias="OPENSEARCH_USER") opensearch_password: str = Field("", alias="OPENSEARCH_PASSWORD") opensearch_index_chunks: str = Field("legacy_chunks", alias="OPENSEARCH_INDEX_CHUNKS") # ---------------- Qdrant ---------------- qdrant_host: str = Field("qdrant", alias="QDRANT_HOST") qdrant_port: int = Field(6333, alias="QDRANT_PORT") qdrant_api_key: str = Field("", alias="QDRANT_API_KEY") qdrant_collection_chunks: str = Field("legacy_chunks", alias="QDRANT_COLLECTION_CHUNKS") # ---------------- Redis ---------------- redis_url: str = Field("redis://redis:6379/0", alias="REDIS_URL") # ---------------- OCR ---------------- ocr_languages: str = Field("rus+eng", alias="OCR_LANGUAGES") ocr_enabled: bool = Field(True, alias="OCR_ENABLED") docling_ocr_enabled: bool = Field(False, alias="DOCLING_OCR_ENABLED") max_document_timeout_seconds: int = Field(180, alias="MAX_DOCUMENT_TIMEOUT_SECONDS") ocr_deskew: bool = Field(True, alias="OCR_DESKEW") ocr_clean: bool = Field(True, alias="OCR_CLEAN") ocr_optimize: int = Field(1, alias="OCR_OPTIMIZE") # ---------------- Embeddings / Reranker ---------------- embedding_model: str = Field("BAAI/bge-m3", alias="EMBEDDING_MODEL") embedding_dim: int = Field(1024, alias="EMBEDDING_DIM") embedding_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="EMBEDDING_DEVICE") embedding_batch_size: int = Field(8, alias="EMBEDDING_BATCH_SIZE") embedding_normalize: bool = Field(True, alias="EMBEDDING_NORMALIZE") reranker_model: str = Field("BAAI/bge-reranker-v2-m3", alias="RERANKER_MODEL") reranker_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="RERANKER_DEVICE") reranker_enabled: bool = Field(True, alias="RERANKER_ENABLED") reranker_batch_size: int = Field(8, alias="RERANKER_BATCH_SIZE") # ---------------- Chunking ---------------- chunk_target_tokens: int = Field(700, alias="CHUNK_TARGET_TOKENS") chunk_min_tokens: int = Field(120, alias="CHUNK_MIN_TOKENS") chunk_max_tokens: int = Field(900, alias="CHUNK_MAX_TOKENS") chunk_overlap_tokens: int = Field(100, alias="CHUNK_OVERLAP_TOKENS") # ---------------- Hybrid search ---------------- hybrid_opensearch_top_k: int = Field(50, alias="HYBRID_OPENSEARCH_TOP_K") hybrid_qdrant_top_k: int = Field(50, alias="HYBRID_QDRANT_TOP_K") hybrid_rrf_k: int = Field(60, alias="HYBRID_RRF_K") rerank_candidates: int = Field(40, alias="RERANK_CANDIDATES") @lru_cache(maxsize=1) def get_settings() -> Settings: return Settings() # type: ignore[call-arg] settings = get_settings()