Adds defence-in-depth shared-secret auth that activates when API_KEY is set. Behaviour: - empty API_KEY (dev default): every request allowed, middleware is not even installed; - non-empty API_KEY: every request under APP_API_PREFIX except /health must carry X-API-Key: <value> or Authorization: Bearer <value>. /, /docs, /redoc, /openapi.json and CORS preflight stay open. hmac.compare_digest is used for the constant-time comparison. The middleware resolves settings lazily so test fixtures can reload app.config and have the new API_KEY take effect on the next install. Tests (tests/test_api_security.py, 5 cases): - /health remains open; - protected route rejects missing key (401); - protected route accepts X-API-Key header; - protected route accepts Authorization: Bearer header; - protected route rejects a wrong key. Frontend: - VITE_API_KEY env reads the key and Axios injects it on every request, falling back to no header when empty so SSO/reverse-proxy deployments stay unchanged. - vite-env.d.ts adds the new env entry. Docs/ops: - .env.example documents the dev-default empty key; - .env.prod.example marks API_KEY as a required rotation point; - docker-compose.yml forwards API_KEY (defaults to empty); - docker-compose.prod.yml fails the stack with ?:required when API_KEY is missing; - RUNBOOK gains an API authentication section with header examples and the reverse-proxy + key layering recommendation. pytest -q: 33 passed (5 new security + 28 prior). npx tsc --noEmit: clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
121 lines
5.4 KiB
Python
121 lines
5.4 KiB
Python
"""Centralized typed configuration loaded from environment variables.
|
|
|
|
All other modules import :data:`settings` and never touch ``os.environ`` directly.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from functools import lru_cache
|
|
from typing import Literal
|
|
|
|
from pydantic import Field
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Typed application settings populated from the environment.

    Values are read from environment variables and from a UTF-8 ``.env`` file.
    Each field's ``alias`` is the variable name it is populated from; names are
    matched case-insensitively and unknown variables are ignored. Every field
    has a default, so the class can be instantiated with no arguments.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ---------------- App ----------------
    app_log_level: str = Field("INFO", alias="APP_LOG_LEVEL")
    app_host: str = Field("0.0.0.0", alias="APP_HOST")
    app_port: int = Field(8000, alias="APP_PORT")
    app_input_dir: str = Field("/data/input", alias="APP_INPUT_DIR")
    app_work_dir: str = Field("/data/work", alias="APP_WORK_DIR")
    app_api_prefix: str = Field("/api/v1", alias="APP_API_PREFIX")
    # Comma-separated list; use ``cors_origins`` for the parsed form.
    cors_allowed_origins: str = Field(
        "http://localhost:5173,http://localhost:5273,http://localhost:4173",
        alias="CORS_ALLOWED_ORIGINS",
    )
    # Defaults to the empty string when API_KEY is not set.
    api_key: str = Field("", alias="API_KEY")

    @property
    def cors_origins(self) -> list[str]:
        """Return ``cors_allowed_origins`` split on commas.

        Surrounding whitespace is stripped from every entry and empty entries
        (for example from a trailing comma) are dropped.
        """
        return [o.strip() for o in self.cors_allowed_origins.split(",") if o.strip()]

    # ---------------- Postgres ----------------
    postgres_host: str = Field("postgres", alias="POSTGRES_HOST")
    postgres_port: int = Field(5432, alias="POSTGRES_PORT")
    postgres_db: str = Field("legacyhub", alias="POSTGRES_DB")
    postgres_user: str = Field("legacyhub", alias="POSTGRES_USER")
    postgres_password: str = Field("legacyhub", alias="POSTGRES_PASSWORD")

    @property
    def database_url(self) -> str:
        """Return the ``postgresql+psycopg://`` connection URL.

        The user name and password are percent-encoded so that credentials
        containing URL-reserved characters (``@``, ``:``, ``/``, ``%``, ...)
        cannot corrupt the URL structure. Credentials made only of unreserved
        characters are emitted unchanged.
        """
        # Local import keeps this block self-contained; the module is stdlib.
        from urllib.parse import quote

        # safe="" also encodes "/", which quote() leaves alone by default.
        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+psycopg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    # ---------------- MinIO ----------------
    minio_endpoint: str = Field("minio:9000", alias="MINIO_ENDPOINT")
    minio_access_key: str = Field("legacyhub", alias="MINIO_ACCESS_KEY")
    minio_secret_key: str = Field("legacyhub-secret", alias="MINIO_SECRET_KEY")
    minio_bucket_originals: str = Field("legacyhub-originals", alias="MINIO_BUCKET_ORIGINALS")
    minio_bucket_derived: str = Field("legacyhub-derived", alias="MINIO_BUCKET_DERIVED")
    minio_secure: bool = Field(False, alias="MINIO_SECURE")
    minio_region: str = Field("us-east-1", alias="MINIO_REGION")

    # ---------------- OpenSearch ----------------
    opensearch_host: str = Field("opensearch", alias="OPENSEARCH_HOST")
    opensearch_port: int = Field(9200, alias="OPENSEARCH_PORT")
    opensearch_use_ssl: bool = Field(False, alias="OPENSEARCH_USE_SSL")
    opensearch_verify_certs: bool = Field(False, alias="OPENSEARCH_VERIFY_CERTS")
    opensearch_user: str = Field("", alias="OPENSEARCH_USER")
    opensearch_password: str = Field("", alias="OPENSEARCH_PASSWORD")
    opensearch_index_chunks: str = Field("legacy_chunks", alias="OPENSEARCH_INDEX_CHUNKS")

    # ---------------- Qdrant ----------------
    qdrant_host: str = Field("qdrant", alias="QDRANT_HOST")
    qdrant_port: int = Field(6333, alias="QDRANT_PORT")
    qdrant_api_key: str = Field("", alias="QDRANT_API_KEY")
    qdrant_collection_chunks: str = Field("legacy_chunks", alias="QDRANT_COLLECTION_CHUNKS")

    # ---------------- Redis ----------------
    redis_url: str = Field("redis://redis:6379/0", alias="REDIS_URL")

    # ---------------- OCR ----------------
    ocr_languages: str = Field("rus+eng", alias="OCR_LANGUAGES")
    ocr_enabled: bool = Field(True, alias="OCR_ENABLED")
    docling_ocr_enabled: bool = Field(False, alias="DOCLING_OCR_ENABLED")
    max_document_timeout_seconds: int = Field(180, alias="MAX_DOCUMENT_TIMEOUT_SECONDS")
    ocr_deskew: bool = Field(True, alias="OCR_DESKEW")
    ocr_clean: bool = Field(True, alias="OCR_CLEAN")
    ocr_optimize: int = Field(1, alias="OCR_OPTIMIZE")

    # ---------------- Embeddings / Reranker ----------------
    embedding_model: str = Field("BAAI/bge-m3", alias="EMBEDDING_MODEL")
    embedding_dim: int = Field(1024, alias="EMBEDDING_DIM")
    embedding_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="EMBEDDING_DEVICE")
    embedding_batch_size: int = Field(8, alias="EMBEDDING_BATCH_SIZE")
    embedding_normalize: bool = Field(True, alias="EMBEDDING_NORMALIZE")

    reranker_model: str = Field("BAAI/bge-reranker-v2-m3", alias="RERANKER_MODEL")
    reranker_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="RERANKER_DEVICE")
    reranker_enabled: bool = Field(True, alias="RERANKER_ENABLED")
    reranker_batch_size: int = Field(8, alias="RERANKER_BATCH_SIZE")

    # ---------------- Chunking ----------------
    chunk_target_tokens: int = Field(700, alias="CHUNK_TARGET_TOKENS")
    chunk_min_tokens: int = Field(120, alias="CHUNK_MIN_TOKENS")
    chunk_max_tokens: int = Field(900, alias="CHUNK_MAX_TOKENS")
    chunk_overlap_tokens: int = Field(100, alias="CHUNK_OVERLAP_TOKENS")

    # ---------------- Hybrid search ----------------
    hybrid_opensearch_top_k: int = Field(50, alias="HYBRID_OPENSEARCH_TOP_K")
    hybrid_qdrant_top_k: int = Field(50, alias="HYBRID_QDRANT_TOP_K")
    hybrid_rrf_k: int = Field(60, alias="HYBRID_RRF_K")
    rerank_candidates: int = Field(40, alias="RERANK_CANDIDATES")
|
|
|
|
|
|
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Build the application :class:`Settings` once and reuse it afterwards.

    The function takes no arguments, so the single-slot ``lru_cache`` hands
    back the same instance on every call after the first. Call
    ``get_settings.cache_clear()`` to force the next call to re-read the
    environment.
    """
    # Every field has a default, so no constructor arguments are needed; the
    # ignore silences type checkers that expect the aliased fields as kwargs.
    loaded = Settings()  # type: ignore[call-arg]
    return loaded
|
|
|
|
|
|
# Shared settings instance that other modules import. It is created when this
# module is first imported, so the environment must be configured before then.
settings = get_settings()
|