Files
LegacyHUB/app/config.py
Vadim Malanov 24282d1279 feat(api): optional API-key auth middleware
Adds defence-in-depth shared-secret auth that activates when API_KEY
is set. Behaviour:

- empty API_KEY (dev default): every request allowed, middleware is
  not even installed;
- non-empty API_KEY: every request under APP_API_PREFIX except
  /health must carry X-API-Key: <value> or
  Authorization: Bearer <value>. /, /docs, /redoc, /openapi.json and
  CORS preflight stay open. hmac.compare_digest is used for the
  constant-time comparison.

The middleware resolves settings lazily so test fixtures can reload
app.config and have the new API_KEY take effect on the next install.

Tests (tests/test_api_security.py, 5 cases):
- /health remains open;
- protected route rejects missing key (401);
- protected route accepts X-API-Key header;
- protected route accepts Authorization: Bearer header;
- protected route rejects a wrong key.

Frontend:
- VITE_API_KEY env reads the key and Axios injects it on every
  request, falling back to no header when empty so SSO/reverse-proxy
  deployments stay unchanged.
- vite-env.d.ts adds the new env entry.

Docs/ops:
- .env.example documents the dev-default empty key;
- .env.prod.example marks API_KEY as a required rotation point;
- docker-compose.yml forwards API_KEY (defaults to empty);
- docker-compose.prod.yml fails the stack with ?:required when API_KEY
  is missing;
- RUNBOOK gains an API authentication section with header examples
  and the reverse-proxy + key layering recommendation.

pytest -q: 33 passed (5 new security + 28 prior).
npx tsc --noEmit: clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 17:17:27 +03:00

121 lines
5.4 KiB
Python

"""Centralized typed configuration loaded from environment variables.
All other modules import :data:`settings` and never touch ``os.environ`` directly.
"""
from __future__ import annotations
from functools import lru_cache
from typing import Literal
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed application settings read from the environment and ``.env``.

    Each field names its environment variable through ``alias``. The model
    configuration below reads ``.env`` as UTF-8, sets ``case_sensitive=False``
    and uses ``extra="ignore"`` for entries that match no field.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # ---------------- App ----------------
    app_log_level: str = Field("INFO", alias="APP_LOG_LEVEL")
    app_host: str = Field("0.0.0.0", alias="APP_HOST")
    app_port: int = Field(8000, alias="APP_PORT")
    app_input_dir: str = Field("/data/input", alias="APP_INPUT_DIR")
    app_work_dir: str = Field("/data/work", alias="APP_WORK_DIR")
    app_api_prefix: str = Field("/api/v1", alias="APP_API_PREFIX")
    # Comma-separated list; use ``cors_origins`` for the parsed form.
    cors_allowed_origins: str = Field(
        "http://localhost:5173,http://localhost:5273,http://localhost:4173",
        alias="CORS_ALLOWED_ORIGINS",
    )
    # Shared-secret API key; empty by default.
    api_key: str = Field("", alias="API_KEY")

    @property
    def cors_origins(self) -> list[str]:
        """Return ``cors_allowed_origins`` split on commas.

        Surrounding whitespace is stripped from each entry and empty entries
        (for example from a trailing comma) are dropped.
        """
        return [o.strip() for o in self.cors_allowed_origins.split(",") if o.strip()]

    # ---------------- Postgres ----------------
    postgres_host: str = Field("postgres", alias="POSTGRES_HOST")
    postgres_port: int = Field(5432, alias="POSTGRES_PORT")
    postgres_db: str = Field("legacyhub", alias="POSTGRES_DB")
    postgres_user: str = Field("legacyhub", alias="POSTGRES_USER")
    postgres_password: str = Field("legacyhub", alias="POSTGRES_PASSWORD")

    @property
    def database_url(self) -> str:
        """Return the ``postgresql+psycopg://`` connection URL.

        The user name and password are percent-encoded so that reserved
        characters such as ``@``, ``:``, ``/`` or ``%`` inside a credential
        are not mistaken for URL delimiters. Credentials made only of
        unreserved characters (such as the defaults) are emitted unchanged.

        NOTE(review): POSTGRES_USER / POSTGRES_PASSWORD are expected to hold
        the raw values; a value that was already percent-encoded by hand
        would now be encoded twice.
        """
        # Imported locally so the block does not depend on a file-level import.
        from urllib.parse import quote

        # safe="" also encodes "/"; quote() (not quote_plus) keeps a space as
        # %20 rather than "+", which not every URL parser decodes back.
        user = quote(self.postgres_user, safe="")
        password = quote(self.postgres_password, safe="")
        return (
            f"postgresql+psycopg://{user}:{password}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
        )

    # ---------------- MinIO ----------------
    minio_endpoint: str = Field("minio:9000", alias="MINIO_ENDPOINT")
    minio_access_key: str = Field("legacyhub", alias="MINIO_ACCESS_KEY")
    minio_secret_key: str = Field("legacyhub-secret", alias="MINIO_SECRET_KEY")
    minio_bucket_originals: str = Field(
        "legacyhub-originals", alias="MINIO_BUCKET_ORIGINALS"
    )
    minio_bucket_derived: str = Field(
        "legacyhub-derived", alias="MINIO_BUCKET_DERIVED"
    )
    minio_secure: bool = Field(False, alias="MINIO_SECURE")
    minio_region: str = Field("us-east-1", alias="MINIO_REGION")

    # ---------------- OpenSearch ----------------
    opensearch_host: str = Field("opensearch", alias="OPENSEARCH_HOST")
    opensearch_port: int = Field(9200, alias="OPENSEARCH_PORT")
    opensearch_use_ssl: bool = Field(False, alias="OPENSEARCH_USE_SSL")
    opensearch_verify_certs: bool = Field(False, alias="OPENSEARCH_VERIFY_CERTS")
    opensearch_user: str = Field("", alias="OPENSEARCH_USER")
    opensearch_password: str = Field("", alias="OPENSEARCH_PASSWORD")
    opensearch_index_chunks: str = Field(
        "legacy_chunks", alias="OPENSEARCH_INDEX_CHUNKS"
    )

    # ---------------- Qdrant ----------------
    qdrant_host: str = Field("qdrant", alias="QDRANT_HOST")
    qdrant_port: int = Field(6333, alias="QDRANT_PORT")
    qdrant_api_key: str = Field("", alias="QDRANT_API_KEY")
    qdrant_collection_chunks: str = Field(
        "legacy_chunks", alias="QDRANT_COLLECTION_CHUNKS"
    )

    # ---------------- Redis ----------------
    redis_url: str = Field("redis://redis:6379/0", alias="REDIS_URL")

    # ---------------- OCR ----------------
    ocr_languages: str = Field("rus+eng", alias="OCR_LANGUAGES")
    ocr_enabled: bool = Field(True, alias="OCR_ENABLED")
    docling_ocr_enabled: bool = Field(False, alias="DOCLING_OCR_ENABLED")
    max_document_timeout_seconds: int = Field(
        180, alias="MAX_DOCUMENT_TIMEOUT_SECONDS"
    )
    ocr_deskew: bool = Field(True, alias="OCR_DESKEW")
    ocr_clean: bool = Field(True, alias="OCR_CLEAN")
    ocr_optimize: int = Field(1, alias="OCR_OPTIMIZE")

    # ---------------- Embeddings / Reranker ----------------
    embedding_model: str = Field("BAAI/bge-m3", alias="EMBEDDING_MODEL")
    embedding_dim: int = Field(1024, alias="EMBEDDING_DIM")
    embedding_device: Literal["cpu", "cuda", "mps"] = Field(
        "cpu", alias="EMBEDDING_DEVICE"
    )
    embedding_batch_size: int = Field(8, alias="EMBEDDING_BATCH_SIZE")
    embedding_normalize: bool = Field(True, alias="EMBEDDING_NORMALIZE")
    reranker_model: str = Field("BAAI/bge-reranker-v2-m3", alias="RERANKER_MODEL")
    reranker_device: Literal["cpu", "cuda", "mps"] = Field(
        "cpu", alias="RERANKER_DEVICE"
    )
    reranker_enabled: bool = Field(True, alias="RERANKER_ENABLED")
    reranker_batch_size: int = Field(8, alias="RERANKER_BATCH_SIZE")

    # ---------------- Chunking ----------------
    chunk_target_tokens: int = Field(700, alias="CHUNK_TARGET_TOKENS")
    chunk_min_tokens: int = Field(120, alias="CHUNK_MIN_TOKENS")
    chunk_max_tokens: int = Field(900, alias="CHUNK_MAX_TOKENS")
    chunk_overlap_tokens: int = Field(100, alias="CHUNK_OVERLAP_TOKENS")

    # ---------------- Hybrid search ----------------
    hybrid_opensearch_top_k: int = Field(50, alias="HYBRID_OPENSEARCH_TOP_K")
    hybrid_qdrant_top_k: int = Field(50, alias="HYBRID_QDRANT_TOP_K")
    hybrid_rrf_k: int = Field(60, alias="HYBRID_RRF_K")
    rerank_candidates: int = Field(40, alias="RERANK_CANDIDATES")
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the application settings, constructing them on first use.

    The ``lru_cache`` decorator memoizes the zero-argument call, so repeated
    calls hand back the same ``Settings`` instance until the cache is cleared
    with ``get_settings.cache_clear()``.
    """
    loaded = Settings()  # type: ignore[call-arg]
    return loaded
# Module-level settings object, created when this module is imported; this is
# the name the module docstring tells other modules to import.
settings = get_settings()