diff --git a/.env.example b/.env.example index 4b4edbf..0da689a 100644 --- a/.env.example +++ b/.env.example @@ -80,3 +80,8 @@ APP_API_PREFIX=/api/v1 # Comma-separated list of allowed origins for the browser. Use specific origins # in production; * is accepted only for local development. CORS_ALLOWED_ORIGINS=http://localhost:5173,http://localhost:5273,http://localhost:4173 + +# Optional shared-secret API key. When empty, the API is open (dev default). +# When set, every request under APP_API_PREFIX except /health requires +# X-API-Key: <key> or Authorization: Bearer <key>. +API_KEY= diff --git a/.env.prod.example b/.env.prod.example index b7477f3..8503386 100644 --- a/.env.prod.example +++ b/.env.prod.example @@ -72,3 +72,6 @@ APP_API_PREFIX=/api/v1 # Comma-separated list of allowed origins. NEVER use * in production. CORS_ALLOWED_ORIGINS=https://legacyhub.teamhub.example + +# Mandatory in production. Use a long random value (e.g. `openssl rand -hex 32`). +API_KEY=__ROTATE_ME__ diff --git a/RUNBOOK.md b/RUNBOOK.md index ea6712c..5d318d1 100644 --- a/RUNBOOK.md +++ b/RUNBOOK.md @@ -95,6 +95,36 @@ docker compose exec postgres psql -U legacyhub -d legacyhub -c \ | Indexing stuck | OpenSearch + Qdrant health | `scripts/init_opensearch.py`, `scripts/init_qdrant.py` | | Reranker disabled | API logs → `reranker.disabled` | Ensure `RERANKER_ENABLED=true`; HF cache mounted | +## API authentication + +Two mechanisms layered together: + +1. **Reverse proxy / SSO** (preferred). Front the API with nginx, Traefik, or + an OAuth gateway. The reverse proxy terminates TLS and authenticates the + caller; LegacyHUB never sees a raw user identity. +2. **Shared-secret API key** (defence in depth). Set `API_KEY` to a long + random value (`openssl rand -hex 32`). Every request to `APP_API_PREFIX` + except `/health` must then carry either: + + ```http + X-API-Key: <key> + ``` + or: + ```http + Authorization: Bearer <key> + ``` + + `/health` is intentionally exempt so external probes do not need the + secret.
+ + In production this is required (`docker-compose.prod.yml` fails the + stack if `API_KEY` is empty). In development the key is optional and + the default empty value disables the middleware entirely. + + The frontend reads `VITE_API_KEY` and injects the header on every Axios + request. For SSO deployments leave `VITE_API_KEY` empty and let the + reverse proxy inject the header server-side. + ## Verification gates (per change) 1. `python -m pytest tests/ -q` — full unit suite (19+ tests). diff --git a/app/api/security.py b/app/api/security.py new file mode 100644 index 0000000..4637994 --- /dev/null +++ b/app/api/security.py @@ -0,0 +1,83 @@ +"""Optional API-key auth. + +Behaviour: + +- If ``API_KEY`` is empty (default) every request is allowed - matches the + original dev configuration. +- If ``API_KEY`` is set, every request to a route under ``app_api_prefix`` + must carry either ``X-API-Key: <key>`` or ``Authorization: Bearer <key>``. +- ``/health`` is intentionally exempt so external probes (compose healthcheck, + reverse proxy, monitoring) keep working without leaking the key. +- The root ``/`` page stays open so the OpenAPI banner and docs links remain + reachable. + +This is a defence-in-depth layer behind whatever reverse proxy / OAuth gateway +runs in production - not a replacement. +""" + +from __future__ import annotations + +import hmac +from typing import Awaitable, Callable + +from fastapi import FastAPI, Request, Response +from fastapi.responses import JSONResponse +from starlette.types import ASGIApp + +from app.config import settings as _module_settings + +EXEMPT_PATHS: tuple[str, ...] = ("/", "/docs", "/redoc", "/openapi.json") +EXEMPT_SUFFIXES: tuple[str, ...]
EXEMPT_SUFFIXES: tuple[str, ...] = ("/health",)


def _extract_token(request: Request) -> str | None:
    """Return the API key carried by *request*, or ``None`` when there is none.

    ``X-API-Key`` is consulted first. If it is absent or blank, the
    ``Authorization`` header is used when its scheme is ``Bearer``
    (case-insensitive). Surrounding whitespace is stripped and an empty
    credential is reported as ``None``.
    """
    api_key = (request.headers.get("x-api-key") or "").strip()
    if api_key:
        return api_key
    authorization = request.headers.get("authorization") or ""
    scheme, _, credentials = authorization.partition(" ")
    if scheme.lower() != "bearer":
        return None
    return credentials.strip() or None


def _token_matches(candidate: str, expected: str) -> bool:
    """Return whether two secrets are equal, compared in constant time.

    Both values are encoded to bytes first: ``hmac.compare_digest`` raises
    ``TypeError`` for ``str`` arguments that contain non-ASCII characters,
    which would turn a malformed header into a 500 instead of a 401.
    """
    return hmac.compare_digest(candidate.encode("utf-8"), expected.encode("utf-8"))


def _is_exempt(
    path: str,
    api_prefix: str,
    *,
    exempt_paths: tuple[str, ...],
    exempt_suffixes: tuple[str, ...],
) -> bool:
    """Return whether *path* may be served without an API key.

    A path is exempt when it is listed in *exempt_paths*, when it lies outside
    *api_prefix*, or when it is exactly ``api_prefix + suffix`` for one of
    *exempt_suffixes*. The suffix is matched against the full path rather than
    with ``str.endswith`` so that a protected route whose last segment happens
    to be ``health`` is not opened up by accident.
    """
    if path in exempt_paths or not path.startswith(api_prefix):
        return True
    base = api_prefix.rstrip("/")
    return any(path == f"{base}{suffix}" for suffix in exempt_suffixes)


def install_api_key_auth(app: FastAPI) -> None:
    """Attach the API-key middleware to *app*.

    Always safe to call; it is a no-op when no key is configured.

    ``app.config.settings`` is imported inside the function so test fixtures
    can reload the config module and have the new ``API_KEY`` value take
    effect on the next install.

    Once installed, a request is passed through untouched when it is an
    ``OPTIONS`` request or its path is exempt (see ``_is_exempt``). Every
    other request must present the configured key via ``X-API-Key`` or
    ``Authorization: Bearer <token>``; otherwise a 401 JSON response with
    ``WWW-Authenticate: Bearer`` is returned.

    NOTE(review): Starlette runs the most recently added middleware outermost,
    so if this is installed after ``CORSMiddleware`` the 401 produced here is
    sent without CORS headers and browsers report a CORS failure instead of
    the 401 - confirm the call order in ``app.main``.
    """
    from app.config import settings as fresh_settings  # re-resolve after reloads

    settings = fresh_settings
    expected = (settings.api_key or "").strip()
    if not expected:
        return

    @app.middleware("http")
    async def _api_key_middleware(
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        # Preflight requests never carry credentials; let them through.
        if request.method == "OPTIONS" or _is_exempt(
            request.url.path,
            settings.app_api_prefix,
            exempt_paths=EXEMPT_PATHS,
            exempt_suffixes=EXEMPT_SUFFIXES,
        ):
            return await call_next(request)

        token = _extract_token(request)
        if token is None or not _token_matches(token, expected):
            return JSONResponse(
                status_code=401,
                content={"detail": "invalid or missing api key"},
                headers={"WWW-Authenticate": "Bearer"},
            )
        return await call_next(request)


__all__ = ["install_api_key_auth"]
@property def cors_origins(self) -> list[str]: diff --git a/app/main.py b/app/main.py index 5a896af..698c295 100644 --- a/app/main.py +++ b/app/main.py @@ -10,6 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware from app import __version__ from app.api import routes_health, routes_ingestion, routes_search +from app.api.security import install_api_key_auth from app.config import settings from app.logging_config import configure_logging, get_logger @@ -43,9 +44,10 @@ app.add_middleware( allow_origins=settings.cors_origins, allow_credentials=True, allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"], - allow_headers=["*"], + allow_headers=["*", "X-API-Key", "Authorization"], max_age=3600, ) +install_api_key_auth(app) app.include_router(routes_health.router, prefix=settings.app_api_prefix) app.include_router(routes_ingestion.router, prefix=settings.app_api_prefix) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 2a33c1d..c1ecbdc 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -92,6 +92,7 @@ services: APP_INPUT_DIR: /data/input APP_WORK_DIR: /data/work CORS_ALLOWED_ORIGINS: ${CORS_ALLOWED_ORIGINS:?CORS_ALLOWED_ORIGINS must be set (no * in production)} + API_KEY: ${API_KEY:?API_KEY must be set in production} restart: always worker: diff --git a/docker-compose.yml b/docker-compose.yml index 685fffd..be2f91a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,6 +33,7 @@ x-common-env: &common-env APP_INPUT_DIR: /data/input APP_WORK_DIR: /data/work CORS_ALLOWED_ORIGINS: ${CORS_ALLOWED_ORIGINS:-http://localhost:5173,http://localhost:5273,http://localhost:4173} + API_KEY: ${API_KEY:-} services: postgres: diff --git a/frontend/.env.example b/frontend/.env.example index 110ece7..9739f60 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -2,3 +2,8 @@ VITE_API_BASE_URL=/api/v1 VITE_USE_MOCK=true VITE_APP_NAME=LegacyHUB + +# Optional. 
When the backend has API_KEY set, the SPA must echo it on every +# request. For SSO/cookie deployments leave this empty and let the reverse +# proxy inject the header server-side. +VITE_API_KEY= diff --git a/frontend/src/services/apiClient.ts b/frontend/src/services/apiClient.ts index 7b6039e..bbab3c7 100644 --- a/frontend/src/services/apiClient.ts +++ b/frontend/src/services/apiClient.ts @@ -1,11 +1,15 @@ import axios, { type AxiosInstance, type AxiosError } from "axios"; const BASE_URL = import.meta.env.VITE_API_BASE_URL ?? "/api/v1"; +const API_KEY = import.meta.env.VITE_API_KEY ?? ""; + +const defaultHeaders: Record = { "Content-Type": "application/json" }; +if (API_KEY) defaultHeaders["X-API-Key"] = API_KEY; export const apiClient: AxiosInstance = axios.create({ baseURL: BASE_URL, timeout: 60_000, - headers: { "Content-Type": "application/json" }, + headers: defaultHeaders, }); apiClient.interceptors.response.use( diff --git a/frontend/src/vite-env.d.ts b/frontend/src/vite-env.d.ts index 85cdf64..d6953c4 100644 --- a/frontend/src/vite-env.d.ts +++ b/frontend/src/vite-env.d.ts @@ -4,6 +4,7 @@ interface ImportMetaEnv { readonly VITE_API_BASE_URL?: string; readonly VITE_USE_MOCK?: string; readonly VITE_APP_NAME?: string; + readonly VITE_API_KEY?: string; } interface ImportMeta { diff --git a/tests/test_api_security.py b/tests/test_api_security.py new file mode 100644 index 0000000..4d086c4 --- /dev/null +++ b/tests/test_api_security.py @@ -0,0 +1,167 @@ +"""Tests for the optional API-key auth middleware.""" + +from __future__ import annotations + +import importlib + +import pytest + +from fastapi.testclient import TestClient + + +KEY = "test-secret-key-DO-NOT-USE-IN-PROD" + + +@pytest.fixture +def secured_app(monkeypatch): + """Reload the FastAPI application with API_KEY set so the middleware + installs itself before the lifespan starts. Returns a TestClient bound to + that fresh app instance. 
+ """ + monkeypatch.setenv("API_KEY", KEY) + + # Drop cached Settings and main so the new env vars are picked up. + import app.config as cfg + import app.main as main_module + + cfg.get_settings.cache_clear() + importlib.reload(cfg) + importlib.reload(main_module) + return main_module.app + + +def _patch_health(monkeypatch, module): + from app.api.schemas import ComponentHealth + + def _ok(name): + return ComponentHealth(name=name, status="ok", detail={}) + + for name in ( + "_check_postgres", + "_check_minio", + "_check_opensearch", + "_check_qdrant", + "_check_redis", + ): + monkeypatch.setattr(module, name, lambda n=name: _ok(n.removeprefix("_check_"))) + + +def test_health_remains_open_when_key_required(secured_app, monkeypatch): + from app.api import routes_health + from app.config import settings + + _patch_health(monkeypatch, routes_health) + client = TestClient(secured_app) + res = client.get(f"{settings.app_api_prefix}/health") + assert res.status_code == 200 + + +def test_protected_route_rejects_missing_key(secured_app, monkeypatch): + from app.config import settings + from app.indexing import hybrid_search + + monkeypatch.setattr(hybrid_search, "run_search", lambda req: pytest.fail("must not run")) + + client = TestClient(secured_app) + res = client.post( + f"{settings.app_api_prefix}/search", + json={ + "query": "anything", + "limit": 1, + "filters": { + "document_id": None, + "source_path": None, + "block_type": None, + "min_ocr_confidence": None, + }, + "search_mode": "hybrid", + }, + ) + assert res.status_code == 401 + assert res.json()["detail"].startswith("invalid") + + +def test_protected_route_accepts_x_api_key_header(secured_app, monkeypatch): + from app.config import settings + from app.indexing import hybrid_search + from app.api.schemas import SearchResponse + + monkeypatch.setattr( + hybrid_search, + "run_search", + lambda req: SearchResponse( + query=req.query, mode=req.search_mode, total_candidates=0, reranked=False, results=[] + ), + ) + 
+ client = TestClient(secured_app) + res = client.post( + f"{settings.app_api_prefix}/search", + headers={"X-API-Key": KEY}, + json={ + "query": "x", + "limit": 1, + "filters": { + "document_id": None, + "source_path": None, + "block_type": None, + "min_ocr_confidence": None, + }, + "search_mode": "hybrid", + }, + ) + assert res.status_code == 200 + + +def test_protected_route_accepts_bearer_token(secured_app, monkeypatch): + from app.config import settings + from app.indexing import hybrid_search + from app.api.schemas import SearchResponse + + monkeypatch.setattr( + hybrid_search, + "run_search", + lambda req: SearchResponse( + query=req.query, mode=req.search_mode, total_candidates=0, reranked=False, results=[] + ), + ) + + client = TestClient(secured_app) + res = client.post( + f"{settings.app_api_prefix}/search", + headers={"Authorization": f"Bearer {KEY}"}, + json={ + "query": "x", + "limit": 1, + "filters": { + "document_id": None, + "source_path": None, + "block_type": None, + "min_ocr_confidence": None, + }, + "search_mode": "hybrid", + }, + ) + assert res.status_code == 200 + + +def test_protected_route_rejects_wrong_key(secured_app): + from app.config import settings + + client = TestClient(secured_app) + res = client.post( + f"{settings.app_api_prefix}/search", + headers={"X-API-Key": "wrong"}, + json={ + "query": "x", + "limit": 1, + "filters": { + "document_id": None, + "source_path": None, + "block_type": None, + "min_ocr_confidence": None, + }, + "search_mode": "hybrid", + }, + ) + assert res.status_code == 401