commit 7f721715721ac18eb0017041dd36d573e8e70699
Author: Vadim Malanov <vadim.malanov@gmail.com>
Date:   Wed May 13 16:41:50 2026 +0300

    chore: bootstrap repository with governance docs
    
    Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
    endings), AGENTS.md (entry points, stack, discovery order, baseline
    checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
    failures, rollback, scaling notes), .env.prod.example with rotated
    credential placeholders, and dev-only warnings on .env.example.
    
    Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..9e5635b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,16 @@
+.git
+.gitignore
+.venv
+venv
+__pycache__
+*.pyc
+.env
+.env.local
+data
+tests
+.pytest_cache
+.mypy_cache
+.ruff_cache
+.idea
+.vscode
+README.md
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..4b4edbf
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,82 @@
+# ---- DEVELOPMENT TEMPLATE ----
+# Copy to .env. Values below are intentionally weak defaults for local Docker
+# Compose. NEVER use them in production — see .env.prod.example.
+
+# ==== PostgreSQL ====
+POSTGRES_HOST=postgres
+POSTGRES_PORT=5432
+POSTGRES_DB=legacyhub
+POSTGRES_USER=legacyhub
+POSTGRES_PASSWORD=legacyhub
+
+# ==== MinIO ====
+MINIO_ENDPOINT=minio:9000
+MINIO_ACCESS_KEY=legacyhub
+MINIO_SECRET_KEY=legacyhub-secret
+MINIO_BUCKET_ORIGINALS=legacyhub-originals
+MINIO_BUCKET_DERIVED=legacyhub-derived
+MINIO_SECURE=false
+MINIO_REGION=us-east-1
+
+# ==== OpenSearch ====
+OPENSEARCH_HOST=opensearch
+OPENSEARCH_PORT=9200
+OPENSEARCH_USE_SSL=false
+OPENSEARCH_VERIFY_CERTS=false
+OPENSEARCH_USER=
+OPENSEARCH_PASSWORD=
+OPENSEARCH_INDEX_CHUNKS=legacy_chunks
+
+# ==== Qdrant ====
+QDRANT_HOST=qdrant
+QDRANT_PORT=6333
+QDRANT_API_KEY=
+QDRANT_COLLECTION_CHUNKS=legacy_chunks
+
+# ==== Redis ====
+REDIS_URL=redis://redis:6379/0
+
+# ==== OCR ====
+OCR_LANGUAGES=rus+eng
+OCR_ENABLED=true
+DOCLING_OCR_ENABLED=false
+MAX_DOCUMENT_TIMEOUT_SECONDS=180
+OCR_DESKEW=true
+OCR_CLEAN=true
+OCR_OPTIMIZE=1
+
+# ==== Embeddings / Reranker ====
+EMBEDDING_MODEL=BAAI/bge-m3
+EMBEDDING_DIM=1024
+EMBEDDING_DEVICE=cpu
+EMBEDDING_BATCH_SIZE=8
+EMBEDDING_NORMALIZE=true
+
+RERANKER_MODEL=BAAI/bge-reranker-v2-m3
+RERANKER_DEVICE=cpu
+RERANKER_ENABLED=true
+RERANKER_BATCH_SIZE=8
+
+# ==== Chunking ====
+CHUNK_TARGET_TOKENS=700
+CHUNK_MIN_TOKENS=120
+CHUNK_MAX_TOKENS=900
+CHUNK_OVERLAP_TOKENS=100
+
+# ==== Search ====
+HYBRID_OPENSEARCH_TOP_K=50
+HYBRID_QDRANT_TOP_K=50
+HYBRID_RRF_K=60
+RERANK_CANDIDATES=40
+
+# ==== App ====
+APP_LOG_LEVEL=INFO
+APP_HOST=0.0.0.0
+APP_PORT=8000
+APP_INPUT_DIR=/data/input
+APP_WORK_DIR=/data/work
+APP_API_PREFIX=/api/v1
+
+# Comma-separated list of allowed origins for the browser. Use specific origins
+# in production; * is accepted only for local development.
+CORS_ALLOWED_ORIGINS=http://localhost:5173,http://localhost:5273,http://localhost:4173
diff --git a/.env.prod.example b/.env.prod.example
new file mode 100644
index 0000000..b7477f3
--- /dev/null
+++ b/.env.prod.example
@@ -0,0 +1,74 @@
+# ---- PRODUCTION TEMPLATE ----
+# Copy to .env.prod and replace every __ROTATE_ME__ placeholder.
+# Never commit .env.prod.
+# Every credential below is a placeholder — rotation required before use.
+
+# ==== PostgreSQL ====
+POSTGRES_HOST=postgres
+POSTGRES_PORT=5432
+POSTGRES_DB=legacyhub
+POSTGRES_USER=legacyhub_prod
+POSTGRES_PASSWORD=__ROTATE_ME__
+
+# ==== MinIO ====
+MINIO_ENDPOINT=minio:9000
+MINIO_ACCESS_KEY=__ROTATE_ME__
+MINIO_SECRET_KEY=__ROTATE_ME__
+MINIO_BUCKET_ORIGINALS=legacyhub-originals
+MINIO_BUCKET_DERIVED=legacyhub-derived
+MINIO_SECURE=true
+MINIO_REGION=us-east-1
+
+# ==== OpenSearch (security plugin ON in prod overlay) ====
+OPENSEARCH_HOST=opensearch
+OPENSEARCH_PORT=9200
+OPENSEARCH_USE_SSL=true
+OPENSEARCH_VERIFY_CERTS=true
+OPENSEARCH_USER=admin
+OPENSEARCH_PASSWORD=__ROTATE_ME__
+OPENSEARCH_INDEX_CHUNKS=legacy_chunks
+OPENSEARCH_ADMIN_PASSWORD=__ROTATE_ME__
+
+# ==== Qdrant ====
+QDRANT_HOST=qdrant
+QDRANT_PORT=6333
+QDRANT_API_KEY=__ROTATE_ME__
+QDRANT_COLLECTION_CHUNKS=legacy_chunks
+
+# ==== Redis ====
+REDIS_URL=redis://:__ROTATE_ME__@redis:6379/0
+
+# ==== OCR ====
+OCR_LANGUAGES=rus+eng
+OCR_ENABLED=true
+DOCLING_OCR_ENABLED=false
+MAX_DOCUMENT_TIMEOUT_SECONDS=300
+
+# ==== Embeddings / Reranker ====
+EMBEDDING_MODEL=BAAI/bge-m3
+EMBEDDING_DIM=1024
+EMBEDDING_DEVICE=cuda
+EMBEDDING_BATCH_SIZE=32
+EMBEDDING_NORMALIZE=true
+
+RERANKER_MODEL=BAAI/bge-reranker-v2-m3
+RERANKER_DEVICE=cuda
+RERANKER_ENABLED=true
+RERANKER_BATCH_SIZE=32
+
+# ==== Hybrid search ====
+HYBRID_OPENSEARCH_TOP_K=50
+HYBRID_QDRANT_TOP_K=50
+HYBRID_RRF_K=60
+RERANK_CANDIDATES=40
+
+# ==== App ====
+APP_LOG_LEVEL=INFO
+APP_HOST=0.0.0.0
+APP_PORT=8000
+APP_INPUT_DIR=/data/input
+APP_WORK_DIR=/data/work
+APP_API_PREFIX=/api/v1
+
+# Comma-separated list of allowed origins. NEVER use * in production.
+CORS_ALLOWED_ORIGINS=https://legacyhub.teamhub.example
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..3b4ab98
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,23 @@
+* text=auto eol=lf
+*.py    text eol=lf
+*.ts    text eol=lf
+*.tsx   text eol=lf
+*.css   text eol=lf
+*.md    text eol=lf
+*.json  text eol=lf
+*.yml   text eol=lf
+*.yaml  text eol=lf
+*.toml  text eol=lf
+*.ini   text eol=lf
+*.mako  text eol=lf
+*.svg   text eol=lf
+*.cfg   text eol=lf
+Dockerfile text eol=lf
+Makefile   text eol=lf
+*.png   binary
+*.jpg   binary
+*.gif   binary
+*.pdf   binary
+*.ico   binary
+*.woff  binary
+*.woff2 binary
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..cb79ced
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+__pycache__/
+*.py[cod]
+*.egg-info/
+.eggs/
+build/
+dist/
+.venv/
+venv/
+.env
+.env.local
+.idea/
+.vscode/
+.mypy_cache/
+.pytest_cache/
+.ruff_cache/
+data/input/*
+data/work/*
+!data/input/.gitkeep
+!data/work/.gitkeep
+*.log
+.DS_Store
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..e802560
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,147 @@
+# AGENTS — LegacyHUB
+
+Operating instructions for AI agents working inside this repository.
+
+## What this project is
+
+LegacyHUB ingests legacy PDF archives at scale (~70k docs), runs OCR
+(OCRmyPDF/Tesseract), extracts structured content with Docling, indexes chunks
+into PostgreSQL + OpenSearch (BM25) + Qdrant (BGE-M3 dense), and serves a
+hybrid lexical + semantic search API (FastAPI) reranked by BGE.
+
+It is one module of the TeamHUB Suite.
+
+## Stack (canonical)
+
+| Layer    | Tech                                          |
+|----------|-----------------------------------------------|
+| API      | FastAPI, Pydantic v2, SQLAlchemy 2, Alembic   |
+| Workers  | Celery + Redis                                |
+| OCR      | OCRmyPDF + Tesseract (rus+eng)                |
+| Extract  | Docling                                       |
+| Store    | PostgreSQL 16, MinIO, OpenSearch 2.x, Qdrant  |
+| ML       | BAAI/bge-m3 (dense, 1024), bge-reranker-v2-m3 |
+| Frontend | React 18, TS 5, Vite 5, Tailwind, shadcn, TanStack Query, Zustand, Framer Motion, Recharts |
+| Tests    | pytest                                        |
+| CI       | GitHub Actions                                |
+
+## Entry points
+
+- **Backend API** — `app/main.py` (`uvicorn app.main:app`)
+- **Celery worker** — `celery -A app.workers.celery_app worker`
+- **CLI scripts** — `scripts/init_db.py`, `scripts/init_opensearch.py`,
+  `scripts/init_qdrant.py`, `scripts/ingest_folder.py`,
+  `scripts/reindex_document.py`, `scripts/smoke_test.py`
+- **Frontend dev** — `cd frontend && npm run dev` (port 5273)
+- **Docker** — `docker compose up -d --build` (dev), `docker compose -f
+  docker-compose.yml -f docker-compose.prod.yml ...` (prod)
+
+## Inventory
+
+```text
+legacy-knowledge-indexer/
+  app/
+    api/             routers + Pydantic schemas
+    db/              SQLAlchemy models + Alembic migrations
+    indexing/        OpenSearch + Qdrant clients, embeddings, reranker, hybrid
+    ingestion/       scanner, OCR, Docling, chunker, table/figure processors,
+                     quality, pipeline
+    storage/         MinIO client + key conventions + ensure_artifact helper
+    utils/           hashing, text cleaning, language detection, pdf helpers
+    workers/         Celery app + tasks
+  scripts/           init / ingest / reindex / smoke CLIs
+  tests/             pytest suite
+  docker/Dockerfile  API + worker image (OCRmyPDF + tesseract-rus+eng)
+  docker-compose.yml dev orchestration
+  docker-compose.prod.yml  production overlay
+  frontend/          React app — see frontend/README.md
+  .github/workflows  CI gate (ruff + pytest + tsc + vite build + compose config)
+```
+
+## Code discovery order
+
+Bounded discovery order for this repo. Use the first available tool that
+returns a usable answer; mark the rest "not available" for the task.
+
+1. **Grep / rg** — reliable fallback, always available. First choice for
+   strings, configs, docs, scripts, route paths, hashes.
+2. **Glob** — file shape lookups (`app/**/*.py`).
+3. **Semantic search** (if Sourcegraph, Zoekt, or Serena MCP is configured at
+   user level) — go-to-symbol, references. Document the smoke command before
+   relying on results.
+4. **Docling / extracted Markdown in MinIO** — for content questions about
+   ingested documents, not source code.
+
+Smoke command for layer 1:
+
+```bash
+rg --version && rg "@router" app/api -n
+```
+
+If any indexer times out or returns stale results, capture the error and fall
+through. Do not retry the same failing indexer.
+
+## Module contracts (high level)
+
+- `app/ingestion/pipeline.py::process_document_id(document_id, run_id)` — single
+  document end-to-end. Idempotent. Returns `{status, chunks, error?}`.
+- `app/indexing/hybrid_search.py::run_search(SearchRequest) -> SearchResponse` —
+  the only public search entry. Lexical + semantic + reranker.
+- `app/storage/artifacts.py::ensure_artifact(...)` — single source of truth for
+  `document_artifacts` upsert. Used by scanner, pipeline, table_processor,
+  figure_processor.
+- `app/storage/minio_client.py::MinioStorage` — bucket bootstrap + retryable
+  put/get. Never bypass for object IO.
+- `app/indexing/opensearch_client.py::ensure_index() / index_chunks()` — chunk
+  index lifecycle.
+- `app/indexing/qdrant_client.py::ensure_collection() / upsert_chunks()` —
+  vector index lifecycle.
+
+## Runtime vs legacy scope
+
+Everything under `app/` is runtime. `scripts/` are operational tools. `tests/`
+are non-runtime. There is no archived/legacy code yet.
+
+## Baseline checks
+
+```bash
+# Backend
+python -m pip check
+python -m compileall -q app scripts tests
+python -m pytest tests/ -q
+
+# Frontend
+cd frontend
+npx tsc --noEmit
+npm run lint
+npm run build
+
+# Docker
+docker compose config --quiet
+```
+
+## Operating rules for agents
+
+- Inspect before changing. `git status` first.
+- Small reviewable commits. One ownership boundary per commit.
+- Do not delete files, routes, migrations, or env vars without evidence (see
+  `software-project-delivery-governance` skill).
+- Do not invent secret values. Use `.env.example` placeholders.
+- Use `ensure_artifact` instead of re-implementing artifact upsert.
+- Use existing UI primitives in `frontend/src/components/ui/*` before adding new
+  ones.
+- Never commit `node_modules/`, `dist/`, `.env`, `data/input/*`, `data/work/*`.
+- Failures must be logged via `processing_events` (backend) or `sonner` toast
+  (frontend) — not silenced.
+
+## Ownership
+
+- Backend, ingestion, search — Vadim Malanov.
+- Frontend, design system — Vadim Malanov.
+
+## Where to update what
+
+- New behavior — update `README.md`.
+- New repeated agent rule — update this file.
+- New deployment / recovery step — update `RUNBOOK.md`.
+- Cleanup findings — `docs/cleanup-report.md` (create on demand).
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f39a6f2
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2026 TeamHUB
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied. See the License for the specific language governing
+   permissions and limitations under the License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..74014de
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,39 @@
+.PHONY: help up down logs build api worker init smoke test fmt lint
+
+help:
+	@echo "make up        - start all services"
+	@echo "make down      - stop all services"
+	@echo "make build     - rebuild api/worker image"
+	@echo "make init      - run db migrations + bootstrap opensearch + qdrant"
+	@echo "make smoke     - run the smoke test inside the api container"
+	@echo "make test      - pytest"
+	@echo "make logs      - tail api+worker logs"
+
+up:
+	docker compose up -d --build
+
+down:
+	docker compose down
+
+build:
+	docker compose build api worker
+
+logs:
+	docker compose logs -f api worker
+
+init:
+	docker compose exec api python scripts/init_db.py
+	docker compose exec api python scripts/init_opensearch.py
+	docker compose exec api python scripts/init_qdrant.py
+
+smoke:
+	docker compose exec api python scripts/smoke_test.py
+
+test:
+	pytest -q
+
+fmt:
+	ruff format app scripts tests
+
+lint:
+	ruff check app scripts tests
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..89345b3
--- /dev/null
+++ b/README.md
@@ -0,0 +1,233 @@
+# LegacyHUB - Knowledge Indexing & Hybrid Search for Legacy PDF Archives
+
+LegacyHUB is a production-oriented, fully open-source backend for ingesting,
+OCR-ing, structurally extracting, and hybrid-searching large legacy PDF
+archives (designed for ~70,000 documents).
+
+It is part of the **TeamHUB** suite.
+
+```
+PDFs ──▶ Scanner ──▶ MinIO (originals)
+                  └▶ OCRmyPDF (Tesseract) ──▶ MinIO (ocr_pdf)
+                                          └▶ Docling ──▶ MD + JSON ──▶ MinIO
+                                                       └▶ blocks/tables/figures
+                                                                ├▶ PostgreSQL
+                                                                ├▶ OpenSearch (BM25)
+                                                                └▶ Qdrant (BGE-M3 dense)
+                                                                          │
+FastAPI /search ◀── BGE Reranker ◀── RRF merge ◀───────────────────────────┘
+```
+
+## Stack
+
+| Component        | Tech                                     |
+|------------------|------------------------------------------|
+| OCR              | OCRmyPDF + Tesseract (rus + eng)         |
+| Extraction       | Docling (layout, tables, figures)        |
+| Object storage   | MinIO (S3-compatible)                    |
+| Relational store | PostgreSQL 16                            |
+| Lexical search   | OpenSearch 2.x (BM25 + ru/en analyzers)  |
+| Vector search    | Qdrant 1.x (named dense vector)          |
+| Embeddings       | BAAI/bge-m3 (dense, 1024d)               |
+| Reranker         | BAAI/bge-reranker-v2-m3                  |
+| API              | FastAPI + Uvicorn                        |
+| Workers          | Celery + Redis                           |
+| Logging          | structlog (JSON)                         |
+
+## Quick start
+
+```bash
+cp .env.example .env
+docker compose up -d --build
+docker compose exec api python scripts/init_db.py
+docker compose exec api python scripts/init_opensearch.py
+docker compose exec api python scripts/init_qdrant.py
+docker compose exec api python scripts/smoke_test.py
+```
+
+Health check:
+
+```bash
+curl http://localhost:8000/api/v1/health | jq .
+```
+
+Open the interactive Swagger docs at <http://localhost:8000/docs>.
+
+## Ingest documents
+
+Mount a folder into the container at `/data/input` (the compose file already
+mounts `./data/input` for you), drop PDFs into it, and call:
+
+```bash
+curl -X POST http://localhost:8000/api/v1/ingest/folder \
+  -H "Content-Type: application/json" \
+  -d '{"path":"/data/input","recursive":true,"force":false}'
+```
+
+Or run inline (no Celery, useful for ad-hoc tests):
+
+```bash
+docker compose exec api python scripts/ingest_folder.py \
+  --path /data/input --recursive --mode inline
+```
+
+To re-process a single document by ID:
+
+```bash
+docker compose exec api python scripts/reindex_document.py \
+  --document-id <uuid>
+```
+
+## Search
+
+```bash
+curl -X POST http://localhost:8000/api/v1/search \
+  -H "Content-Type: application/json" \
+  -d '{
+        "query": "ГОСТ 21.501-93 рабочие чертежи",
+        "limit": 10,
+        "search_mode": "hybrid",
+        "filters": {"min_ocr_confidence": 0.5}
+      }' | jq .
+```
+
+`search_mode` can be `lexical`, `semantic`, or `hybrid`. Hybrid mode does:
+
+1. BM25 top-K from OpenSearch
+2. Dense top-K from Qdrant (BGE-M3)
+3. Reciprocal Rank Fusion merge
+4. Top 30-50 candidates re-scored by the BGE reranker (if available)
+5. Final top-N returned with citation metadata
+
+Each hit includes the document name, page, block id, table/figure id where
+applicable, and quality flags - so AI consumers can produce verifiable answers
+with citations.
+
+## Inspect the system
+
+| Service       | URL                                  | Credentials                |
+|---------------|--------------------------------------|----------------------------|
+| API docs      | <http://localhost:8000/docs>         | -                          |
+| MinIO console | <http://localhost:9001>              | `legacyhub` / `legacyhub-secret` |
+| OpenSearch    | <http://localhost:9200>              | -                          |
+| Qdrant UI     | <http://localhost:6333/dashboard>    | -                          |
+| Postgres      | `localhost:5432`                     | `legacyhub` / `legacyhub`  |
+
+```bash
+# Count docs in OpenSearch
+curl 'http://localhost:9200/legacy_chunks/_count'
+# Inspect Qdrant collection
+curl 'http://localhost:6333/collections/legacy_chunks'
+# Browse Postgres
+docker compose exec postgres psql -U legacyhub -d legacyhub \
+  -c "SELECT id, original_file_name, status FROM documents LIMIT 20;"
+```
+
+## Environment variables
+
+See [`.env.example`](.env.example) for the full list. Key ones:
+
+- `OCR_LANGUAGES` - Tesseract language packs (default `rus+eng`).
+- `OCR_ENABLED` - set `false` to skip OCR completely.
+- `DOCLING_OCR_ENABLED` - prefer OCRmyPDF; only enable if you do not run OCRmyPDF.
+- `EMBEDDING_DEVICE` / `RERANKER_DEVICE` - `cpu`, `cuda`, or `mps`.
+- `MAX_DOCUMENT_TIMEOUT_SECONDS` - per-document soft timeout for extraction.
+
+## Handling poor OCR
+
+- The pipeline computes per-chunk `quality_flags`:
+  - `low_ocr_confidence`, `very_short_text`, `possible_garbled_text`
+  - `table_detected`, `figure_detected`, `handwriting_detected`
+  - `needs_manual_review` (any of the above except table/figure detection)
+- Garbled chunks are still indexed - so they remain searchable - but the flags
+  let you filter them out at query time via `filters.min_ocr_confidence`.
+- Original text is always preserved verbatim (no destructive cleaning); the
+  `normalized_text` field is a derived form used purely for recall.
+- We deliberately preserve technical / legal identifiers (ГОСТ, document
+  numbers, dates, serials, slashes, dashes, dots, brackets) during normalization.
+
+## Handling handwriting
+
+- We do not attempt to recognize handwriting reliably. Suspected handwritten
+  fragments are flagged with `block_type=handwriting` and
+  `quality_flags.handwriting_detected=true` plus `needs_manual_review=true`.
+- The API does not present handwriting recognition output as authoritative.
+
+## Idempotency
+
+- Document identity = SHA256 of the original PDF. Re-ingesting the same PDF
+  reuses the existing `documents` row.
+- The pipeline deletes existing chunks for the document and re-creates them
+  before re-indexing; OpenSearch and Qdrant entries are deleted-by-document
+  before re-upsert. So re-running ingestion does not duplicate data.
+
+## Failure handling
+
+- Each pipeline stage records a row in `processing_events` with `level` and
+  `data` JSON.
+- A document that fails OCR is marked `OCR_FAILED` and the pipeline moves on.
+- A document that fails Docling is marked `EXTRACTION_FAILED`.
+- Indexing failures bring the document to `FAILED`; re-running
+  `scripts/reindex_document.py` resumes processing.
+
+## Scaling notes (~70k PDFs)
+
+- The Celery `worker` service is horizontally scalable: `docker compose up -d
+  --scale worker=8` (or run several Compose stacks pointing at the same
+  Postgres / MinIO / OpenSearch / Qdrant).
+- The embedding step is the biggest cost. Set `EMBEDDING_DEVICE=cuda` and a
+  GPU-aware worker image if available.
+- OpenSearch defaults to 1 shard / 0 replicas - increase for production
+  (`PUT /legacy_chunks/_settings`).
+- Qdrant is single-node by default; for very large corpora use the cluster
+  build of Qdrant or shard by document hash.
+- For 70k PDFs at ~50 chunks each, expect ~3.5M vectors. BGE-M3 dense at 1024d
+  is ~14 GB on disk; budget memory accordingly.
+
+## Tests
+
+```bash
+pip install -e ".[dev]"
+pytest -q
+```
+
+The unit suite covers hashing, chunking, quality flags, hybrid result merging,
+and duplicate detection. Integration tests run against the live Compose stack
+via `scripts/smoke_test.py`.
+
+## Repository layout
+
+```
+legacy-knowledge-indexer/
+  app/
+    api/            # FastAPI routes & schemas
+    db/             # SQLAlchemy models + Alembic migrations
+    indexing/       # OpenSearch, Qdrant, embeddings, reranker, hybrid search
+    ingestion/      # scanner, OCR, Docling, chunking, quality, pipeline
+    storage/        # MinIO client + key conventions
+    utils/          # hashing, text cleaning, language detection, PDF helpers
+    workers/        # Celery app + tasks
+  scripts/          # init / ingest / reindex / smoke
+  tests/            # unit tests
+  docker/Dockerfile # API + worker image
+  docker-compose.yml
+  .env.example
+  pyproject.toml
+  alembic.ini
+```
+
+## Known limitations
+
+- Docling's exact JSON shape varies between versions. The extractor uses
+  defensive lookups and falls back to `paragraph` when a label is unknown.
+- We do not currently ship a sparse vector path (BGE-M3 supports it). Hybrid
+  recall is achieved via OpenSearch BM25 + Qdrant dense, merged with RRF -
+  which has been observed to outperform sparse-only or dense-only setups on
+  noisy OCR.
+- Figure description does not invoke a VLM; captions plus a placeholder are
+  used. Plug a VLM into `figure_processor.persist_figures` if needed.
+- No authentication on the API surface - put it behind your reverse proxy.
+
+## License
+
+Apache-2.0.
diff --git a/RUNBOOK.md b/RUNBOOK.md
new file mode 100644
index 0000000..d5ebe48
--- /dev/null
+++ b/RUNBOOK.md
@@ -0,0 +1,146 @@
+# LegacyHUB — Operational Runbook
+
+## Quick boot (dev)
+
+```bash
+cp .env.example .env
+docker compose up -d --build
+docker compose exec api python scripts/init_db.py
+docker compose exec api python scripts/init_opensearch.py
+docker compose exec api python scripts/init_qdrant.py
+docker compose exec api python scripts/smoke_test.py
+```
+
+Verify:
+
+```bash
+curl -fsS http://localhost:8000/api/v1/health | jq .
+```
+
+Frontend dev:
+
+```bash
+cd frontend && cp .env.example .env && npm install && npm run dev
+# http://localhost:5273
+```
+
+## Production deploy
+
+The production overlay enables the OpenSearch security plugin, removes default
+ports, forces externally supplied credentials, and disables debug routes.
+
+```bash
+# 1. Ensure secrets exist
+cp .env.prod.example .env.prod
+$EDITOR .env.prod          # rotate every credential, never commit
+
+# 2. Build + recreate
+docker compose \
+  -f docker-compose.yml -f docker-compose.prod.yml \
+  --env-file .env.prod \
+  up -d --build --force-recreate api worker
+
+# 3. Migrations
+docker compose -f docker-compose.yml -f docker-compose.prod.yml \
+  --env-file .env.prod exec api python scripts/init_db.py
+
+# 4. Health gate
+docker compose -f docker-compose.yml -f docker-compose.prod.yml \
+  --env-file .env.prod exec api python scripts/smoke_test.py
+curl -fsS https://<host>/api/v1/health | jq -e '.status == "ok"'
+```
+
+Hardening notes (mandatory for prod):
+
+- Rotate every credential in `.env.prod` from `.env.prod.example` placeholders.
+- Put OpenSearch behind TLS and set an admin password. Remove
+  `DISABLE_SECURITY_PLUGIN=true` (handled by the overlay).
+- Front the API with a reverse proxy that performs auth + TLS termination.
+- Restrict CORS via `CORS_ALLOWED_ORIGINS` (comma-separated) — never `*` in
+  prod.
+- MinIO root key/secret in prod must come from a secret store, not the repo.
+- Mount `data/input` and `data/work` from durable storage, not the workstation.
+
+## Ingestion
+
+```bash
+# trigger from the API
+curl -X POST http://localhost:8000/api/v1/ingest/folder \
+  -H "Content-Type: application/json" \
+  -d '{"path":"/data/input","recursive":true,"force":false}'
+
+# or inline (no Celery)
+docker compose exec api python scripts/ingest_folder.py \
+  --path /data/input --recursive --mode inline
+
+# re-index a single doc
+docker compose exec api python scripts/reindex_document.py \
+  --document-id <uuid>
+```
+
+## Failure handling
+
+Each stage emits a row to `processing_events` with `level` and `data`. Inspect:
+
+```bash
+docker compose exec postgres psql -U legacyhub -d legacyhub -c \
+  "SELECT created_at, stage, level, message FROM processing_events
+   ORDER BY created_at DESC LIMIT 50;"
+```
+
+| Failure              | Where to look                                       | Fix                              |
+|----------------------|-----------------------------------------------------|----------------------------------|
+| `OCR_FAILED`         | `processing_events` → `OCR_STARTED` then error      | Confirm `tesseract-ocr-rus` package; rerun `scripts/reindex_document.py` |
+| `EXTRACTION_FAILED`  | `processing_events` → Docling stage                 | Check timeout; verify Docling version pin |
+| Indexing stuck       | OpenSearch + Qdrant health                          | `scripts/init_opensearch.py`, `scripts/init_qdrant.py` |
+| Reranker disabled    | API logs → `reranker.disabled`                      | Ensure `RERANKER_ENABLED=true`; HF cache mounted |
+
+## Verification gates (per change)
+
+1. `python -m pytest tests/ -q` — full unit suite (19+ tests).
+2. `python -m compileall -q app scripts tests`.
+3. `docker compose config --quiet`.
+4. Frontend: `npx tsc --noEmit && npm run build`.
+5. `/api/v1/health` returns `{"status":"ok"}`.
+6. One smoke ingest of a known PDF; verify `/search` returns a result.
+
+## Rollback
+
+1. Capture deployed commit SHA before deploy (`git rev-parse HEAD`).
+2. To roll back the API/worker image only, `git checkout <previous-sha>`, then rebuild:
+   ```bash
+   docker compose -f docker-compose.yml -f docker-compose.prod.yml \
+     --env-file .env.prod up -d --build --force-recreate api worker \
+     --no-deps  # keep PG/MinIO/OS/Qdrant intact
+   ```
+3. Data services (PostgreSQL, MinIO, OpenSearch, Qdrant) are stateful and
+   should not be rolled back casually. Restore from backup via the standard
+   TeamHUB Suite backup runbook.
+
+## Scaling notes (~70k PDFs)
+
+- Workers scale horizontally: `docker compose up -d --scale worker=8`.
+- Set `EMBEDDING_DEVICE=cuda` on a GPU-capable worker image for ~10× embedding
+  throughput.
+- OpenSearch single shard suffices to ~10M chunks; increase shards and add
+  replicas in prod.
+- Qdrant single-node OK for ~5M vectors; switch to cluster build beyond that.
+
+## Common one-liners
+
+```bash
+# count indexed chunks in OpenSearch
+curl 'http://localhost:9200/legacy_chunks/_count' | jq .
+
+# inspect Qdrant collection
+curl 'http://localhost:6333/collections/legacy_chunks' | jq .
+
+# list MinIO buckets (the keys expand in the host shell - export them from .env first)
+docker compose exec minio mc alias set local http://localhost:9000 \
+  "$MINIO_ACCESS_KEY" "$MINIO_SECRET_KEY"
+docker compose exec minio mc ls local
+
+# document count per status, e.g. how many reached INDEXING_COMPLETED
+docker compose exec postgres psql -U legacyhub -d legacyhub -c \
+  "SELECT status, COUNT(*) FROM documents GROUP BY status;"
+```
diff --git a/alembic.ini b/alembic.ini
new file mode 100644
index 0000000..4aaf2b6
--- /dev/null
+++ b/alembic.ini
@@ -0,0 +1,40 @@
+[alembic]
+script_location = app/db/migrations
+prepend_sys_path = .
+sqlalchemy.url = driver://user:pass@host/dbname
+
+[post_write_hooks]
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..7872bc4
--- /dev/null
+++ b/app/__init__.py
@@ -0,0 +1,3 @@
+"""LegacyHUB - knowledge indexing and hybrid search over legacy PDF archives."""
+
+__version__ = "0.1.0"
diff --git a/app/api/__init__.py b/app/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/api/routes_health.py b/app/api/routes_health.py
new file mode 100644
index 0000000..aa2e550
--- /dev/null
+++ b/app/api/routes_health.py
@@ -0,0 +1,115 @@
+"""Health endpoint - probes Postgres, MinIO, OpenSearch, Qdrant, Redis."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from fastapi import APIRouter
+from sqlalchemy import text
+
+from app import __version__
+from app.api.schemas import ComponentHealth, HealthResponse
+from app.config import settings
+from app.db.session import get_engine
+from app.logging_config import get_logger
+from app.storage.minio_client import get_storage
+
+logger = get_logger(__name__)
+
+router = APIRouter(tags=["health"])
+
+
+def _check_postgres() -> ComponentHealth:
+    """Probe Postgres by executing ``SELECT 1`` on an engine connection."""
+    try:
+        with get_engine().connect() as conn:
+            conn.execute(text("SELECT 1"))
+        return ComponentHealth(name="postgres", status="ok")
+    except Exception as exc:  # noqa: BLE001
+        return ComponentHealth(name="postgres", status="error", detail={"error": str(exc)})
+
+
+def _check_minio() -> ComponentHealth:
+    """Probe MinIO through the storage client's ``health()`` report."""
+    try:
+        info: dict[str, Any] = get_storage().health()
+    except Exception as exc:  # noqa: BLE001
+        # Same contract as the sibling probes: report the failure instead of
+        # letting it escape and turn the whole /health response into a 500.
+        return ComponentHealth(name="minio", status="error", detail={"error": str(exc)})
+    if info.get("status") == "ok":
+        return ComponentHealth(name="minio", status="ok", detail=info)
+    return ComponentHealth(name="minio", status="error", detail=info)
+
+
+def _check_opensearch() -> ComponentHealth:
+    """Probe OpenSearch cluster health: green/yellow -> ok, anything else -> degraded."""
+    try:
+        from app.indexing.opensearch_client import get_opensearch
+
+        client = get_opensearch()
+        info = client.cluster.health()
+        cluster_status = info.get("status")
+        status = "ok" if cluster_status in ("green", "yellow") else "degraded"
+        return ComponentHealth(
+            name="opensearch",
+            status=status,  # type: ignore[arg-type]
+            detail={"cluster_status": cluster_status, "nodes": info.get("number_of_nodes")},
+        )
+    except Exception as exc:  # noqa: BLE001
+        return ComponentHealth(name="opensearch", status="error", detail={"error": str(exc)})
+
+
+def _check_qdrant() -> ComponentHealth:
+    """Probe Qdrant by listing its collections."""
+    try:
+        from app.indexing.qdrant_client import get_qdrant
+
+        client = get_qdrant()
+        cols = client.get_collections()
+        return ComponentHealth(
+            name="qdrant",
+            status="ok",
+            detail={"collections": [c.name for c in cols.collections]},
+        )
+    except Exception as exc:  # noqa: BLE001
+        return ComponentHealth(name="qdrant", status="error", detail={"error": str(exc)})
+
+
+def _check_redis() -> ComponentHealth:
+    """Probe Redis with a ``PING`` on a short-lived client."""
+    try:
+        import redis
+
+        # socket_timeout bounds the PING itself (socket_connect_timeout only covers
+        # the connect); the context manager closes the client deterministically.
+        with redis.Redis.from_url(
+            settings.redis_url, socket_connect_timeout=2, socket_timeout=2
+        ) as client:
+            client.ping()
+        return ComponentHealth(name="redis", status="ok")
+    except Exception as exc:  # noqa: BLE001
+        return ComponentHealth(name="redis", status="error", detail={"error": str(exc)})
+
+
+@router.get("/health", response_model=HealthResponse)
+def health() -> HealthResponse:
+    """Run every dependency probe and fold the results into one overall status.
+
+    Any ``error`` component makes the overall status ``error``; otherwise any
+    ``degraded`` component makes it ``degraded``; otherwise it is ``ok``.
+    """
+    components = [
+        _check_postgres(),
+        _check_minio(),
+        _check_opensearch(),
+        _check_qdrant(),
+        _check_redis(),
+    ]
+    if any(c.status == "error" for c in components):
+        overall = "error"
+    elif any(c.status == "degraded" for c in components):
+        overall = "degraded"
+    else:
+        overall = "ok"
+    return HealthResponse(status=overall, version=__version__, components=components)  # type: ignore[arg-type]
diff --git a/app/api/routes_ingestion.py b/app/api/routes_ingestion.py
new file mode 100644
index 0000000..eaf8e08
--- /dev/null
+++ b/app/api/routes_ingestion.py
@@ -0,0 +1,63 @@
+"""Ingestion endpoints."""
+
+from __future__ import annotations
+
+import uuid
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException
+
+from app.api.schemas import IngestFolderRequest, IngestFolderResponse
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+router = APIRouter(prefix="/ingest", tags=["ingestion"])
+
+
+@router.post("/folder", response_model=IngestFolderResponse)
+def ingest_folder(req: IngestFolderRequest) -> IngestFolderResponse:
+    """Discover all PDFs under ``path`` and queue them for processing.
+
+    The request returns immediately after the discovery pass. Per-document
+    OCR / extraction / indexing happens asynchronously in Celery workers.
+    """
+    folder = Path(req.path)
+    if not folder.exists() or not folder.is_dir():
+        raise HTTPException(status_code=400, detail=f"Folder not found: {req.path}")
+
+    # Lazy import - keeps module load light.
+    from app.ingestion.scanner import discover_documents
+    from app.workers.tasks import process_document
+
+    run_id = uuid.uuid4()
+    discovered, queued, dups, invalid = 0, 0, 0, 0
+
+    for record in discover_documents(folder, recursive=req.recursive, force=req.force):
+        discovered += 1
+        if record.duplicate and not req.force:
+            dups += 1
+            continue
+        if not record.document_id:
+            invalid += 1
+            continue
+        process_document.delay(str(record.document_id), str(run_id))
+        queued += 1
+
+    logger.info(
+        "ingest.folder.queued",
+        path=str(folder),
+        discovered=discovered,
+        queued=queued,
+        skipped_duplicates=dups,
+        invalid=invalid,
+        run_id=str(run_id),
+    )
+
+    return IngestFolderResponse(
+        run_id=run_id,
+        discovered=discovered,
+        queued=queued,
+        skipped_duplicates=dups,
+        invalid_files=invalid,
+    )
diff --git a/app/api/routes_search.py b/app/api/routes_search.py
new file mode 100644
index 0000000..b50f00b
--- /dev/null
+++ b/app/api/routes_search.py
@@ -0,0 +1,16 @@
+"""Search endpoint - lexical / semantic / hybrid."""
+
+from __future__ import annotations
+
+from fastapi import APIRouter
+
+from app.api.schemas import SearchRequest, SearchResponse
+
+router = APIRouter(prefix="/search", tags=["search"])
+
+
+@router.post("", response_model=SearchResponse)
+def search(req: SearchRequest) -> SearchResponse:
+    from app.indexing.hybrid_search import run_search
+
+    return run_search(req)
diff --git a/app/api/schemas.py b/app/api/schemas.py
new file mode 100644
index 0000000..e06191e
--- /dev/null
+++ b/app/api/schemas.py
@@ -0,0 +1,99 @@
+"""Pydantic request/response schemas for the LegacyHUB API."""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+from typing import Any, Literal
+
+from pydantic import BaseModel, Field
+
+
+# ---------------- Health ----------------
+
+class ComponentHealth(BaseModel):
+    name: str
+    status: Literal["ok", "error", "degraded"]
+    detail: dict[str, Any] = Field(default_factory=dict)
+
+
+class HealthResponse(BaseModel):
+    status: Literal["ok", "error", "degraded"]
+    version: str
+    components: list[ComponentHealth]
+
+
+# ---------------- Ingestion ----------------
+
+class IngestFolderRequest(BaseModel):
+    path: str = Field(..., description="Absolute path inside the API container")
+    recursive: bool = True
+    force: bool = False
+
+
+class IngestFolderResponse(BaseModel):
+    run_id: uuid.UUID
+    discovered: int
+    queued: int
+    skipped_duplicates: int
+    invalid_files: int
+
+
+class DocumentSummary(BaseModel):
+    id: uuid.UUID
+    original_file_name: str
+    source_path: str
+    sha256: str
+    status: str
+    file_size_bytes: int
+    created_at: datetime
+
+
+# ---------------- Search ----------------
+
+SearchMode = Literal["lexical", "semantic", "hybrid"]
+
+
+class SearchFilters(BaseModel):
+    document_id: uuid.UUID | None = None
+    source_path: str | None = None
+    block_type: str | None = None
+    min_ocr_confidence: float | None = Field(None, ge=0.0, le=1.0)
+
+
+class SearchRequest(BaseModel):
+    query: str = Field(..., min_length=1)
+    limit: int = Field(10, ge=1, le=100)
+    filters: SearchFilters = Field(default_factory=SearchFilters)
+    search_mode: SearchMode = "hybrid"
+
+
+class Citation(BaseModel):
+    pdf: str
+    page: int
+    block_id: str | None = None
+    table_id: str | None = None
+    figure_id: str | None = None
+
+
+class SearchHit(BaseModel):
+    rank: int
+    score: float
+    document_id: uuid.UUID
+    chunk_id: uuid.UUID
+    original_file_name: str
+    source_path: str
+    page_number: int
+    block_type: str
+    text: str
+    citation: Citation
+    quality_flags: dict[str, Any] = Field(default_factory=dict)
+    metadata: dict[str, Any] = Field(default_factory=dict)
+
+
+class SearchResponse(BaseModel):
+    query: str
+    mode: SearchMode
+    total_candidates: int
+    reranked: bool
+    results: list[SearchHit]
diff --git a/app/config.py b/app/config.py
new file mode 100644
index 0000000..400b08a
--- /dev/null
+++ b/app/config.py
@@ -0,0 +1,111 @@
+"""Centralized typed configuration loaded from environment variables.
+
+All other modules import :data:`settings` and never touch ``os.environ`` directly.
+"""
+
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Literal
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+    # ---------------- App ----------------
+    app_log_level: str = Field("INFO", alias="APP_LOG_LEVEL")
+    app_host: str = Field("0.0.0.0", alias="APP_HOST")
+    app_port: int = Field(8000, alias="APP_PORT")
+    app_input_dir: str = Field("/data/input", alias="APP_INPUT_DIR")
+    app_work_dir: str = Field("/data/work", alias="APP_WORK_DIR")
+    app_api_prefix: str = Field("/api/v1", alias="APP_API_PREFIX")
+
+    # ---------------- Postgres ----------------
+    postgres_host: str = Field("postgres", alias="POSTGRES_HOST")
+    postgres_port: int = Field(5432, alias="POSTGRES_PORT")
+    postgres_db: str = Field("legacyhub", alias="POSTGRES_DB")
+    postgres_user: str = Field("legacyhub", alias="POSTGRES_USER")
+    postgres_password: str = Field("legacyhub", alias="POSTGRES_PASSWORD")
+
+    @property
+    def database_url(self) -> str:
+        from urllib.parse import quote  # escape "@", ":", "/" etc. in credentials
+        user, secret = quote(self.postgres_user, safe=""), quote(self.postgres_password, safe="")
+        location = f"{self.postgres_host}:{self.postgres_port}/{self.postgres_db}"
+        return f"postgresql+psycopg://{user}:{secret}@{location}"
+
+    # ---------------- MinIO ----------------
+    minio_endpoint: str = Field("minio:9000", alias="MINIO_ENDPOINT")
+    minio_access_key: str = Field("legacyhub", alias="MINIO_ACCESS_KEY")
+    minio_secret_key: str = Field("legacyhub-secret", alias="MINIO_SECRET_KEY")
+    minio_bucket_originals: str = Field("legacyhub-originals", alias="MINIO_BUCKET_ORIGINALS")
+    minio_bucket_derived: str = Field("legacyhub-derived", alias="MINIO_BUCKET_DERIVED")
+    minio_secure: bool = Field(False, alias="MINIO_SECURE")
+    minio_region: str = Field("us-east-1", alias="MINIO_REGION")
+
+    # ---------------- OpenSearch ----------------
+    opensearch_host: str = Field("opensearch", alias="OPENSEARCH_HOST")
+    opensearch_port: int = Field(9200, alias="OPENSEARCH_PORT")
+    opensearch_use_ssl: bool = Field(False, alias="OPENSEARCH_USE_SSL")
+    opensearch_verify_certs: bool = Field(False, alias="OPENSEARCH_VERIFY_CERTS")
+    opensearch_user: str = Field("", alias="OPENSEARCH_USER")
+    opensearch_password: str = Field("", alias="OPENSEARCH_PASSWORD")
+    opensearch_index_chunks: str = Field("legacy_chunks", alias="OPENSEARCH_INDEX_CHUNKS")
+
+    # ---------------- Qdrant ----------------
+    qdrant_host: str = Field("qdrant", alias="QDRANT_HOST")
+    qdrant_port: int = Field(6333, alias="QDRANT_PORT")
+    qdrant_api_key: str = Field("", alias="QDRANT_API_KEY")
+    qdrant_collection_chunks: str = Field("legacy_chunks", alias="QDRANT_COLLECTION_CHUNKS")
+
+    # ---------------- Redis ----------------
+    redis_url: str = Field("redis://redis:6379/0", alias="REDIS_URL")
+
+    # ---------------- OCR ----------------
+    ocr_languages: str = Field("rus+eng", alias="OCR_LANGUAGES")
+    ocr_enabled: bool = Field(True, alias="OCR_ENABLED")
+    docling_ocr_enabled: bool = Field(False, alias="DOCLING_OCR_ENABLED")
+    max_document_timeout_seconds: int = Field(180, alias="MAX_DOCUMENT_TIMEOUT_SECONDS")
+    ocr_deskew: bool = Field(True, alias="OCR_DESKEW")
+    ocr_clean: bool = Field(True, alias="OCR_CLEAN")
+    ocr_optimize: int = Field(1, alias="OCR_OPTIMIZE")
+
+    # ---------------- Embeddings / Reranker ----------------
+    embedding_model: str = Field("BAAI/bge-m3", alias="EMBEDDING_MODEL")
+    embedding_dim: int = Field(1024, alias="EMBEDDING_DIM")
+    embedding_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="EMBEDDING_DEVICE")
+    embedding_batch_size: int = Field(8, alias="EMBEDDING_BATCH_SIZE")
+    embedding_normalize: bool = Field(True, alias="EMBEDDING_NORMALIZE")
+
+    reranker_model: str = Field("BAAI/bge-reranker-v2-m3", alias="RERANKER_MODEL")
+    reranker_device: Literal["cpu", "cuda", "mps"] = Field("cpu", alias="RERANKER_DEVICE")
+    reranker_enabled: bool = Field(True, alias="RERANKER_ENABLED")
+    reranker_batch_size: int = Field(8, alias="RERANKER_BATCH_SIZE")
+
+    # ---------------- Chunking ----------------
+    chunk_target_tokens: int = Field(700, alias="CHUNK_TARGET_TOKENS")
+    chunk_min_tokens: int = Field(120, alias="CHUNK_MIN_TOKENS")
+    chunk_max_tokens: int = Field(900, alias="CHUNK_MAX_TOKENS")
+    chunk_overlap_tokens: int = Field(100, alias="CHUNK_OVERLAP_TOKENS")
+
+    # ---------------- Hybrid search ----------------
+    hybrid_opensearch_top_k: int = Field(50, alias="HYBRID_OPENSEARCH_TOP_K")
+    hybrid_qdrant_top_k: int = Field(50, alias="HYBRID_QDRANT_TOP_K")
+    hybrid_rrf_k: int = Field(60, alias="HYBRID_RRF_K")
+    rerank_candidates: int = Field(40, alias="RERANK_CANDIDATES")
+
+
+@lru_cache(maxsize=1)
+def get_settings() -> Settings:
+    return Settings()  # type: ignore[call-arg]
+
+
+settings = get_settings()
diff --git a/app/db/__init__.py b/app/db/__init__.py
new file mode 100644
index 0000000..70c4785
--- /dev/null
+++ b/app/db/__init__.py
@@ -0,0 +1,3 @@
+from app.db.models import Base
+
+__all__ = ["Base"]
diff --git a/app/db/migrations/env.py b/app/db/migrations/env.py
new file mode 100644
index 0000000..94ea233
--- /dev/null
+++ b/app/db/migrations/env.py
@@ -0,0 +1,55 @@
+"""Alembic environment - online & offline migrations using app config."""
+
+from __future__ import annotations
+
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+
+from app.config import settings
+from app.db.models import Base
+
+config = context.config
+# ConfigParser treats "%" as interpolation syntax, so escape it before storing the URL.
+config.set_main_option("sqlalchemy.url", settings.database_url.replace("%", "%%"))
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    context.configure(
+        url=settings.database_url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+        compare_type=True,
+    )
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    section = config.get_section(config.config_ini_section, {})
+    section["sqlalchemy.url"] = settings.database_url
+    connectable = engine_from_config(
+        section,
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata,
+            compare_type=True,
+        )
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/app/db/migrations/script.py.mako b/app/db/migrations/script.py.mako
new file mode 100644
index 0000000..9f734e6
--- /dev/null
+++ b/app/db/migrations/script.py.mako
@@ -0,0 +1,27 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+revision: str = ${repr(up_revision)}
+down_revision: str | None = ${repr(down_revision)}
+branch_labels: str | Sequence[str] | None = ${repr(branch_labels)}
+depends_on: str | Sequence[str] | None = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
diff --git a/app/db/migrations/versions/0001_initial.py b/app/db/migrations/versions/0001_initial.py
new file mode 100644
index 0000000..8fa3fad
--- /dev/null
+++ b/app/db/migrations/versions/0001_initial.py
@@ -0,0 +1,171 @@
+"""initial schema
+
+Revision ID: 0001_initial
+Revises:
+Create Date: 2026-05-10
+
+"""
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+revision: str = "0001_initial"
+down_revision: str | None = None
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    op.create_table(
+        "documents",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("source_path", sa.Text, nullable=False),
+        sa.Column("original_file_name", sa.Text, nullable=False),
+        sa.Column("sha256", sa.String(64), nullable=False),  # uniqueness comes from ix_documents_sha256
+        sa.Column("file_size_bytes", sa.BigInteger, nullable=False),
+        sa.Column("mime_type", sa.Text, nullable=False, server_default="application/pdf"),
+        sa.Column("language_hint", sa.Text, nullable=True),
+        sa.Column("status", sa.String(64), nullable=False, server_default="DISCOVERED"),
+        sa.Column("error_message", sa.Text, nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+    )
+    op.create_index("ix_documents_status", "documents", ["status"])
+    op.create_index("ix_documents_sha256", "documents", ["sha256"], unique=True)
+
+    op.create_table(
+        "document_artifacts",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("documents.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("artifact_type", sa.String(64), nullable=False),
+        sa.Column("storage_bucket", sa.Text, nullable=False),
+        sa.Column("storage_key", sa.Text, nullable=False),
+        sa.Column("page_number", sa.Integer, nullable=True),
+        sa.Column("checksum", sa.String(64), nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+    )
+    op.create_index("ix_artifacts_doc_type", "document_artifacts", ["document_id", "artifact_type"])
+
+    op.create_table(
+        "pages",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("documents.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("page_number", sa.Integer, nullable=False),
+        sa.Column("text", sa.Text, nullable=False, server_default=""),
+        sa.Column("ocr_confidence", sa.Float, nullable=True),
+        sa.Column("has_tables", sa.Boolean, nullable=False, server_default=sa.false()),
+        sa.Column("has_figures", sa.Boolean, nullable=False, server_default=sa.false()),
+        sa.Column("has_handwriting", sa.Boolean, nullable=False, server_default=sa.false()),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.UniqueConstraint("document_id", "page_number", name="uq_pages_doc_page"),
+    )
+
+    op.create_table(
+        "chunks",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("documents.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("page_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("pages.id", ondelete="SET NULL"), nullable=True),
+        sa.Column("page_number", sa.Integer, nullable=False),
+        sa.Column("block_id", sa.Text, nullable=True),
+        sa.Column("chunk_index", sa.Integer, nullable=False),
+        sa.Column("block_type", sa.String(32), nullable=False, server_default="paragraph"),
+        sa.Column("text", sa.Text, nullable=False),
+        sa.Column("normalized_text", sa.Text, nullable=False, server_default=""),
+        sa.Column("token_count", sa.Integer, nullable=True),
+        sa.Column("ocr_confidence", sa.Float, nullable=True),
+        sa.Column("quality_flags", postgresql.JSONB, nullable=False, server_default=sa.text("'{}'::jsonb")),
+        sa.Column("metadata", postgresql.JSONB, nullable=False, server_default=sa.text("'{}'::jsonb")),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.UniqueConstraint("document_id", "chunk_index", name="uq_chunks_doc_idx"),
+    )
+    op.create_index("ix_chunks_doc_page", "chunks", ["document_id", "page_number"])
+    op.create_index("ix_chunks_block_type", "chunks", ["block_type"])
+
+    op.create_table(
+        "tables",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("documents.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("page_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("pages.id", ondelete="SET NULL"), nullable=True),
+        sa.Column("page_number", sa.Integer, nullable=False),
+        sa.Column("table_index", sa.Integer, nullable=False),
+        sa.Column("markdown", sa.Text, nullable=False, server_default=""),
+        sa.Column("csv_text", sa.Text, nullable=True),
+        sa.Column("json_data", postgresql.JSONB, nullable=True),
+        sa.Column("summary", sa.Text, nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.UniqueConstraint("document_id", "table_index", name="uq_tables_doc_idx"),
+    )
+
+    op.create_table(
+        "figures",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("documents.id", ondelete="CASCADE"), nullable=False),
+        sa.Column("page_id", postgresql.UUID(as_uuid=True),
+                  sa.ForeignKey("pages.id", ondelete="SET NULL"), nullable=True),
+        sa.Column("page_number", sa.Integer, nullable=False),
+        sa.Column("figure_index", sa.Integer, nullable=False),
+        sa.Column("caption", sa.Text, nullable=True),
+        sa.Column("description", sa.Text, nullable=True),
+        sa.Column("storage_bucket", sa.Text, nullable=True),
+        sa.Column("storage_key", sa.Text, nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.UniqueConstraint("document_id", "figure_index", name="uq_figures_doc_idx"),
+    )
+
+    op.create_table(
+        "ingestion_runs",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("started_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column("finished_at", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("status", sa.String(32), nullable=False, server_default="RUNNING"),
+        sa.Column("source_folder", sa.Text, nullable=False),
+        sa.Column("total_files", sa.Integer, nullable=False, server_default="0"),
+        sa.Column("processed_files", sa.Integer, nullable=False, server_default="0"),
+        sa.Column("failed_files", sa.Integer, nullable=False, server_default="0"),
+        sa.Column("metadata", postgresql.JSONB, nullable=False, server_default=sa.text("'{}'::jsonb")),
+    )
+
+    op.create_table(
+        "processing_events",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("run_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("stage", sa.String(64), nullable=False),
+        sa.Column("level", sa.String(16), nullable=False, server_default="INFO"),
+        sa.Column("message", sa.Text, nullable=False),
+        sa.Column("data", postgresql.JSONB, nullable=False, server_default=sa.text("'{}'::jsonb")),
+        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+    )
+    op.create_index("ix_events_doc", "processing_events", ["document_id"])
+    op.create_index("ix_events_run", "processing_events", ["run_id"])
+    op.create_index("ix_events_stage", "processing_events", ["stage"])
+
+
+def downgrade() -> None:
+    op.drop_index("ix_events_stage", table_name="processing_events")
+    op.drop_index("ix_events_run", table_name="processing_events")
+    op.drop_index("ix_events_doc", table_name="processing_events")
+    op.drop_table("processing_events")
+    op.drop_table("ingestion_runs")
+    op.drop_table("figures")
+    op.drop_table("tables")
+    op.drop_index("ix_chunks_block_type", table_name="chunks")
+    op.drop_index("ix_chunks_doc_page", table_name="chunks")
+    op.drop_table("chunks")
+    op.drop_table("pages")
+    op.drop_index("ix_artifacts_doc_type", table_name="document_artifacts")
+    op.drop_table("document_artifacts")
+    op.drop_index("ix_documents_sha256", table_name="documents")
+    op.drop_index("ix_documents_status", table_name="documents")
+    op.drop_table("documents")
diff --git a/app/db/models.py b/app/db/models.py
new file mode 100644
index 0000000..05a0425
--- /dev/null
+++ b/app/db/models.py
@@ -0,0 +1,266 @@
+"""SQLAlchemy ORM models for LegacyHUB."""
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+from typing import Any
+
+from sqlalchemy import (
+    BigInteger,
+    Boolean,
+    DateTime,
+    Float,
+    ForeignKey,
+    Index,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+    func,
+)
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+
+
+class Base(DeclarativeBase):  # Declarative base that every ORM model in this module subclasses.
+    pass
+
+
+# ---- Status / type literals (kept as plain strings to avoid PG enum churn) ----
+
+class DocumentStatus:  # Plain-string status values; Document.status defaults to DISCOVERED.
+    DISCOVERED = "DISCOVERED"
+    STORED_ORIGINAL = "STORED_ORIGINAL"
+    OCR_STARTED = "OCR_STARTED"
+    OCR_COMPLETED = "OCR_COMPLETED"
+    OCR_FAILED = "OCR_FAILED"
+    EXTRACTION_STARTED = "EXTRACTION_STARTED"
+    EXTRACTION_COMPLETED = "EXTRACTION_COMPLETED"
+    EXTRACTION_FAILED = "EXTRACTION_FAILED"
+    CHUNKING_COMPLETED = "CHUNKING_COMPLETED"
+    INDEXING_COMPLETED = "INDEXING_COMPLETED"
+    FAILED = "FAILED"
+
+
+class ArtifactType:  # Plain-string constants; presumably the values of DocumentArtifact.artifact_type (confirm with writers).
+    ORIGINAL_PDF = "original_pdf"
+    OCR_PDF = "ocr_pdf"
+    DOCLING_JSON = "docling_json"
+    MARKDOWN = "markdown"
+    PAGE_IMAGE = "page_image"
+    FIGURE_CROP = "figure_crop"
+    TABLE_JSON = "table_json"
+
+
+class BlockType:  # Plain-string block kinds; Chunk.block_type defaults to PARAGRAPH.
+    TITLE = "title"
+    HEADING = "heading"
+    PARAGRAPH = "paragraph"
+    LIST = "list"
+    TABLE = "table"
+    FIGURE_CAPTION = "figure_caption"
+    FIGURE_DESCRIPTION = "figure_description"
+    HANDWRITING = "handwriting"
+    UNKNOWN = "unknown"
+
+
+# ---- Tables ----
+
+class Document(Base):  # One ingested source file; parent of its artifacts, pages and chunks.
+    __tablename__ = "documents"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    source_path: Mapped[str] = mapped_column(Text, nullable=False)
+    original_file_name: Mapped[str] = mapped_column(Text, nullable=False)
+    sha256: Mapped[str] = mapped_column(String(64), nullable=False, unique=True, index=True)  # unique, so one row per distinct hash
+    file_size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False)
+    mime_type: Mapped[str] = mapped_column(Text, nullable=False, default="application/pdf")
+    language_hint: Mapped[str | None] = mapped_column(Text, nullable=True)
+    status: Mapped[str] = mapped_column(
+        String(64), nullable=False, default=DocumentStatus.DISCOVERED, index=True
+    )
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False
+    )  # onupdate=func.now() refreshes the timestamp on ORM-issued UPDATEs
+
+    artifacts: Mapped[list[DocumentArtifact]] = relationship(
+        back_populates="document", cascade="all, delete-orphan"
+    )
+    pages: Mapped[list[Page]] = relationship(
+        back_populates="document", cascade="all, delete-orphan"
+    )
+    chunks: Mapped[list[Chunk]] = relationship(
+        back_populates="document", cascade="all, delete-orphan"
+    )
+
+
+class DocumentArtifact(Base):  # A stored object (bucket + key) attached to a document, optionally tied to one page.
+    __tablename__ = "document_artifacts"
+    __table_args__ = (
+        Index("ix_artifacts_doc_type", "document_id", "artifact_type"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
+    )  # rows are removed by the database when the parent document is deleted
+    artifact_type: Mapped[str] = mapped_column(String(64), nullable=False)
+    storage_bucket: Mapped[str] = mapped_column(Text, nullable=False)
+    storage_key: Mapped[str] = mapped_column(Text, nullable=False)
+    page_number: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    checksum: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    document: Mapped[Document] = relationship(back_populates="artifacts")
+
+
+class Page(Base):  # One page of a document; (document_id, page_number) is unique.
+    __tablename__ = "pages"
+    __table_args__ = (
+        UniqueConstraint("document_id", "page_number", name="uq_pages_doc_page"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
+    )
+    page_number: Mapped[int] = mapped_column(Integer, nullable=False)
+    text: Mapped[str] = mapped_column(Text, nullable=False, default="")
+    ocr_confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
+    has_tables: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    has_figures: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    has_handwriting: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    document: Mapped[Document] = relationship(back_populates="pages")
+    chunks: Mapped[list[Chunk]] = relationship(back_populates="page")  # no ORM cascade here; Chunk.page_id is SET NULL on page delete
+
+
+class Chunk(Base):  # A text block of a document; (document_id, chunk_index) is unique.
+    __tablename__ = "chunks"
+    __table_args__ = (
+        UniqueConstraint("document_id", "chunk_index", name="uq_chunks_doc_idx"),
+        Index("ix_chunks_doc_page", "document_id", "page_number"),
+        Index("ix_chunks_block_type", "block_type"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
+    )
+    page_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("pages.id", ondelete="SET NULL"), nullable=True
+    )  # deleting the page keeps the chunk and nulls this reference
+    page_number: Mapped[int] = mapped_column(Integer, nullable=False)
+    block_id: Mapped[str | None] = mapped_column(Text, nullable=True)
+    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    block_type: Mapped[str] = mapped_column(String(32), nullable=False, default=BlockType.PARAGRAPH)
+    text: Mapped[str] = mapped_column(Text, nullable=False)
+    normalized_text: Mapped[str] = mapped_column(Text, nullable=False, default="")
+    token_count: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    ocr_confidence: Mapped[float | None] = mapped_column(Float, nullable=True)
+    quality_flags: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict)
+    chunk_metadata: Mapped[dict[str, Any]] = mapped_column(
+        "metadata", JSONB, nullable=False, default=dict
+    )  # DB column is "metadata"; the attribute is renamed because declarative classes reserve ``metadata``
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+    document: Mapped[Document] = relationship(back_populates="chunks")
+    page: Mapped[Page | None] = relationship(back_populates="chunks")
+
+
+class Table(Base):  # An extracted table kept as markdown plus optional CSV/JSON/summary; (document_id, table_index) is unique.
+    __tablename__ = "tables"
+    __table_args__ = (
+        UniqueConstraint("document_id", "table_index", name="uq_tables_doc_idx"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
+    )
+    page_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("pages.id", ondelete="SET NULL"), nullable=True
+    )
+    page_number: Mapped[int] = mapped_column(Integer, nullable=False)
+    table_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    markdown: Mapped[str] = mapped_column(Text, nullable=False, default="")
+    csv_text: Mapped[str | None] = mapped_column(Text, nullable=True)
+    json_data: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True)
+    summary: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+
+class Figure(Base):  # An extracted figure with optional caption, description and stored image location; (document_id, figure_index) is unique.
+    __tablename__ = "figures"
+    __table_args__ = (
+        UniqueConstraint("document_id", "figure_index", name="uq_figures_doc_idx"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("documents.id", ondelete="CASCADE"), nullable=False
+    )
+    page_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("pages.id", ondelete="SET NULL"), nullable=True
+    )
+    page_number: Mapped[int] = mapped_column(Integer, nullable=False)
+    figure_index: Mapped[int] = mapped_column(Integer, nullable=False)
+    caption: Mapped[str | None] = mapped_column(Text, nullable=True)
+    description: Mapped[str | None] = mapped_column(Text, nullable=True)
+    storage_bucket: Mapped[str | None] = mapped_column(Text, nullable=True)
+    storage_key: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+
+
+class IngestionRun(Base):  # Bookkeeping row for one ingestion pass over a source folder (status plus file counters).
+    __tablename__ = "ingestion_runs"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    started_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
+    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    status: Mapped[str] = mapped_column(String(32), nullable=False, default="RUNNING")
+    source_folder: Mapped[str] = mapped_column(Text, nullable=False)
+    total_files: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    processed_files: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    failed_files: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    run_metadata: Mapped[dict[str, Any]] = mapped_column(
+        "metadata", JSONB, nullable=False, default=dict
+    )  # DB column is "metadata"; the attribute is renamed because declarative classes reserve ``metadata``
+
+
+class ProcessingEvent(Base):  # A log-style event row: stage, level, message and a JSON payload.
+    __tablename__ = "processing_events"
+    __table_args__ = (
+        Index("ix_events_doc", "document_id"),
+        Index("ix_events_run", "run_id"),
+        Index("ix_events_stage", "stage"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+    run_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)  # plain UUID, no ForeignKey declared
+    document_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)  # plain UUID, no ForeignKey declared
+    stage: Mapped[str] = mapped_column(String(64), nullable=False)
+    level: Mapped[str] = mapped_column(String(16), nullable=False, default="INFO")
+    message: Mapped[str] = mapped_column(Text, nullable=False)
+    data: Mapped[dict[str, Any]] = mapped_column(JSONB, nullable=False, default=dict)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), server_default=func.now(), nullable=False
+    )
diff --git a/app/db/session.py b/app/db/session.py
new file mode 100644
index 0000000..42fc4a6
--- /dev/null
+++ b/app/db/session.py
@@ -0,0 +1,66 @@
+"""SQLAlchemy engine and session factory."""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from contextlib import contextmanager
+
+from sqlalchemy import create_engine
+from sqlalchemy.engine import Engine
+from sqlalchemy.orm import Session, sessionmaker
+
+from app.config import settings
+
+_engine: Engine | None = None
+_SessionFactory: sessionmaker[Session] | None = None
+
+
+def get_engine() -> Engine:  # Create the Engine on first call, then keep returning the module-level instance.
+    global _engine
+    if _engine is None:
+        _engine = create_engine(
+            settings.database_url,
+            pool_pre_ping=True,  # test a pooled connection before handing it out
+            pool_size=10,
+            max_overflow=20,  # on top of pool_size, so at most 30 connections
+            future=True,
+        )
+    return _engine
+
+
+def get_session_factory() -> sessionmaker[Session]:  # Build the sessionmaker bound to get_engine() once, then reuse it.
+    global _SessionFactory
+    if _SessionFactory is None:
+        _SessionFactory = sessionmaker(
+            bind=get_engine(),
+            autoflush=False,
+            autocommit=False,
+            expire_on_commit=False,  # loaded attributes stay usable after commit
+            future=True,
+        )
+    return _SessionFactory
+
+
+@contextmanager
+def session_scope() -> Iterator[Session]:
+    """Yield a new Session; commit if the block exits cleanly, roll back and re-raise on error, always close."""
+    factory = get_session_factory()
+    session = factory()
+    try:
+        yield session
+        session.commit()
+    except Exception:
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+
+def get_db() -> Iterator[Session]:
+    """FastAPI dependency: yield a new Session and always close it; nothing is committed here."""
+    factory = get_session_factory()
+    session = factory()
+    try:
+        yield session
+    finally:
+        session.close()
diff --git a/app/indexing/__init__.py b/app/indexing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/indexing/embeddings.py b/app/indexing/embeddings.py
new file mode 100644
index 0000000..3aa1c75
--- /dev/null
+++ b/app/indexing/embeddings.py
@@ -0,0 +1,90 @@
+"""BGE-M3 dense embedder with batching and CPU/GPU support.
+
+We prefer FlagEmbedding's ``BGEM3FlagModel`` because it is the canonical
+implementation and supports dense + sparse output. We fall back to
+``sentence-transformers`` for portability.
+"""
+
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Sequence
+
+import numpy as np
+
+from app.config import settings
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class Embedder:  # Dense text embedder backed by FlagEmbedding's BGEM3FlagModel, or sentence-transformers if that fails to load.
+    def __init__(self, model_name: str, device: str, normalize: bool, batch_size: int) -> None:
+        self.model_name = model_name
+        self.device = device
+        self.normalize = normalize
+        self.batch_size = batch_size
+        self._impl = "flagembedding"  # _load() sets this to the backend actually in use
+        self._model = None
+        self._st_model = None
+        self._load()  # the model is loaded eagerly, at construction time
+
+    def _load(self) -> None:  # Try FlagEmbedding first; on any exception log a warning and load sentence-transformers.
+        try:
+            from FlagEmbedding import BGEM3FlagModel  # type: ignore
+            use_fp16 = self.device != "cpu"  # half precision on every device except "cpu"
+            self._model = BGEM3FlagModel(self.model_name, use_fp16=use_fp16, devices=self.device)
+            self._impl = "flagembedding"
+            logger.info("embedder.loaded", impl="flagembedding", model=self.model_name, device=self.device)
+            return
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("embedder.flagembedding_failed", error=str(exc))
+        # Fallback backend: reached only when the FlagEmbedding path above raised.
+        from sentence_transformers import SentenceTransformer
+        self._st_model = SentenceTransformer(self.model_name, device=self.device)
+        self._impl = "sentence-transformers"
+        logger.info("embedder.loaded", impl="sentence-transformers", model=self.model_name, device=self.device)
+
+    def encode(self, texts: Sequence[str]) -> list[list[float]]:  # One vector (list of floats) per text; [] when texts is empty.
+        if not texts:
+            return []
+        if self._impl == "flagembedding":
+            out = self._model.encode(  # type: ignore[union-attr]
+                list(texts),
+                batch_size=self.batch_size,
+                max_length=8192,
+                return_dense=True,
+                return_sparse=False,
+                return_colbert_vecs=False,
+            )
+            dense = out["dense_vecs"] if isinstance(out, dict) else out
+            arr = np.asarray(dense, dtype=np.float32)
+        else:
+            arr = self._st_model.encode(  # type: ignore[union-attr]
+                list(texts),
+                batch_size=self.batch_size,
+                normalize_embeddings=self.normalize,
+                convert_to_numpy=True,
+                show_progress_bar=False,
+            )
+            arr = arr.astype(np.float32)
+        # sentence-transformers was already given ``normalize``; FlagEmbedding output is L2-normalized by hand.
+        if self.normalize and self._impl == "flagembedding":
+            norms = np.linalg.norm(arr, axis=1, keepdims=True)
+            norms[norms == 0] = 1.0  # leave all-zero rows untouched instead of dividing by zero
+            arr = arr / norms
+
+        return arr.tolist()
+
+    def encode_one(self, text: str) -> list[float]:  # Convenience wrapper: encode a single text and return its vector.
+        return self.encode([text])[0]
+
+
+@lru_cache(maxsize=1)
+def get_embedder() -> Embedder:  # Cached factory: the Embedder is built once from the embedding_* settings.
+    return Embedder(
+        model_name=settings.embedding_model,
+        device=settings.embedding_device,
+        normalize=settings.embedding_normalize,
+        batch_size=settings.embedding_batch_size,
+    )
diff --git a/app/indexing/hybrid_search.py b/app/indexing/hybrid_search.py
new file mode 100644
index 0000000..d7a2959
--- /dev/null
+++ b/app/indexing/hybrid_search.py
@@ -0,0 +1,327 @@
+"""Hybrid search: lexical (OpenSearch BM25) + semantic (Qdrant) + RRF + reranker.
+
+Always returns ``SearchResponse`` (never throws on missing index/collection -
+empty results are valid).
+"""
+
+from __future__ import annotations
+
+import uuid
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Any
+
+from qdrant_client.http import models as qm
+
+from app.api.schemas import (
+    Citation,
+    SearchFilters,
+    SearchHit,
+    SearchMode,
+    SearchRequest,
+    SearchResponse,
+)
+from app.config import settings
+from app.indexing.embeddings import get_embedder
+from app.indexing.opensearch_client import get_opensearch
+from app.indexing.qdrant_client import DENSE_VECTOR_NAME, get_qdrant
+from app.indexing.reranker import get_reranker
+from app.logging_config import get_logger
+from app.utils.text_cleaning import normalize_for_search
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class _Candidate:  # Internal search hit: chunk fields plus per-retriever score and 1-based rank.
+    chunk_id: str
+    document_id: str
+    page_number: int
+    block_type: str
+    block_id: str | None
+    text: str
+    source_path: str
+    original_file_name: str
+    quality_flags: dict[str, Any]
+    metadata: dict[str, Any]
+    bm25_score: float | None = None  # set by _lexical_search only
+    bm25_rank: int | None = None
+    dense_score: float | None = None  # set by _semantic_search; run_search overwrites it with the reranker score
+    dense_rank: int | None = None
+
+
+def run_search(req: SearchRequest) -> SearchResponse:
+    """Run lexical and/or semantic retrieval, fuse, optionally rerank, and build the response.
+
+    A failing retriever is logged and yields no hits; the reranker is fetched only when enabled.
+    """
+    mode: SearchMode = req.search_mode
+    filters = req.filters
+    lexical: list[_Candidate] = []
+    semantic: list[_Candidate] = []
+
+    if mode in ("lexical", "hybrid"):
+        try:
+            lexical = _lexical_search(req.query, filters, settings.hybrid_opensearch_top_k)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("search.lexical_failed", error=str(exc))
+
+    if mode in ("semantic", "hybrid"):
+        try:
+            semantic = _semantic_search(req.query, filters, settings.hybrid_qdrant_top_k)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("search.semantic_failed", error=str(exc))
+
+    merged = _merge(lexical, semantic, mode)
+    candidates = merged[: settings.rerank_candidates]
+
+    reranked_flag = False
+    if settings.reranker_enabled and candidates and (reranker := get_reranker()).available:
+        scores = reranker.score(req.query, [c.text for c in candidates])
+        for c, s in zip(candidates, scores, strict=True):
+            c.dense_score = s  # from here on dense_score holds the reranker score
+        candidates.sort(key=lambda c: (c.dense_score or 0.0), reverse=True)
+        reranked_flag = True
+
+    hits: list[SearchHit] = []
+    for rank, c in enumerate(candidates[: req.limit], start=1):
+        if reranked_flag or mode == "semantic" or c.bm25_score is None:
+            # Hybrid hits found only by Qdrant carry no BM25 score; report their dense score.
+            score = c.dense_score or 0.0
+        else:
+            score = c.bm25_score
+        hits.append(
+            SearchHit(
+                rank=rank,
+                score=float(score),
+                document_id=uuid.UUID(c.document_id),
+                chunk_id=uuid.UUID(c.chunk_id),
+                original_file_name=c.original_file_name,
+                source_path=c.source_path,
+                page_number=c.page_number,
+                block_type=c.block_type,
+                text=c.text,
+                citation=Citation(
+                    pdf=c.original_file_name,
+                    page=c.page_number,
+                    block_id=c.block_id,
+                    table_id=str(c.metadata.get("table_index")) if c.metadata.get("table_index") is not None else None,
+                    figure_id=str(c.metadata.get("figure_index")) if c.metadata.get("figure_index") is not None else None,
+                ),
+                quality_flags=c.quality_flags,
+                metadata=c.metadata,
+            )
+        )
+
+    return SearchResponse(
+        query=req.query,
+        mode=mode,
+        total_candidates=len(merged),
+        reranked=reranked_flag,
+        results=hits,
+    )
+
+
+# ---------------- lexical ----------------
+
+def _lexical_search(query: str, filters: SearchFilters, top_k: int) -> list[_Candidate]:
+    """BM25 search over the chunk index; returns ranked candidates, or [] when the index is missing."""
+    client = get_opensearch()
+    if not client.indices.exists(index=settings.opensearch_index_chunks):
+        return []
+    must = [
+        {
+            "multi_match": {
+                "query": query,
+                "fields": ["text^1.0", "text.ru^1.5", "text.en^1.5", "normalized_text^0.7"],
+                "type": "best_fields",
+                "operator": "or",
+            }
+        }
+    ]
+    norm = normalize_for_search(query)
+    if norm and norm != query.lower():
+        must.append({"match": {"normalized_text": {"query": norm, "boost": 0.5}}})
+
+    filter_clauses = _opensearch_filters(filters)
+    body = {
+        "size": top_k,
+        "query": {"bool": {"must": must, "filter": filter_clauses}},
+        "_source": [
+            "chunk_id",
+            "document_id",
+            "source_path",
+            "original_file_name",
+            "page_number",
+            "block_type",
+            "block_id",
+            "text",
+            "quality_flags",
+            "metadata",
+        ],
+    }
+    res = client.search(index=settings.opensearch_index_chunks, body=body, request_timeout=30)
+    out: list[_Candidate] = []
+    for rank, hit in enumerate(res.get("hits", {}).get("hits", []), start=1):
+        s = hit.get("_source", {})
+        out.append(
+            _Candidate(
+                chunk_id=s.get("chunk_id") or hit["_id"],  # documents are indexed with _id == chunk_id
+                document_id=s["document_id"],
+                page_number=int(s.get("page_number") or 0),  # tolerate an explicit null, like the Qdrant path
+                block_type=s.get("block_type", "paragraph"),
+                block_id=s.get("block_id"),
+                text=s.get("text", ""),
+                source_path=s.get("source_path", ""),
+                original_file_name=s.get("original_file_name", ""),
+                quality_flags=s.get("quality_flags") or {},
+                metadata=s.get("metadata") or {},
+                bm25_score=float(hit.get("_score") or 0.0),
+                bm25_rank=rank,
+            )
+        )
+    return out
+
+
+def _opensearch_filters(filters: SearchFilters) -> list[dict[str, Any]]:
+    """Translate the filters that are set into OpenSearch ``filter`` clauses (terms first, then the OCR range)."""
+    term_values = {
+        "document_id": str(filters.document_id) if filters.document_id else None,
+        "source_path": filters.source_path,
+        "block_type": filters.block_type,
+    }
+    clauses: list[dict[str, Any]] = [{"term": {k: v}} for k, v in term_values.items() if v]
+    if filters.min_ocr_confidence is not None:
+        clauses.append({"range": {"ocr_confidence": {"gte": filters.min_ocr_confidence}}})
+    return clauses
+
+
+# ---------------- semantic ----------------
+
+def _semantic_search(query: str, filters: SearchFilters, top_k: int) -> list[_Candidate]:  # Embed the query and return the top_k nearest Qdrant points as ranked candidates.
+    embedder = get_embedder()
+    vector = embedder.encode_one(query)
+    qf = _qdrant_filter(filters)
+
+    client = get_qdrant()
+    try:
+        results = client.query_points(
+            collection_name=settings.qdrant_collection_chunks,
+            query=vector,
+            using=DENSE_VECTOR_NAME,
+            limit=top_k,
+            with_payload=True,
+            query_filter=qf,
+        ).points
+    except Exception as exc:  # noqa: BLE001
+        logger.debug("qdrant.query_points_fallback", error=str(exc))
+        results = client.search(  # fallback taken whenever query_points raised, for any reason
+            collection_name=settings.qdrant_collection_chunks,
+            query_vector=(DENSE_VECTOR_NAME, vector),
+            query_filter=qf,
+            limit=top_k,
+            with_payload=True,
+        )
+    # Map payload fields onto candidates; missing fields get neutral defaults and rank is 1-based.
+    out: list[_Candidate] = []
+    for rank, p in enumerate(results, start=1):
+        payload = p.payload or {}
+        chunk_id = payload.get("chunk_id") or str(p.id)  # fall back to the point id
+        out.append(
+            _Candidate(
+                chunk_id=str(chunk_id),
+                document_id=str(payload.get("document_id", "")),
+                page_number=int(payload.get("page_number") or 0),
+                block_type=payload.get("block_type", "paragraph"),
+                block_id=payload.get("block_id"),
+                text=payload.get("text_preview", ""),  # the payload carries only "text_preview"
+                source_path=payload.get("source_path", ""),
+                original_file_name=payload.get("original_file_name", ""),
+                quality_flags=payload.get("quality_flags") or {},
+                metadata=payload.get("metadata") or {},
+                dense_score=float(p.score or 0.0),
+                dense_rank=rank,
+            )
+        )
+    return out
+
+
+def _qdrant_filter(filters: SearchFilters) -> qm.Filter | None:  # AND together one condition per filter that is set; None when none is set.
+    must: list[qm.FieldCondition | qm.Range] = []
+    if filters.document_id:
+        must.append(qm.FieldCondition(key="document_id", match=qm.MatchValue(value=str(filters.document_id))))
+    if filters.source_path:
+        must.append(qm.FieldCondition(key="source_path", match=qm.MatchValue(value=filters.source_path)))
+    if filters.block_type:
+        must.append(qm.FieldCondition(key="block_type", match=qm.MatchValue(value=filters.block_type)))
+    if filters.min_ocr_confidence is not None:  # 0.0 is a valid threshold, hence the explicit None check
+        must.append(qm.FieldCondition(key="ocr_confidence", range=qm.Range(gte=filters.min_ocr_confidence)))
+    if not must:
+        return None
+    return qm.Filter(must=must)
+
+
+# ---------------- merge ----------------
+
+def _merge(lexical: list[_Candidate], semantic: list[_Candidate], mode: SearchMode) -> list[_Candidate]:  # Pick or fuse the two hit lists according to mode.
+    if mode == "lexical":
+        return lexical
+    if mode == "semantic":
+        return _hydrate_semantic_text(semantic)
+    # Any other mode: union by chunk_id; a lexical entry wins and absorbs the dense score and rank.
+    by_id: dict[str, _Candidate] = {}
+    for c in lexical:
+        by_id[c.chunk_id] = c
+    for c in semantic:
+        if c.chunk_id in by_id:
+            by_id[c.chunk_id].dense_score = c.dense_score
+            by_id[c.chunk_id].dense_rank = c.dense_rank
+            if not by_id[c.chunk_id].text:
+                by_id[c.chunk_id].text = c.text
+        else:
+            by_id[c.chunk_id] = c
+    # Reciprocal-rank fusion: each list adds 1 / (k + rank) to the chunk's score.
+    rrf: dict[str, float] = defaultdict(float)
+    k = settings.hybrid_rrf_k
+    for c in lexical:
+        if c.bm25_rank is not None:
+            rrf[c.chunk_id] += 1.0 / (k + c.bm25_rank)
+    for c in semantic:
+        if c.dense_rank is not None:
+            rrf[c.chunk_id] += 1.0 / (k + c.dense_rank)
+
+    items = sorted(by_id.values(), key=lambda c: rrf.get(c.chunk_id, 0.0), reverse=True)
+    return _hydrate_full_text(items)
+
+
+def _hydrate_full_text(candidates: list[_Candidate]) -> list[_Candidate]:
+    """Swap short texts (<= 512 chars, e.g. Qdrant previews) for the full OpenSearch text, in place.
+    Best-effort: a failed ``mget`` is logged and the candidates are returned unchanged.
+    """
+    short = [c for c in candidates if len(c.text) <= 512]
+    if not short:
+        return candidates
+    client = get_opensearch()
+    try:
+        res = client.mget(index=settings.opensearch_index_chunks, body={"ids": [c.chunk_id for c in short]})
+    except Exception as exc:  # noqa: BLE001
+        logger.warning("search.hydrate_failed", error=str(exc))
+        return candidates
+    by_id = {d["_id"]: d.get("_source", {}) for d in res.get("docs", []) if d.get("found")}
+    for c in short:
+        s = by_id.get(c.chunk_id)
+        if s and s.get("text"):
+            c.text = s["text"]
+            if not c.original_file_name:
+                c.original_file_name = s.get("original_file_name", "")
+            if not c.source_path:
+                c.source_path = s.get("source_path", "")
+            if not c.metadata:
+                c.metadata = s.get("metadata") or {}
+            if not c.quality_flags:
+                c.quality_flags = s.get("quality_flags") or {}
+    return candidates
+
+
+def _hydrate_semantic_text(candidates: list[_Candidate]) -> list[_Candidate]:  # Semantic-mode alias for _hydrate_full_text.
+    return _hydrate_full_text(candidates)
diff --git a/app/indexing/opensearch_client.py b/app/indexing/opensearch_client.py
new file mode 100644
index 0000000..61679d0
--- /dev/null
+++ b/app/indexing/opensearch_client.py
@@ -0,0 +1,142 @@
+"""OpenSearch client + index bootstrap + chunk indexing helpers."""
+
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Any, Iterable
+
+from opensearchpy import OpenSearch, RequestsHttpConnection
+from opensearchpy.helpers import bulk
+
+from app.config import settings
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+# Index settings: 3 custom analyzers (ru_analyzer, en_analyzer, code_analyzer).
+# We index ``text`` with multi-fields (.ru, .en) so we can boost per language at query time.
+INDEX_SETTINGS: dict[str, Any] = {
+    "settings": {
+        "number_of_shards": 1,
+        "number_of_replicas": 0,
+        "analysis": {
+            "filter": {
+                "ru_stop": {"type": "stop", "stopwords": "_russian_"},
+                "ru_stemmer": {"type": "stemmer", "language": "russian"},
+                "en_stop": {"type": "stop", "stopwords": "_english_"},
+                "en_stemmer": {"type": "stemmer", "language": "english"},
+            },
+            "analyzer": {
+                "ru_analyzer": {
+                    "type": "custom",
+                    "tokenizer": "standard",
+                    "filter": ["lowercase", "ru_stop", "ru_stemmer"],
+                },
+                "en_analyzer": {
+                    "type": "custom",
+                    "tokenizer": "standard",
+                    "filter": ["lowercase", "en_stop", "en_stemmer"],
+                },
+                "code_analyzer": {
+                    "type": "custom",
+                    "tokenizer": "standard",
+                    "filter": ["lowercase"],
+                },
+            },
+        },
+    },
+    "mappings": {
+        "dynamic": "strict",
+        "properties": {
+            "chunk_id": {"type": "keyword"},
+            "document_id": {"type": "keyword"},
+            "source_path": {"type": "keyword"},
+            "original_file_name": {
+                "type": "text",
+                "fields": {"keyword": {"type": "keyword", "ignore_above": 512}},
+            },
+            "page_number": {"type": "integer"},
+            "block_type": {"type": "keyword"},
+            "block_id": {"type": "keyword"},
+            "text": {
+                "type": "text",
+                "analyzer": "code_analyzer",
+                "fields": {
+                    "ru": {"type": "text", "analyzer": "ru_analyzer"},
+                    "en": {"type": "text", "analyzer": "en_analyzer"},
+                },
+            },
+            "normalized_text": {
+                "type": "text",
+                "analyzer": "code_analyzer",
+            },
+            "ocr_confidence": {"type": "float"},
+            "language_hint": {"type": "keyword"},
+            "metadata": {"type": "object", "enabled": True, "dynamic": True},  # free-form: opt out of the inherited "strict"
+            "quality_flags": {"type": "object", "enabled": True, "dynamic": True},  # free-form: opt out of the inherited "strict"
+            "created_at": {"type": "date"},
+        },
+    },
+}
+
+
+@lru_cache(maxsize=1)
+def get_opensearch() -> OpenSearch:  # Cached factory: the client is built once from the opensearch_* settings.
+    auth = None
+    if settings.opensearch_user and settings.opensearch_password:  # basic auth only when both are configured
+        auth = (settings.opensearch_user, settings.opensearch_password)
+    return OpenSearch(
+        hosts=[{"host": settings.opensearch_host, "port": settings.opensearch_port}],
+        http_auth=auth,
+        use_ssl=settings.opensearch_use_ssl,
+        verify_certs=settings.opensearch_verify_certs,
+        ssl_show_warn=False,
+        connection_class=RequestsHttpConnection,
+        timeout=30,
+        max_retries=3,
+        retry_on_timeout=True,
+    )
+
+
+def ensure_index(index: str | None = None) -> None:  # Create the index with INDEX_SETTINGS unless it already exists.
+    name = index or settings.opensearch_index_chunks  # default to the configured chunk index
+    client = get_opensearch()
+    if client.indices.exists(index=name):
+        logger.debug("opensearch.index.exists", index=name)
+        return
+    logger.info("opensearch.index.create", index=name)
+    client.indices.create(index=name, body=INDEX_SETTINGS)
+
+
+def index_chunks(docs: Iterable[dict[str, Any]], index: str | None = None) -> tuple[int, int]:
+    """Bulk-upsert chunks keyed by ``chunk_id``. Returns (success, errors); (0, 0) for no docs."""
+    name = index or settings.opensearch_index_chunks
+    actions: list[dict[str, Any]] = [
+        {
+            "_op_type": "index",
+            "_index": name,
+            "_id": d["chunk_id"],
+            "_source": d,
+        }
+        for d in docs
+    ]
+    if not actions:
+        return 0, 0
+    success, errors = bulk(get_opensearch(), actions, raise_on_error=False, request_timeout=120)
+    error_count = len(errors) if isinstance(errors, list) else int(errors)  # a plain count only with stats_only
+    if error_count:
+        logger.warning("opensearch.bulk.errors", count=error_count)
+    return success, error_count
+
+
+def delete_by_document(document_id: str, index: str | None = None) -> int:  # Delete every indexed chunk of one document; returns the deleted count.
+    name = index or settings.opensearch_index_chunks
+    client = get_opensearch()
+    if not client.indices.exists(index=name):  # nothing to delete when the index is absent
+        return 0
+    res = client.delete_by_query(
+        index=name,
+        body={"query": {"term": {"document_id": document_id}}},
+        refresh=True,
+    )
+    return int(res.get("deleted", 0))
diff --git a/app/indexing/qdrant_client.py b/app/indexing/qdrant_client.py
new file mode 100644
index 0000000..41f2b45
--- /dev/null
+++ b/app/indexing/qdrant_client.py
@@ -0,0 +1,103 @@
+"""Qdrant client + collection bootstrap + chunk upsert."""
+
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Any, Sequence
+
+from qdrant_client import QdrantClient
+from qdrant_client.http import models as qm
+
+from app.config import settings
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+DENSE_VECTOR_NAME = "dense"
+
+
+@lru_cache(maxsize=1)
+def get_qdrant() -> QdrantClient:
+    """Build the Qdrant client from settings; the result is cached after the first call."""
+    connection = {
+        "host": settings.qdrant_host,
+        "port": settings.qdrant_port,
+        # An empty API key in settings is passed as None.
+        "api_key": settings.qdrant_api_key or None,
+        "timeout": 60,
+    }
+    return QdrantClient(**connection)
+
+
+def ensure_collection(collection: str | None = None, dim: int | None = None) -> None:
+    """Create the chunk collection and its payload indexes when it is absent.
+
+    ``collection`` falls back to ``settings.qdrant_collection_chunks`` and
+    ``dim`` to ``settings.embedding_dim``. An existing collection is left
+    untouched (its payload indexes are not re-checked).
+    """
+    coll_name = collection or settings.qdrant_collection_chunks
+    size = dim or settings.embedding_dim
+    qdrant = get_qdrant()
+    if any(c.name == coll_name for c in qdrant.get_collections().collections):
+        logger.debug("qdrant.collection.exists", collection=coll_name)
+        return
+
+    logger.info("qdrant.collection.create", collection=coll_name, dim=size)
+    dense_params = qm.VectorParams(size=size, distance=qm.Distance.COSINE)
+    qdrant.create_collection(
+        collection_name=coll_name,
+        vectors_config={DENSE_VECTOR_NAME: dense_params},
+        optimizers_config=qm.OptimizersConfigDiff(default_segment_number=2),
+    )
+
+    # Payload indexes for filtering, created in this order.
+    payload_schemas = (
+        ("document_id", qm.PayloadSchemaType.KEYWORD),
+        ("source_path", qm.PayloadSchemaType.KEYWORD),
+        ("block_type", qm.PayloadSchemaType.KEYWORD),
+        ("page_number", qm.PayloadSchemaType.INTEGER),
+        ("ocr_confidence", qm.PayloadSchemaType.FLOAT),
+    )
+    for field_name, schema in payload_schemas:
+        qdrant.create_payload_index(
+            collection_name=coll_name,
+            field_name=field_name,
+            field_schema=schema,
+        )
+
+
+def upsert_chunks(
+    points: Sequence[tuple[str, list[float], dict[str, Any]]],
+    collection: str | None = None,
+) -> int:
+    """Upsert ``(chunk_id, vector, payload)`` triples; return how many were sent.
+
+    The chunk id is also written into each payload under ``"chunk_id"``. The
+    upsert is issued with ``wait=False``, so the returned number is the count
+    submitted, not a confirmation from the server.
+    """
+    coll_name = collection or settings.qdrant_collection_chunks
+    if not points:
+        return 0
+    structs = []
+    for chunk_id, vector, payload in points:
+        structs.append(
+            qm.PointStruct(
+                id=_qid(chunk_id),
+                vector={DENSE_VECTOR_NAME: vector},
+                payload={**payload, "chunk_id": chunk_id},
+            )
+        )
+    get_qdrant().upsert(collection_name=coll_name, points=structs, wait=False)
+    return len(structs)
+
+
+def delete_by_document(document_id: str, collection: str | None = None) -> int:
+    """Delete all points whose ``document_id`` payload matches.
+
+    NOTE: the return value is always ``1``; it signals that the delete request
+    was issued and is not the number of points removed.
+    """
+    coll_name = collection or settings.qdrant_collection_chunks
+    qdrant = get_qdrant()
+    by_document = qm.FieldCondition(key="document_id", match=qm.MatchValue(value=document_id))
+    selector = qm.FilterSelector(filter=qm.Filter(must=[by_document]))
+    qdrant.delete(collection_name=coll_name, points_selector=selector)
+    return 1
+
+
+def _qid(chunk_id: str) -> str:
+    """Return the Qdrant point id for ``chunk_id`` (currently the id itself).
+
+    Qdrant accepts UUID strings or unsigned ints, and chunk ids are UUIDs
+    already, so no conversion is applied.
+    """
+    point_id = chunk_id
+    return point_id
diff --git a/app/indexing/reranker.py b/app/indexing/reranker.py
new file mode 100644
index 0000000..898f84c
--- /dev/null
+++ b/app/indexing/reranker.py
@@ -0,0 +1,75 @@
+"""BGE reranker - cross-encoder style scoring of (query, passage) pairs.
+
+Designed to degrade gracefully:
+- If the model fails to load, ``rerank`` returns inputs unchanged with the
+  ``reranked`` flag set to False so the API can report the truth to clients.
+"""
+
+from __future__ import annotations
+
+from functools import lru_cache
+from typing import Sequence
+
+from app.config import settings
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class Reranker:
+    """Scores (query, passage) pairs with whichever reranker backend loads.
+
+    Loading is attempted in ``__init__``: FlagEmbedding first, then
+    sentence-transformers. If both fail the instance stays usable but
+    ``available`` is False and ``score`` returns zeros.
+    """
+
+    def __init__(self, model_name: str, device: str, batch_size: int) -> None:
+        self.model_name = model_name
+        self.device = device
+        self.batch_size = batch_size
+        # Name of the backend that loaded ("flagembedding" / "sentence-transformers"), or None.
+        self._impl: str | None = None
+        self._model = None
+        self._load()
+
+    def _load(self) -> None:
+        """Try each backend in order; never raises, failures are only logged."""
+        try:
+            from FlagEmbedding import FlagReranker  # type: ignore
+            # Half precision is requested for every device except "cpu".
+            use_fp16 = self.device != "cpu"
+            self._model = FlagReranker(self.model_name, use_fp16=use_fp16, devices=self.device)
+            self._impl = "flagembedding"
+            logger.info("reranker.loaded", impl="flagembedding", model=self.model_name, device=self.device)
+            return
+        except Exception as exc:  # noqa: BLE001
+            # Covers both a missing package and a model load failure; fall through to the next backend.
+            logger.warning("reranker.flagembedding_failed", error=str(exc))
+
+        try:
+            from sentence_transformers import CrossEncoder
+            self._model = CrossEncoder(self.model_name, device=self.device)
+            self._impl = "sentence-transformers"
+            logger.info("reranker.loaded", impl="sentence-transformers", model=self.model_name)
+        except Exception as exc:  # noqa: BLE001
+            # No backend could be loaded: leave the reranker disabled.
+            logger.error("reranker.disabled", error=str(exc))
+            self._impl = None
+            self._model = None
+
+    @property
+    def available(self) -> bool:
+        """True when a backend and its model were loaded."""
+        return self._impl is not None and self._model is not None
+
+    def score(self, query: str, passages: Sequence[str]) -> list[float]:
+        """Return one float score per passage, in input order.
+
+        Returns ``0.0`` for every passage when no backend is available, and
+        an empty list when ``passages`` is empty.
+        """
+        if not self.available or not passages:
+            return [0.0] * len(passages)
+        pairs = [(query, p) for p in passages]
+        if self._impl == "flagembedding":
+            scores = self._model.compute_score(pairs, batch_size=self.batch_size, normalize=True)  # type: ignore[union-attr]
+        else:
+            scores = self._model.predict(pairs, batch_size=self.batch_size)  # type: ignore[union-attr]
+        # Backends may hand back a non-list iterable or a bare scalar; coerce to a list.
+        if not isinstance(scores, list):
+            try:
+                scores = list(scores)
+            except TypeError:
+                scores = [float(scores)]
+        return [float(s) for s in scores]
+
+
+@lru_cache(maxsize=1)
+def get_reranker() -> Reranker:
+    """Build the reranker from settings; the instance is cached after the first call."""
+    options = {
+        "model_name": settings.reranker_model,
+        "device": settings.reranker_device,
+        "batch_size": settings.reranker_batch_size,
+    }
+    return Reranker(**options)
diff --git a/app/ingestion/__init__.py b/app/ingestion/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/ingestion/chunker.py b/app/ingestion/chunker.py
new file mode 100644
index 0000000..1761d2d
--- /dev/null
+++ b/app/ingestion/chunker.py
@@ -0,0 +1,317 @@
+"""Structure-aware chunking.
+
+Rules (per spec):
+- Chunk by document structure first, fixed-size second.
+- Hierarchy: title > heading > paragraph > list > table > figure caption.
+- Target 500-900 tokens (configurable).
+- Overlap 80-120 tokens for long narrative text only.
+- Never split tables - one table = one chunk (or one chunk per row group if huge).
+- Every chunk carries citation metadata.
+
+We use a deliberately simple ``len(text.split())`` token estimator. The downstream
+embedding model has its own tokenizer; this estimator is only a budget proxy.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from app.config import settings
+from app.ingestion.docling_extractor import (
+    ExtractedBlock,
+    ExtractedFigure,
+    ExtractedTable,
+    ExtractionResult,
+)
+from app.ingestion.normalizer import normalize_block
+from app.ingestion.quality import compute_quality_flags
+
+
+@dataclass
+class ChunkRecord:
+    """One chunk emitted by ``chunk_extraction``, with its citation metadata."""
+
+    # Position of the chunk in the emitted list (consecutive from 0).
+    chunk_index: int
+    page_number: int
+    # "table", "figure_caption", "figure_description", "heading", or a narrative block type.
+    block_type: str
+    # Display text and its search form, as returned by ``normalize_block``.
+    text: str
+    normalized_text: str
+    # Whitespace-word estimate from ``_estimate_tokens``; not a model tokenizer count.
+    token_count: int
+    block_id: str | None = None
+    # Result of ``compute_quality_flags`` for this chunk.
+    quality_flags: dict[str, Any] = field(default_factory=dict)
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+def _estimate_tokens(text: str) -> int:
+    """Approximate the token count as the whitespace word count, never below 1."""
+    word_count = len(text.split())
+    return word_count if word_count > 1 else 1
+
+
+def chunk_extraction(
+    extraction: ExtractionResult,
+    *,
+    document_ocr_confidence: float | None = None,
+) -> list[ChunkRecord]:
+    """Turn an extraction result into an ordered list of chunk records.
+
+    Chunks are emitted in three passes, and ``chunk_index`` numbers them
+    consecutively across all passes:
+
+    1. one chunk per table with non-empty markdown (tables are never split),
+       prefixed with the one-line summary from ``_summarize_table``;
+    2. one chunk per figure: the caption when present plus a placeholder line;
+    3. narrative blocks, grouped per page and per section and then packed by
+       ``_pack_group`` with the configured target / min / max token budget.
+
+    Args:
+        extraction: Extractor output to chunk.
+        document_ocr_confidence: Document-level OCR confidence, forwarded
+            unchanged to ``compute_quality_flags`` for every chunk.
+
+    Returns:
+        The chunk records in emission order.
+    """
+    target = settings.chunk_target_tokens
+    minimum = settings.chunk_min_tokens
+    maximum = settings.chunk_max_tokens
+
+    chunks: list[ChunkRecord] = []
+
+    def emit(
+        raw_text: str,
+        *,
+        page_number: int,
+        block_type: str,
+        block_id: str | None,
+        metadata: dict[str, Any],
+    ) -> None:
+        # Shared tail of all three passes: normalize, compute flags, append.
+        display, norm = normalize_block(raw_text)
+        flags = compute_quality_flags(
+            text=display,
+            block_type=block_type,
+            ocr_confidence=document_ocr_confidence,
+        )
+        chunks.append(
+            ChunkRecord(
+                # Chunks are only ever appended, so the current length is the next index.
+                chunk_index=len(chunks),
+                page_number=page_number,
+                block_type=block_type,
+                text=display,
+                normalized_text=norm,
+                token_count=_estimate_tokens(display),
+                block_id=block_id,
+                quality_flags=flags,
+                metadata=metadata,
+            )
+        )
+
+    # 1) Tables first - one chunk per table, never split.
+    for t in extraction.tables:
+        body = (t.markdown or "").strip()
+        if not body:
+            continue
+        summary = _summarize_table(t)
+        emit(
+            f"{summary}\n\n{body}" if summary else body,
+            page_number=t.page_number,
+            block_type="table",
+            block_id=t.block_id or f"table:{t.table_index}",
+            metadata={"table_index": t.table_index, "summary": summary or ""},
+        )
+
+    # 2) Figures - caption + placeholder description.
+    for f in extraction.figures:
+        text_parts: list[str] = []
+        if f.caption:
+            text_parts.append(f"Caption: {f.caption}")
+        text_parts.append(f"Figure detected on page {f.page_number}.")
+        emit(
+            "\n".join(text_parts),
+            page_number=f.page_number,
+            block_type="figure_caption" if f.caption else "figure_description",
+            block_id=f.block_id or f"figure:{f.figure_index}",
+            metadata={"figure_index": f.figure_index},
+        )
+
+    # 3) Narrative blocks grouped per page, packed by structure.
+    by_page: dict[int, list[ExtractedBlock]] = {}
+    for b in extraction.blocks:
+        by_page.setdefault(b.page_number, []).append(b)
+
+    for page_no in sorted(by_page):
+        for group in _group_by_section(by_page[page_no]):
+            for piece in _pack_group(group, target=target, maximum=maximum, minimum=minimum):
+                emit(
+                    piece["text"],
+                    page_number=page_no,
+                    block_type=piece["block_type"],
+                    block_id=piece.get("block_id"),
+                    metadata={"section_heading": piece.get("section") or ""},
+                )
+
+    # NOTE: no overlap chunks are emitted, to avoid index bloat;
+    # ``settings.chunk_overlap_tokens`` is not applied here. ``_tail_tokens``
+    # is the helper available for adding overlap to long narrative text later.
+    return chunks
+
+
+# ---------------- Helpers ----------------
+
+def _group_by_section(blocks: list[ExtractedBlock]) -> list[list[ExtractedBlock]]:
+    """Split ``blocks`` into sections, starting a new one at each title/heading.
+
+    A title or heading closes the section collected so far and opens the next.
+    Blocks that precede the first heading form their own leading section.
+    """
+    sections: list[list[ExtractedBlock]] = []
+    open_section: list[ExtractedBlock] = []
+    for block in blocks:
+        is_anchor = block.block_type == "title" or block.block_type == "heading"
+        if not is_anchor or not open_section:
+            open_section.append(block)
+            continue
+        sections.append(open_section)
+        open_section = [block]
+    if open_section:
+        sections.append(open_section)
+    return sections
+
+
+def _pack_group(
+    group: list[ExtractedBlock], *, target: int, maximum: int, minimum: int
+) -> list[dict[str, Any]]:
+    """Pack one section's blocks into chunk dicts sized around ``target`` tokens.
+
+    Titles / headings in the group are joined with " > " into a section
+    anchor. Body blocks are buffered and flushed once the buffer reaches
+    ``target`` tokens, or before a block that would push it past ``maximum``
+    (only if the buffer already holds at least ``minimum`` tokens, so a
+    packed chunk can exceed ``maximum``). A single block of ``maximum`` tokens
+    or more is split on its own via ``_split_long_text``.
+
+    Returns a list of dicts with keys ``text``, ``block_type``, ``block_id``
+    and ``section``. A group with headings only yields one ``heading`` chunk.
+    """
+    if not group:
+        return []
+
+    section_heading = ""
+    body_blocks: list[ExtractedBlock] = []
+    for b in group:
+        if b.block_type in ("title", "heading"):
+            section_heading = (section_heading + " > " + b.text).strip(" >") if section_heading else b.text
+        else:
+            body_blocks.append(b)
+
+    if not body_blocks:
+        # Heading-only group: emit as a single ``heading`` chunk so the title is searchable.
+        text = section_heading or group[0].text
+        return [
+            {
+                "text": text,
+                "block_type": "heading",
+                "block_id": group[0].block_id,
+                "section": section_heading,
+            }
+        ]
+
+    out: list[dict[str, Any]] = []
+    # Pending block texts, their ids, the type of the first pending block, and the running token total.
+    buffer: list[str] = []
+    buffer_block_ids: list[str] = []
+    buffer_block_type = "paragraph"
+    buffer_tokens = 0
+
+    def flush() -> None:
+        # Emit the buffered blocks as one chunk (if any) and reset the buffer state.
+        nonlocal buffer, buffer_block_ids, buffer_block_type, buffer_tokens
+        if not buffer:
+            return
+        text = "\n\n".join(buffer).strip()
+        if not text:
+            buffer = []
+            buffer_block_ids = []
+            buffer_tokens = 0
+            return
+        # Prepend section heading for context (kept short).
+        if section_heading and len(section_heading) < 200:
+            text = f"# {section_heading}\n\n{text}"
+        out.append(
+            {
+                "text": text,
+                "block_type": buffer_block_type,
+                # The chunk is attributed to the first buffered block that had an id.
+                "block_id": buffer_block_ids[0] if buffer_block_ids else None,
+                "section": section_heading,
+            }
+        )
+        buffer = []
+        buffer_block_ids = []
+        buffer_tokens = 0
+
+    for b in body_blocks:
+        tokens = _estimate_tokens(b.text)
+        if tokens >= maximum:
+            # Hard split a giant block into sub-chunks of ~target tokens.
+            # Unlike flushed chunks, these sub-chunks do not get the "# heading" prefix.
+            flush()
+            for sub in _split_long_text(b.text, target=target, maximum=maximum):
+                out.append(
+                    {
+                        "text": sub,
+                        "block_type": b.block_type if b.block_type != "list" else "list",
+                        "block_id": b.block_id,
+                        "section": section_heading,
+                    }
+                )
+            continue
+
+        # Flush before overflowing, but only when the buffer is already big enough to stand alone.
+        if buffer_tokens + tokens > maximum and buffer_tokens >= minimum:
+            flush()
+
+        if not buffer:
+            # The first block in a fresh buffer decides the chunk's block type.
+            buffer_block_type = b.block_type if b.block_type != "list" else "list"
+        buffer.append(b.text)
+        if b.block_id:
+            buffer_block_ids.append(b.block_id)
+        buffer_tokens += tokens
+
+        if buffer_tokens >= target:
+            flush()
+
+    flush()
+    return out
+
+
+def _split_long_text(text: str, *, target: int, maximum: int) -> list[str]:
+    """Split ``text`` into consecutive word windows without losing any words.
+
+    Pieces hold ``target`` words each (500 when ``target`` is not positive).
+    When fewer than ``target // 4`` words would be left over after a piece,
+    and piece plus leftover fit within ``maximum`` words, the leftover is
+    folded into that final piece instead of becoming a tiny trailing chunk.
+
+    Args:
+        text: Text to split; words are whitespace-delimited.
+        target: Words per piece.
+        maximum: Upper bound for the final piece when a short tail is merged.
+
+    Returns:
+        The pieces in order; an empty list for blank input. Joining the
+        pieces yields every word of the input exactly once.
+    """
+    words = text.split()
+    if not words:
+        return []
+    step = target if target > 0 else 500
+    # A tail shorter than this is merged into the preceding piece.
+    tail_threshold = target // 4
+    total = len(words)
+    pieces: list[str] = []
+    i = 0
+    while i < total:
+        window_end = i + step
+        leftover = total - window_end
+        # Merge only a genuine end-of-text tail. Measuring the tail against
+        # the ``maximum`` window instead would stop early and drop the rest
+        # of the text whenever ``maximum - target`` is small.
+        if 0 < leftover < tail_threshold and total - i <= maximum:
+            pieces.append(" ".join(words[i:]))
+            break
+        pieces.append(" ".join(words[i:window_end]))
+        i = window_end
+    return pieces
+
+
+def _tail_tokens(text: str, n: int) -> str:
+    """Return the last ``n`` whitespace-delimited words of ``text``.
+
+    The text is returned unchanged when it has ``n`` words or fewer. A
+    non-positive ``n`` yields an empty string.
+    """
+    if n <= 0:
+        # ``words[-0:]`` would be the whole list, so handle this explicitly.
+        return ""
+    words = text.split()
+    if len(words) <= n:
+        return text
+    return " ".join(words[-n:])
+
+
+def _summarize_table(t: ExtractedTable) -> str:
+    """Build a one-line table summary for index recall.
+
+    Only markdown lines starting with ``|`` are considered. The first such
+    line supplies the column names (at most six are listed), and the row
+    count excludes two lines for header and separator. Without any header
+    cell, only the page is mentioned.
+    """
+    pipe_rows = [ln for ln in (t.markdown or "").splitlines() if ln.startswith("|")]
+    header_cells: list[str] = []
+    if pipe_rows:
+        for raw_cell in pipe_rows[0].strip("|").split("|"):
+            cell = raw_cell.strip()
+            if cell:
+                header_cells.append(cell)
+
+    if not header_cells:
+        return f"Table on page {t.page_number}."
+
+    n_rows = len(pipe_rows) - 2
+    if n_rows < 0:
+        n_rows = 0
+    n_cols = len(header_cells)
+    header_preview = ", ".join(header_cells[:6])
+    return f"Table on page {t.page_number}: {n_rows} rows x {n_cols} cols. Columns: {header_preview}."
diff --git a/app/ingestion/docling_extractor.py b/app/ingestion/docling_extractor.py
new file mode 100644
index 0000000..1ad9cd2
--- /dev/null
+++ b/app/ingestion/docling_extractor.py
@@ -0,0 +1,384 @@
+"""Docling structured extraction.
+
+Docling produces a hierarchical document model with reading order, layout, tables
+and figures. We export both Markdown and a JSON representation, then walk the
+JSON to emit normalized blocks (title, heading, paragraph, list, table caption,
+figure caption) for downstream chunking.
+
+The extractor is intentionally defensive: Docling's exact Python API has
+shifted across releases. We probe for the safest exporter methods and fall
+back to ``str(document)`` only as a last resort.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from app.config import settings
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class ExtractedBlock:
+    """One text item from the Docling export, normalized to a block type."""
+
+    page_number: int
+    # Normalized type, e.g. "title", "heading", "paragraph", "list".
+    block_type: str
+    text: str
+    # The item's ``self_ref`` or ``id`` from the export, when present.
+    block_id: str | None = None
+    # Extra details; ``_walk_blocks`` stores the original lower-cased label under "label".
+    extra: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class ExtractedTable:
+    """One table from the Docling export, in several renderings."""
+
+    page_number: int
+    # Position of the table in the export's table list.
+    table_index: int
+    # Markdown rendering; may be an empty string when none could be produced.
+    markdown: str
+    csv_text: str | None = None
+    # The raw table dict from the export.
+    json_data: dict[str, Any] | None = None
+    block_id: str | None = None
+
+
+@dataclass
+class ExtractedFigure:
+    """One figure / picture from the Docling export."""
+
+    page_number: int
+    # Position of the figure in the export's picture list.
+    figure_index: int
+    caption: str | None
+    block_id: str | None = None
+    # Optional image crop and its file extension; ``_walk_figures`` leaves these at their defaults.
+    image_bytes: bytes | None = None
+    image_ext: str = "png"
+
+
+@dataclass
+class ExtractedPage:
+    """Per-page summary: joined block text plus content flags."""
+
+    page_number: int
+    # Texts of the page's blocks joined with blank lines.
+    text: str
+    has_tables: bool = False
+    has_figures: bool = False
+    # ``_walk_pages`` does not set the two fields below; they keep their defaults there.
+    has_handwriting: bool = False
+    ocr_confidence: float | None = None
+
+
+@dataclass
+class ExtractionResult:
+    """Everything ``extract`` returns for one PDF."""
+
+    # Markdown export of the whole document.
+    markdown: str
+    # Dict export of the Docling document; the lists below are derived from it.
+    json_payload: dict[str, Any]
+    blocks: list[ExtractedBlock]
+    tables: list[ExtractedTable]
+    figures: list[ExtractedFigure]
+    pages: list[ExtractedPage]
+
+
+def extract(pdf_path: Path) -> ExtractionResult:
+    """Run Docling on ``pdf_path`` and return a normalized result.
+
+    The document is converted once, exported to markdown and to a dict, and
+    the dict is then walked into blocks, tables, figures and pages.
+    Exceptions raised by the conversion itself are not caught here.
+    """
+    # Imported lazily so the module can be imported without Docling being loaded.
+    from docling.datamodel.base_models import InputFormat
+    from docling.datamodel.pipeline_options import PdfPipelineOptions
+    from docling.document_converter import DocumentConverter, PdfFormatOption
+
+    pipeline_options = PdfPipelineOptions()
+    # We let OCRmyPDF do the heavy OCR; Docling OCR is opt-in.
+    pipeline_options.do_ocr = settings.docling_ocr_enabled
+    pipeline_options.do_table_structure = True
+    # The two options below are best-effort: failing to set them is ignored.
+    try:
+        pipeline_options.table_structure_options.do_cell_matching = True
+    except Exception:  # noqa: BLE001 - older docling versions lack this
+        pass
+    try:
+        pipeline_options.generate_page_images = True
+    except Exception:  # noqa: BLE001
+        pass
+
+    converter = DocumentConverter(
+        format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
+    )
+
+    logger.info("docling.start", input=str(pdf_path))
+    conv = converter.convert(str(pdf_path))
+    doc = conv.document
+
+    markdown = _safe_export_markdown(doc)
+    json_payload = _safe_export_dict(doc)
+
+    # Pages are derived last because they aggregate the other three lists.
+    blocks = _walk_blocks(json_payload)
+    tables = _walk_tables(doc, json_payload)
+    figures = _walk_figures(doc, json_payload)
+    pages = _walk_pages(json_payload, blocks, tables, figures)
+
+    logger.info(
+        "docling.done",
+        pages=len(pages),
+        blocks=len(blocks),
+        tables=len(tables),
+        figures=len(figures),
+    )
+    return ExtractionResult(
+        markdown=markdown,
+        json_payload=json_payload,
+        blocks=blocks,
+        tables=tables,
+        figures=figures,
+        pages=pages,
+    )
+
+
+# ---------------- Internal helpers ----------------
+
+def _safe_export_markdown(doc: Any) -> str:
+    """Export ``doc`` to markdown with the first exporter method that works.
+
+    ``export_to_markdown`` is tried before ``to_markdown``; an exporter that
+    raises is skipped. Falls back to ``str(doc)`` when none succeeds.
+    """
+    exporters = (getattr(doc, name, None) for name in ("export_to_markdown", "to_markdown"))
+    for exporter in exporters:
+        if not callable(exporter):
+            continue
+        try:
+            return exporter()
+        except Exception:  # noqa: BLE001
+            pass
+    return str(doc)
+
+
+def _safe_export_dict(doc: Any) -> dict[str, Any]:
+    """Export ``doc`` to a plain dict, trying several exporter names in order.
+
+    ``export_to_dict``, ``model_dump`` and ``dict`` are tried in turn; a
+    method that raises or returns a non-dict is skipped. As a last resort
+    the output of ``model_dump_json`` is parsed; ``{}`` is returned when
+    that fails as well.
+    """
+    for method_name in ("export_to_dict", "model_dump", "dict"):
+        exporter = getattr(doc, method_name, None)
+        if not callable(exporter):
+            continue
+        try:
+            exported = exporter()
+        except Exception:  # noqa: BLE001
+            continue
+        if isinstance(exported, dict):
+            return exported
+
+    # Last resort: serialize via JSON round-trip
+    to_json = getattr(doc, "model_dump_json", lambda: "{}")
+    try:
+        return json.loads(to_json())
+    except Exception:  # noqa: BLE001
+        return {}
+
+
+# Maps lower-cased Docling item labels to the block types used by the chunker.
+# Both underscore and hyphen spellings are listed; labels missing from this
+# table are treated as "paragraph" by ``_walk_blocks``.
+_DOCLING_LABEL_TO_BLOCK = {
+    "title": "title",
+    "section_header": "heading",
+    "section-header": "heading",
+    "subtitle": "heading",
+    "page_header": "heading",
+    "header": "heading",
+    "list_item": "list",
+    "list-item": "list",
+    "list": "list",
+    "paragraph": "paragraph",
+    "text": "paragraph",
+    "caption": "figure_caption",
+    "figure": "figure_caption",
+    "table": "table",
+    "footnote": "paragraph",
+}
+
+
+def _walk_blocks(payload: dict[str, Any]) -> list[ExtractedBlock]:
+    """Flatten the export's text items into blocks, preserving their order.
+
+    Items are read from ``texts``, ``text_items`` or ``body.text_items``
+    (first non-empty wins). Non-dict items and items without text are
+    skipped; unknown labels become ``paragraph``.
+    """
+    raw_items = (
+        payload.get("texts")
+        or payload.get("text_items")
+        or payload.get("body", {}).get("text_items", [])
+        or []
+    )
+    if not isinstance(raw_items, list):
+        return []
+
+    collected: list[ExtractedBlock] = []
+    for entry in raw_items:
+        if isinstance(entry, dict):
+            raw_label = entry.get("label") or entry.get("category") or "paragraph"
+            label = raw_label.lower()
+            body = (entry.get("text") or "").strip()
+            if body:
+                collected.append(
+                    ExtractedBlock(
+                        page_number=_page_of(entry),
+                        block_type=_DOCLING_LABEL_TO_BLOCK.get(label, "paragraph"),
+                        text=body,
+                        block_id=entry.get("self_ref") or entry.get("id"),
+                        extra={"label": label},
+                    )
+                )
+    return collected
+
+
+def _walk_tables(doc: Any, payload: dict[str, Any]) -> list[ExtractedTable]:
+    """Convert the export's ``tables`` entries into ``ExtractedTable`` records.
+
+    Non-dict entries are skipped. ``table_index`` is the entry's position in
+    the raw list, so skipped entries leave gaps in the numbering.
+    """
+    return [
+        ExtractedTable(
+            page_number=_page_of(entry),
+            table_index=position,
+            markdown=_table_markdown(doc, entry, position),
+            csv_text=_table_csv(entry),
+            json_data=entry,
+            block_id=entry.get("self_ref") or entry.get("id"),
+        )
+        for position, entry in enumerate(payload.get("tables") or [])
+        if isinstance(entry, dict)
+    ]
+
+
+def _walk_figures(doc: Any, payload: dict[str, Any]) -> list[ExtractedFigure]:
+    """Convert the export's ``pictures`` (or ``figures``) into ``ExtractedFigure`` records.
+
+    Non-dict entries are skipped. A blank caption is stored as ``None``.
+    ``figure_index`` is the entry's position in the raw list. ``doc`` is
+    accepted for signature parity with the other walkers and is not used.
+    """
+    found: list[ExtractedFigure] = []
+    entries = payload.get("pictures") or payload.get("figures") or []
+    for position, entry in enumerate(entries):
+        if isinstance(entry, dict):
+            page = _page_of(entry)
+            stripped = (entry.get("caption") or "").strip()
+            found.append(
+                ExtractedFigure(
+                    page_number=page,
+                    figure_index=position,
+                    caption=stripped if stripped else None,
+                    block_id=entry.get("self_ref") or entry.get("id"),
+                )
+            )
+    return found
+
+
+def _walk_pages(
+    payload: dict[str, Any],
+    blocks: list[ExtractedBlock],
+    tables: list[ExtractedTable],
+    figures: list[ExtractedFigure],
+) -> list[ExtractedPage]:
+    """Build one ``ExtractedPage`` per known page number, in ascending order.
+
+    Page numbers come from the export's ``pages`` entry (dict keys or list
+    items) plus every page referenced by a block, table or figure. Page 0 is
+    dropped, and page 1 is assumed when nothing else is known. Page text is
+    the page's block texts joined with blank lines.
+    """
+    declared = payload.get("pages") or {}
+    numbers: set[int] = set()
+    if isinstance(declared, dict):
+        for key in declared:
+            try:
+                numbers.add(int(key))
+            except (ValueError, TypeError):
+                pass
+    elif isinstance(declared, list):
+        for entry in declared:
+            if not isinstance(entry, dict):
+                continue
+            candidate = entry.get("page_no") or entry.get("page") or entry.get("number")
+            if isinstance(candidate, int):
+                numbers.add(candidate)
+
+    numbers.update(item.page_number for item in (*blocks, *tables, *figures))
+    numbers.discard(0)
+    if not numbers:
+        numbers = {1}
+
+    # Collect block text only for pages that will be emitted.
+    text_by_page: dict[int, list[str]] = {pn: [] for pn in numbers}
+    for block in blocks:
+        bucket = text_by_page.get(block.page_number)
+        if bucket is not None:
+            bucket.append(block.text)
+
+    table_pages = {table.page_number for table in tables}
+    figure_pages = {figure.page_number for figure in figures}
+
+    pages: list[ExtractedPage] = []
+    for pn in sorted(numbers):
+        pages.append(
+            ExtractedPage(
+                page_number=pn,
+                text="\n\n".join(text_by_page[pn]),
+                has_tables=pn in table_pages,
+                has_figures=pn in figure_pages,
+            )
+        )
+    return pages
+
+
+def _page_of(item: dict[str, Any]) -> int:
+    """Return the page number recorded for ``item``, defaulting to 1.
+
+    The first provenance entry (``prov`` / ``provenance``) is consulted
+    before the item itself. In each, ``page_no``, ``page`` and
+    ``page_number`` are tried in that order, and only integer values count.
+    """
+    sources: list[dict[str, Any]] = []
+    provenance = item.get("prov") or item.get("provenance")
+    if isinstance(provenance, list) and provenance and isinstance(provenance[0], dict):
+        sources.append(provenance[0])
+    sources.append(item)
+
+    for source in sources:
+        number = source.get("page_no") or source.get("page") or source.get("page_number")
+        if isinstance(number, int):
+            return number
+    return 1
+
+
+def _table_markdown(doc: Any, raw: dict[str, Any], idx: int) -> str:
+    """Render one table as markdown, trying progressively rawer sources.
+
+    Order of attempts:
+
+    1. ``doc.export_table_to_markdown(idx)`` when the document offers it;
+    2. a list-of-rows grid under ``data``, ``table_cells`` or ``grid``;
+    3. a flat cell list under ``table_cells``;
+    4. the same two shapes nested inside a dict-valued ``data`` entry
+       (``data["grid"]`` / ``data["table_cells"]``).
+
+    Args:
+        doc: The converted document object (may lack the export method).
+        raw: The table's dict from the exported payload.
+        idx: Position of the table in the payload's table list.
+
+    Returns:
+        The markdown table, or ``""`` when no source yields one.
+    """
+    # Try Docling's own export first (per-table).
+    try:
+        export = getattr(doc, "export_table_to_markdown", None)
+        if callable(export):
+            return export(idx)
+    except Exception:  # noqa: BLE001
+        # Best-effort: fall back to rebuilding the table from the raw dict.
+        pass
+
+    def _is_grid(value: Any) -> bool:
+        return isinstance(value, list) and bool(value) and isinstance(value[0], list)
+
+    grid = raw.get("data") or raw.get("table_cells") or raw.get("grid")
+    if _is_grid(grid):
+        return _grid_to_markdown(grid)
+    cells = raw.get("table_cells")
+    if isinstance(cells, list):
+        return _cells_to_markdown(cells)
+
+    # A dict-valued ``data`` is truthy, so the lookup above stops at it and
+    # never reaches the grid / cells nested inside it. Unwrap it here.
+    data = raw.get("data")
+    if isinstance(data, dict):
+        nested_grid = data.get("grid")
+        if _is_grid(nested_grid):
+            return _grid_to_markdown(nested_grid)
+        nested_cells = data.get("table_cells")
+        if isinstance(nested_cells, list):
+            return _cells_to_markdown(nested_cells)
+    return ""
+
+
+def _grid_to_markdown(grid: list[list[Any]]) -> str:
+    """Render a list of rows as a markdown table; the first row is the header.
+
+    Dict cells contribute their ``text`` (or ``value``), other cells their
+    ``str()``. Pipes are escaped, short rows are padded, and rows longer
+    than the header are truncated to its width.
+    """
+    if not grid:
+        return ""
+
+    def render(value: Any) -> str:
+        if isinstance(value, dict):
+            value = value.get("text") or value.get("value") or ""
+        return str(value).replace("|", "\\|").strip()
+
+    def as_line(values: list[str]) -> str:
+        return "| " + " | ".join(values) + " |"
+
+    header, *body = grid
+    width = len(header)
+    lines = [as_line([render(c) for c in header]), as_line(["---"] * width)]
+    for row in body:
+        rendered = [render(c) for c in row]
+        # A non-positive multiplier yields an empty list, so full rows are untouched.
+        rendered.extend([""] * (width - len(rendered)))
+        lines.append(as_line(rendered[:width]))
+    return "\n".join(lines)
+
+
+def _cells_to_markdown(cells: list[Any]) -> str:
+    """Rebuild a markdown table from a flat list of positioned cell dicts.
+
+    Each cell is placed by ``start_row_offset_idx`` / ``start_col_offset_idx``
+    (falling back to ``row`` / ``col``, then 0). Non-dict cells are ignored,
+    and missing positions become empty cells. Only rows that contain at
+    least one cell are emitted.
+    """
+    by_row: dict[int, dict[int, str]] = {}
+    for cell in cells:
+        if isinstance(cell, dict):
+            row_idx = cell.get("start_row_offset_idx", cell.get("row", 0)) or 0
+            col_idx = cell.get("start_col_offset_idx", cell.get("col", 0)) or 0
+            text = (cell.get("text") or "").replace("|", "\\|").strip()
+            by_row.setdefault(row_idx, {})[col_idx] = text
+    if not by_row:
+        return ""
+    width = 1 + max(max(cols) for cols in by_row.values())
+    grid = [[by_row[r].get(c, "") for c in range(width)] for r in sorted(by_row)]
+    return _grid_to_markdown(grid)
+
+
+def _table_csv(raw: dict[str, Any]) -> str | None:
+    """Render a table's row grid as CSV text.
+
+    The grid is taken from ``data`` when that is a list of rows, from
+    ``data["grid"]`` when ``data`` is a dict, or from the top-level ``grid``.
+    Dict cells contribute their ``text``. Only a missing (``None``) cell is
+    written as empty, so values such as ``0`` are kept.
+
+    Returns:
+        The CSV text, or ``None`` when no list-of-rows grid is available.
+    """
+    import csv
+    import io
+
+    data = raw.get("data")
+    if isinstance(data, dict):
+        # Unwrap a dict-valued ``data``; used as-is it would never pass the grid check.
+        data = data.get("grid")
+    grid = data or raw.get("grid")
+    if not (isinstance(grid, list) and grid and isinstance(grid[0], list)):
+        return None
+
+    def _value(cell: Any) -> Any:
+        value = cell.get("text") if isinstance(cell, dict) else cell
+        return "" if value is None else value
+
+    buf = io.StringIO()
+    writer = csv.writer(buf)
+    for row in grid:
+        writer.writerow([_value(c) for c in row])
+    return buf.getvalue()
diff --git a/app/ingestion/figure_processor.py b/app/ingestion/figure_processor.py
new file mode 100644
index 0000000..6e570f9
--- /dev/null
+++ b/app/ingestion/figure_processor.py
@@ -0,0 +1,78 @@
+"""Persists Docling figures to PostgreSQL + MinIO (caption + optional crop)."""
+
+from __future__ import annotations
+
+import uuid
+
+from sqlalchemy import select
+
+from app.db.models import ArtifactType, DocumentArtifact, Figure
+from app.ingestion.docling_extractor import ExtractedFigure
+from app.logging_config import get_logger
+from app.storage.local_paths import key_figure_crop
+from app.storage.minio_client import MinioStorage
+
+logger = get_logger(__name__)
+
+
+def persist_figures(
+    db,
+    storage: MinioStorage,
+    document_id: uuid.UUID,
+    figures: list[ExtractedFigure],
+    page_id_by_number: dict[int, uuid.UUID],
+) -> int:
+    """Upsert one ``Figure`` row per extracted figure; return how many were processed.
+
+    Rows are matched on ``(document_id, figure_index)`` and created when
+    missing. Caption and description are always rewritten. When a figure
+    carries image bytes, the crop is uploaded to the derived bucket and an
+    artifact row is ensured for it. Nothing is committed here.
+    """
+    processed = 0
+    for fig in figures:
+        lookup = select(Figure).where(
+            Figure.document_id == document_id, Figure.figure_index == fig.figure_index
+        )
+        row = db.execute(lookup).scalar_one_or_none()
+        if row is None:
+            row = Figure(
+                document_id=document_id,
+                page_id=page_id_by_number.get(fig.page_number),
+                page_number=fig.page_number,
+                figure_index=fig.figure_index,
+            )
+            db.add(row)
+
+        row.caption = fig.caption
+        if fig.caption:
+            row.description = f"Figure on page {fig.page_number}. Caption: {fig.caption}"
+        else:
+            row.description = f"Figure detected on page {fig.page_number}."
+
+        if fig.image_bytes:
+            crop_key = key_figure_crop(document_id, fig.page_number, fig.figure_index)
+            storage.put_bytes(
+                bucket=storage.derived_bucket,
+                key=crop_key,
+                data=fig.image_bytes,
+                content_type=f"image/{fig.image_ext}",
+            )
+            row.storage_bucket = storage.derived_bucket
+            row.storage_key = crop_key
+            _ensure_artifact(
+                db, document_id, ArtifactType.FIGURE_CROP, storage.derived_bucket, crop_key, fig.page_number
+            )
+
+        processed += 1
+    return processed
+
+
+def _ensure_artifact(db, document_id: uuid.UUID, artifact_type: str, bucket: str, key: str, page: int | None) -> None:
+    """Add a ``DocumentArtifact`` row for ``key`` unless the document already has one.
+
+    Existence is checked on ``(document_id, storage_key)`` only; an existing
+    row is left unchanged.
+    """
+    lookup = select(DocumentArtifact).where(
+        DocumentArtifact.document_id == document_id,
+        DocumentArtifact.storage_key == key,
+    )
+    if not db.execute(lookup).scalar_one_or_none():
+        artifact = DocumentArtifact(
+            document_id=document_id,
+            artifact_type=artifact_type,
+            storage_bucket=bucket,
+            storage_key=key,
+            page_number=page,
+        )
+        db.add(artifact)
diff --git a/app/ingestion/normalizer.py b/app/ingestion/normalizer.py
new file mode 100644
index 0000000..a19dec0
--- /dev/null
+++ b/app/ingestion/normalizer.py
@@ -0,0 +1,12 @@
+"""Block-level normalization wrappers around utils.text_cleaning."""
+
+from __future__ import annotations
+
+from app.utils.text_cleaning import clean_ocr_text, normalize_for_search
+
+
+def normalize_block(text: str) -> tuple[str, str]:
+    """Clean ``text`` for display and derive its search form as ``(display_text, normalized_text)``."""
+    cleaned = clean_ocr_text(text)
+    # The search form is built from the cleaned text rather than from the raw input.
+    return cleaned, normalize_for_search(cleaned)
diff --git a/app/ingestion/ocr.py b/app/ingestion/ocr.py
new file mode 100644
index 0000000..821ed51
--- /dev/null
+++ b/app/ingestion/ocr.py
@@ -0,0 +1,87 @@
+"""OCRmyPDF integration with Tesseract.
+
+We treat OCR as best-effort: if the input PDF already has a text layer (or OCR is
+disabled by config), we skip OCR and use the original PDF. On failure, the
+caller is expected to mark the document ``OCR_FAILED`` and continue without it.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+
+import ocrmypdf
+
+from app.config import settings
+from app.logging_config import get_logger
+from app.utils.pdf import has_searchable_text
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class OcrResult:  # outcome of one run_ocr() call
+    output_path: Path  # PDF that downstream steps should read
+    skipped: bool  # True when the input was copied instead of being OCRed
+    reason: str  # e.g. "ocr_completed", "ocr_disabled", "already_searchable"
+    languages: str  # language string that was (or would have been) passed to OCRmyPDF
+
+
+def run_ocr(input_pdf: Path, output_pdf: Path, languages: str | None = None) -> OcrResult:
+    """Run OCRmyPDF on ``input_pdf`` and write the result to ``output_pdf``.
+
+    - If ``settings.ocr_enabled`` is false, or the input already has searchable
+      text: skip OCR and copy the input to ``output_pdf`` (``skipped=True``).
+    - Prior-OCR and digitally-signed PDFs are likewise returned as skipped copies.
+    - Any other exception propagates (caller handles status update).
+    """
+    langs = languages or settings.ocr_languages
+
+    if not settings.ocr_enabled:
+        return _skip(input_pdf, output_pdf, langs, "ocr_disabled")
+
+    if has_searchable_text(input_pdf):
+        return _skip(input_pdf, output_pdf, langs, "already_searchable")
+
+    output_pdf.parent.mkdir(parents=True, exist_ok=True)
+    logger.info("ocr.start", input=str(input_pdf), output=str(output_pdf), languages=langs)
+
+    try:
+        ocrmypdf.ocr(
+            input_file=str(input_pdf),
+            output_file=str(output_pdf),
+            language=langs,
+            skip_text=False,
+            redo_ocr=False,
+            force_ocr=False,
+            deskew=settings.ocr_deskew,
+            clean=settings.ocr_clean,
+            optimize=settings.ocr_optimize,
+            progress_bar=False,
+            jobs=1,
+            output_type="pdf",
+            # skip_big: pages above this many megapixels are passed through without OCR
+            skip_big=200.0,
+        )
+    except ocrmypdf.exceptions.PriorOcrFoundError:
+        logger.info("ocr.skip.prior_ocr", input=str(input_pdf))
+        return _skip(input_pdf, output_pdf, langs, "prior_ocr_found")
+    except ocrmypdf.exceptions.DigitalSignatureError:
+        logger.warning("ocr.skip.signed_pdf", input=str(input_pdf))
+        return _skip(input_pdf, output_pdf, langs, "digitally_signed")
+    except ocrmypdf.exceptions.EncryptedPdfError as exc:
+        logger.warning("ocr.encrypted", input=str(input_pdf), error=str(exc))
+        raise
+    except ocrmypdf.exceptions.MissingDependencyError as exc:
+        logger.error("ocr.missing_dependency", error=str(exc))
+        raise
+
+    logger.info("ocr.done", output=str(output_pdf))
+    return OcrResult(output_path=output_pdf, skipped=False, reason="ocr_completed", languages=langs)
+
+
+def _skip(input_pdf: Path, output_pdf: Path, langs: str, reason: str) -> OcrResult:  # copy input to output, report skipped OCR
+    output_pdf.parent.mkdir(parents=True, exist_ok=True)  # make sure the target directory exists
+    if not output_pdf.exists() or output_pdf.resolve() != input_pdf.resolve():  # no copy only when output already is the input
+        output_pdf.write_bytes(input_pdf.read_bytes())  # NOTE(review): reads the whole PDF into memory
+    return OcrResult(output_path=output_pdf, skipped=True, reason=reason, languages=langs)
diff --git a/app/ingestion/pipeline.py b/app/ingestion/pipeline.py
new file mode 100644
index 0000000..4e61237
--- /dev/null
+++ b/app/ingestion/pipeline.py
@@ -0,0 +1,384 @@
+"""Per-document end-to-end pipeline: OCR -> Docling -> chunk -> persist -> index.
+
+Called by the Celery worker. Idempotent: re-running on the same document deletes
+existing chunks for that document and re-creates them, then re-indexes in
+OpenSearch and Qdrant.
+"""
+
+from __future__ import annotations
+
+import json
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy import delete, select
+
+from app.config import settings
+from app.db.models import (
+    ArtifactType,
+    Chunk,
+    Document,
+    DocumentArtifact,
+    DocumentStatus,
+    Page,
+    ProcessingEvent,
+)
+from app.db.session import session_scope
+from app.indexing import opensearch_client, qdrant_client
+from app.indexing.embeddings import get_embedder
+from app.ingestion.chunker import ChunkRecord, chunk_extraction
+from app.ingestion.docling_extractor import ExtractionResult, extract
+from app.ingestion.figure_processor import persist_figures
+from app.ingestion.ocr import run_ocr
+from app.ingestion.table_processor import persist_tables
+from app.logging_config import get_logger
+from app.storage.local_paths import (
+    key_docling_json,
+    key_markdown,
+    key_ocr_pdf,
+    work_dir_for,
+)
+from app.storage.minio_client import get_storage
+from app.utils.language import detect_language
+
+logger = get_logger(__name__)
+
+
+def process_document_id(document_id: uuid.UUID, run_id: uuid.UUID | None = None) -> dict[str, Any]:
+    """Top-level entry called by the Celery task. OCR, extraction and indexing failures are
+    recorded on the document via ``_fail``; storage/DB errors raised between those steps propagate.
+    """
+    storage = get_storage()
+    storage.ensure_buckets()
+
+    with session_scope() as db:
+        doc = db.get(Document, document_id)
+        if doc is None:
+            logger.warning("pipeline.document_missing", document_id=str(document_id))
+            return {"status": "missing"}
+        # Copy plain values out now; no ORM instance is used after this session closes.
+        source_path = Path(doc.source_path)
+        sha = doc.sha256
+        original_artifact = db.execute(
+            select(DocumentArtifact.storage_bucket, DocumentArtifact.storage_key).where(
+                DocumentArtifact.document_id == doc.id,
+                DocumentArtifact.artifact_type == ArtifactType.ORIGINAL_PDF,
+            )
+        ).one_or_none()
+
+    work_dir = work_dir_for(document_id)
+    local_pdf = work_dir / f"{sha}.pdf"
+    if not local_pdf.exists():
+        if source_path.exists():
+            local_pdf.write_bytes(source_path.read_bytes())
+        elif original_artifact is not None:
+            storage.get_to_path(original_artifact.storage_bucket, original_artifact.storage_key, local_pdf)
+        else:
+            return _fail(document_id, run_id, DocumentStatus.OCR_FAILED, "Original PDF not available locally or in MinIO")
+
+    # ---------------- OCR ----------------
+    ocr_pdf = work_dir / "ocr.pdf"
+    try:
+        _emit_event(document_id, run_id, DocumentStatus.OCR_STARTED, "OCR started")
+        ocr_result = run_ocr(local_pdf, ocr_pdf, languages=settings.ocr_languages)
+    except Exception as exc:  # noqa: BLE001
+        logger.exception("pipeline.ocr_failed", document_id=str(document_id))
+        return _fail(document_id, run_id, DocumentStatus.OCR_FAILED, f"OCR failed: {exc}")
+
+    # Upload OCR PDF (even if we 'skipped' it - OCR PDF is the canonical input to Docling).
+    ocr_key = key_ocr_pdf(document_id)
+    storage.put_file(
+        bucket=storage.derived_bucket,
+        key=ocr_key,
+        path=ocr_result.output_path,
+        content_type="application/pdf",
+    )
+    with session_scope() as db:
+        _ensure_artifact(db, document_id, ArtifactType.OCR_PDF, storage.derived_bucket, ocr_key)
+        doc = db.get(Document, document_id)
+        if doc is not None:
+            doc.status = DocumentStatus.OCR_COMPLETED
+        db.add(
+            ProcessingEvent(
+                run_id=run_id,
+                document_id=document_id,
+                stage=DocumentStatus.OCR_COMPLETED,
+                level="INFO",
+                message=f"OCR finished ({ocr_result.reason})",
+                data={"skipped": ocr_result.skipped, "languages": ocr_result.languages},
+            )
+        )
+
+    # ---------------- Docling ----------------
+    try:
+        _emit_event(document_id, run_id, DocumentStatus.EXTRACTION_STARTED, "Docling extraction started")
+        extraction = extract(ocr_result.output_path)
+    except Exception as exc:  # noqa: BLE001
+        logger.exception("pipeline.docling_failed", document_id=str(document_id))
+        return _fail(document_id, run_id, DocumentStatus.EXTRACTION_FAILED, f"Docling failed: {exc}")
+
+    # Persist Markdown + JSON to MinIO.
+    md_key = key_markdown(document_id)
+    json_key = key_docling_json(document_id)
+    storage.put_bytes(
+        bucket=storage.derived_bucket,
+        key=md_key,
+        data=extraction.markdown.encode("utf-8"),
+        content_type="text/markdown",
+    )
+    storage.put_bytes(
+        bucket=storage.derived_bucket,
+        key=json_key,
+        data=json.dumps(extraction.json_payload, ensure_ascii=False).encode("utf-8"),
+        content_type="application/json",
+    )
+
+    # ---------------- Persist pages, chunks, tables, figures ----------------
+    chunk_records = chunk_extraction(extraction)
+    sample_text = "\n".join(p.text for p in extraction.pages[:3] if p.text)
+    lang = detect_language(sample_text)
+
+    with session_scope() as db:
+        _ensure_artifact(db, document_id, ArtifactType.MARKDOWN, storage.derived_bucket, md_key)
+        _ensure_artifact(db, document_id, ArtifactType.DOCLING_JSON, storage.derived_bucket, json_key)
+
+        doc = db.get(Document, document_id)
+        if doc is None:
+            return {"status": "missing"}
+        doc.status = DocumentStatus.EXTRACTION_COMPLETED
+        if lang and not doc.language_hint:
+            doc.language_hint = lang
+
+        page_id_by_number = _upsert_pages(db, document_id, extraction)
+        persist_tables(db, storage, document_id, extraction.tables, page_id_by_number)
+        persist_figures(db, storage, document_id, extraction.figures, page_id_by_number)
+
+        # Replace chunks idempotently: drop all and re-insert.
+        db.execute(delete(Chunk).where(Chunk.document_id == document_id))
+        for cr in chunk_records:
+            db.add(_to_chunk_row(document_id, page_id_by_number, cr))
+
+        doc.status = DocumentStatus.CHUNKING_COMPLETED
+        db.add(
+            ProcessingEvent(
+                run_id=run_id,
+                document_id=document_id,
+                stage=DocumentStatus.CHUNKING_COMPLETED,
+                level="INFO",
+                message="Chunking complete",
+                data={"chunks": len(chunk_records)},
+            )
+        )
+
+    # ---------------- Indexing (OpenSearch + Qdrant) ----------------
+    try:
+        opensearch_client.ensure_index()
+        qdrant_client.ensure_collection()
+        opensearch_client.delete_by_document(str(document_id))
+        qdrant_client.delete_by_document(str(document_id))
+
+        os_docs, qdrant_points = _build_index_payloads(document_id, chunk_records, extraction, lang)
+        if os_docs:
+            opensearch_client.index_chunks(os_docs)
+        if qdrant_points:
+            embedder = get_embedder()
+            texts_to_embed = [text for _, text, _ in qdrant_points]
+            vectors = embedder.encode(texts_to_embed)
+            triples = [
+                (chunk_id, vec, payload)
+                for (chunk_id, _text, payload), vec in zip(qdrant_points, vectors, strict=True)
+            ]
+            qdrant_client.upsert_chunks(triples)
+    except Exception as exc:  # noqa: BLE001
+        logger.exception("pipeline.indexing_failed", document_id=str(document_id))
+        return _fail(document_id, run_id, DocumentStatus.FAILED, f"Indexing failed: {exc}")
+
+    with session_scope() as db:
+        doc = db.get(Document, document_id)
+        if doc is not None:
+            doc.status = DocumentStatus.INDEXING_COMPLETED
+            doc.error_message = None
+        db.add(
+            ProcessingEvent(
+                run_id=run_id,
+                document_id=document_id,
+                stage=DocumentStatus.INDEXING_COMPLETED,
+                level="INFO",
+                message="Indexing complete",
+                data={"chunks": len(chunk_records)},
+            )
+        )
+
+    return {"status": DocumentStatus.INDEXING_COMPLETED, "chunks": len(chunk_records)}
+
+
+# ---------------- helpers ----------------
+
+def _to_chunk_row(
+    document_id: uuid.UUID, page_id_by_number: dict[int, uuid.UUID], cr: ChunkRecord
+) -> Chunk:
+    """Map one ``ChunkRecord`` onto a new ``Chunk`` row (not added to any session here).
+
+    ``page_id`` is looked up by page number and is ``None`` when the page is
+    unknown; ``ocr_confidence`` is always stored as ``None``.
+    """
+    row_fields = dict(
+        document_id=document_id,
+        page_id=page_id_by_number.get(cr.page_number), page_number=cr.page_number,
+        block_id=cr.block_id, chunk_index=cr.chunk_index, block_type=cr.block_type,
+        text=cr.text, normalized_text=cr.normalized_text, token_count=cr.token_count,
+        ocr_confidence=None,
+        quality_flags=cr.quality_flags, chunk_metadata=cr.metadata,
+    )
+    return Chunk(**row_fields)
+
+
+def _upsert_pages(db, document_id: uuid.UUID, extraction: ExtractionResult) -> dict[int, uuid.UUID]:
+    """Create or refresh a ``Page`` row per extracted page and return ``{page_number: page.id}``."""
+    rows = db.execute(select(Page).where(Page.document_id == document_id)).scalars()
+    existing = {p.page_number: p for p in rows}
+    out: dict[int, uuid.UUID] = {}
+    for ep in extraction.pages:
+        page = existing.get(ep.page_number)
+        if page is None:
+            page = Page(
+                document_id=document_id,
+                page_number=ep.page_number,
+                text=ep.text,
+                ocr_confidence=ep.ocr_confidence,
+                has_tables=ep.has_tables,
+                has_figures=ep.has_figures,
+                has_handwriting=ep.has_handwriting,
+            )
+            db.add(page)
+            db.flush()  # flush before page.id is read for the mapping below
+        else:
+            page.text = ep.text
+            page.ocr_confidence = ep.ocr_confidence  # refreshed on re-runs like the other fields
+            page.has_tables = ep.has_tables
+            page.has_figures = ep.has_figures
+            page.has_handwriting = ep.has_handwriting
+        out[ep.page_number] = page.id
+    return out
+
+
+def _build_index_payloads(
+    document_id: uuid.UUID,
+    chunks: list[ChunkRecord],
+    extraction: ExtractionResult,
+    language_hint: str | None,
+) -> tuple[list[dict[str, Any]], list[tuple[str, str, dict[str, Any]]]]:
+    """Build ``(opensearch_docs, qdrant_items)`` from the stored ``Chunk`` rows; ``chunks``/``extraction`` are not read."""
+    os_docs: list[dict[str, Any]] = []
+    qdrant: list[tuple[str, str, dict[str, Any]]] = []
+    with session_scope() as db:
+        doc = db.get(Document, document_id)
+        if doc is None:
+            return [], []
+        original_file_name = doc.original_file_name
+        source_path = doc.source_path
+        # Rows are converted to plain dicts while the session is still open, so no
+        # attribute is read from a detached (possibly expired) ORM instance.
+        chunk_rows = (
+            db.execute(select(Chunk).where(Chunk.document_id == document_id))
+            .scalars()
+            .all()
+        )
+        for row in chunk_rows:
+            chunk_id = str(row.id)
+            text = row.text or ""
+            os_docs.append(
+                {
+                    "chunk_id": chunk_id,
+                    "document_id": str(document_id),
+                    "source_path": source_path,
+                    "original_file_name": original_file_name,
+                    "page_number": row.page_number,
+                    "block_type": row.block_type,
+                    "block_id": row.block_id,
+                    "text": text,
+                    "normalized_text": row.normalized_text,
+                    "ocr_confidence": row.ocr_confidence,
+                    "language_hint": language_hint,
+                    "metadata": row.chunk_metadata or {},
+                    "quality_flags": row.quality_flags or {},
+                    "created_at": (row.created_at or datetime.now(tz=timezone.utc)).isoformat(),
+                }
+            )
+            text_preview = text[:512]
+            qdrant.append(
+                (
+                    chunk_id,
+                    text,
+                    {
+                        "document_id": str(document_id),
+                        "source_path": source_path,
+                        "original_file_name": original_file_name,
+                        "page_number": row.page_number,
+                        "block_type": row.block_type,
+                        "block_id": row.block_id,
+                        "text_preview": text_preview,
+                        "ocr_confidence": row.ocr_confidence,
+                        "quality_flags": row.quality_flags or {},
+                        "metadata": row.chunk_metadata or {},
+                    },
+                )
+            )
+    return os_docs, qdrant
+
+
+def _ensure_artifact(db, document_id: uuid.UUID, artifact_type: str, bucket: str, key: str) -> None:
+    """Record a ``DocumentArtifact`` for ``key`` if the document has none under that key."""
+    # The lookup ignores artifact_type: any row with the same document and key counts.
+    lookup = select(DocumentArtifact).where(
+        DocumentArtifact.document_id == document_id,
+        DocumentArtifact.storage_key == key,
+    )
+    if db.execute(lookup).scalar_one_or_none():
+        return
+    # Nothing recorded yet for this key: queue a new row on the caller's session.
+    artifact = DocumentArtifact(
+        document_id=document_id,
+        artifact_type=artifact_type,
+        storage_bucket=bucket,
+        storage_key=key,
+    )
+    db.add(artifact)
+
+
+def _emit_event(document_id: uuid.UUID, run_id: uuid.UUID | None, stage: str, message: str) -> None:
+    """Record an INFO ``ProcessingEvent`` for ``stage`` in its own ``session_scope`` block."""
+    event = ProcessingEvent(
+        run_id=run_id,
+        document_id=document_id,
+        stage=stage,
+        level="INFO",
+        message=message,
+        data={},
+    )
+    with session_scope() as db:
+        db.add(event)
+
+
+def _fail(
+    document_id: uuid.UUID, run_id: uuid.UUID | None, stage: str, message: str
+) -> dict[str, Any]:
+    """Set the document's status to ``stage``, record an ERROR event, and return the result dict."""
+    failure_event = ProcessingEvent(
+        run_id=run_id,
+        document_id=document_id,
+        stage=stage,
+        level="ERROR",
+        message=message,
+        data={},
+    )
+    with session_scope() as db:
+        doc = db.get(Document, document_id)
+        if doc is not None:
+            doc.status = stage
+            doc.error_message = message[:2000]
+        db.add(failure_event)
+    logger.error("pipeline.failed", document_id=str(document_id), stage=stage, message=message)
+    return {"status": stage, "error": message}
diff --git a/app/ingestion/quality.py b/app/ingestion/quality.py
new file mode 100644
index 0000000..c0dfeeb
--- /dev/null
+++ b/app/ingestion/quality.py
@@ -0,0 +1,41 @@
+"""Quality flag computation for chunks."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.utils.text_cleaning import looks_garbled
+
+LOW_OCR_CONFIDENCE_THRESHOLD = 0.6
+SHORT_TEXT_THRESHOLD = 24
+
+
+def compute_quality_flags(
+    *,
+    text: str,
+    block_type: str,
+    ocr_confidence: float | None,
+    has_handwriting: bool = False,
+) -> dict[str, Any]:
+    """Compute the quality-flag dict for one chunk.
+
+    ``needs_manual_review`` is set when OCR confidence is low, the text looks
+    garbled, or handwriting was detected; a very short text alone does not set it.
+    """
+    # A missing confidence value is never treated as low.
+    low_confidence = ocr_confidence is not None and ocr_confidence < LOW_OCR_CONFIDENCE_THRESHOLD
+    # Empty text is never flagged as very short; length is measured after stripping.
+    too_short = bool(text) and len(text.strip()) < SHORT_TEXT_THRESHOLD
+    garbled = bool(looks_garbled(text))
+    # Handwriting comes either from the caller's hint or from the block type itself.
+    handwriting = has_handwriting or block_type == "handwriting"
+    return {
+        "low_ocr_confidence": low_confidence,
+        "very_short_text": too_short,
+        "possible_garbled_text": garbled,
+        "table_detected": block_type == "table",
+        "figure_detected": block_type in ("figure_caption", "figure_description"),
+        "handwriting_detected": handwriting,
+        "needs_manual_review": bool(low_confidence or garbled or handwriting),
+    }
+
diff --git a/app/ingestion/scanner.py b/app/ingestion/scanner.py
new file mode 100644
index 0000000..d734af7
--- /dev/null
+++ b/app/ingestion/scanner.py
@@ -0,0 +1,184 @@
+"""Folder scanner: discovers PDFs, deduplicates by SHA256, persists discovery rows.
+
+The scanner does NOT trigger OCR or extraction. It only:
+- enumerates PDF files,
+- hashes each file,
+- creates / reuses a ``Document`` row,
+- uploads the original PDF to MinIO,
+- emits ``DISCOVERED`` / ``STORED_ORIGINAL`` events.
+
+Heavy work (OCR, Docling, indexing) is performed by the Celery worker pipeline.
+"""
+
+from __future__ import annotations
+
+import os
+import uuid
+from collections.abc import Iterator
+from dataclasses import dataclass
+from pathlib import Path
+
+from sqlalchemy import select
+
+from app.db.models import (
+    ArtifactType,
+    Document,
+    DocumentArtifact,
+    DocumentStatus,
+    ProcessingEvent,
+)
+from app.db.session import session_scope
+from app.logging_config import get_logger
+from app.storage.local_paths import key_original_pdf
+from app.storage.minio_client import get_storage
+from app.utils.hashing import sha256_file
+from app.utils.pdf import is_pdf
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class DiscoveryRecord:  # one scan result per PDF path yielded by discover_documents()
+    path: Path
+    sha256: str | None  # None when the file could not be stat'ed or hashed
+    document_id: uuid.UUID | None  # None for unreadable files and failed stores
+    duplicate: bool  # True when a Document with the same SHA256 already existed
+    invalid: bool = False  # True when hashing or storing the original failed
+
+
+def iter_pdf_files(root: Path, recursive: bool = True) -> Iterator[Path]:
+    """Yield every path under ``root`` that ``is_pdf`` accepts.
+
+    A file ``root`` is checked on its own; a directory is walked with ``os.walk``
+    when ``recursive`` is true, otherwise only its direct entries are checked.
+    """
+    if root.is_file():
+        candidates = (root,)
+    elif recursive:
+        candidates = (Path(d) / name for d, _dirs, names in os.walk(root) for name in names)
+    else:
+        candidates = root.iterdir()
+    for candidate in candidates:
+        if is_pdf(candidate):
+            yield candidate
+
+
+def discover_documents(
+    root: Path, recursive: bool = True, force: bool = False
+) -> Iterator[DiscoveryRecord]:
+    """Scan ``root`` for PDFs and yield one ``DiscoveryRecord`` per file.
+
+    Every readable file is registered inside its own ``session_scope`` block and
+    its record is yielded only after that block has exited, so consumer code
+    never runs while the per-file session is open. With ``force``, a file whose
+    SHA256 is already known is processed again instead of being reported as a duplicate only.
+    """
+    storage = get_storage()
+    storage.ensure_buckets()
+
+    for path in iter_pdf_files(root, recursive=recursive):
+        try:
+            stat = path.stat()
+            sha = sha256_file(path)
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("scan.invalid_file", path=str(path), error=str(exc))
+            yield DiscoveryRecord(path=path, sha256=None, document_id=None, duplicate=False, invalid=True)
+            continue
+
+        with session_scope() as db:
+            record = _register_file(db, storage, path, sha, stat.st_size, force)
+        yield record
+
+
+def _register_file(db, storage, path: Path, sha: str, size: int, force: bool) -> DiscoveryRecord:
+    """Create or reuse the ``Document`` for one hashed file and store its original PDF."""
+    existing = db.execute(select(Document).where(Document.sha256 == sha)).scalar_one_or_none()
+
+    if existing and not force:
+        logger.debug("scan.duplicate", path=str(path), sha256=sha, document_id=str(existing.id))
+        return DiscoveryRecord(path=path, sha256=sha, document_id=existing.id, duplicate=True)
+
+    doc = existing or Document(
+        id=uuid.uuid4(),
+        source_path=str(path),
+        original_file_name=path.name,
+        sha256=sha,
+        file_size_bytes=size,
+        mime_type="application/pdf",
+        status=DocumentStatus.DISCOVERED,
+    )
+    if not existing:
+        db.add(doc)
+        db.flush()
+        db.add(
+            ProcessingEvent(
+                document_id=doc.id,
+                stage=DocumentStatus.DISCOVERED,
+                level="INFO",
+                message="Document discovered",
+                data={"sha256": sha, "size": size, "path": str(path)},
+            )
+        )
+
+    # Upload original (idempotent) and record artifact if missing.
+    key = key_original_pdf(doc.id, sha)
+    try:
+        if not storage.exists(storage.originals_bucket, key):
+            storage.put_file(
+                bucket=storage.originals_bucket,
+                key=key,
+                path=path,
+                content_type="application/pdf",
+                metadata={"sha256": sha, "original-name": path.name[:255]},
+            )
+        _ensure_artifact(db, doc.id, ArtifactType.ORIGINAL_PDF, storage.originals_bucket, key, sha)
+        if doc.status == DocumentStatus.DISCOVERED:
+            doc.status = DocumentStatus.STORED_ORIGINAL
+            db.add(
+                ProcessingEvent(
+                    document_id=doc.id,
+                    stage=DocumentStatus.STORED_ORIGINAL,
+                    level="INFO",
+                    message="Original stored to MinIO",
+                    data={"bucket": storage.originals_bucket, "key": key},
+                )
+            )
+    except Exception as exc:  # noqa: BLE001
+        logger.error("scan.store_failed", path=str(path), error=str(exc))
+        doc.status = DocumentStatus.FAILED
+        doc.error_message = f"store_original: {exc}"
+        db.add(
+            ProcessingEvent(
+                document_id=doc.id,
+                stage="STORE_FAILED",
+                level="ERROR",
+                message=str(exc),
+                data={"path": str(path)},
+            )
+        )
+        return DiscoveryRecord(path=path, sha256=sha, document_id=None, duplicate=False, invalid=True)
+
+    return DiscoveryRecord(path=path, sha256=sha, document_id=doc.id, duplicate=bool(existing))
+
+
+def _ensure_artifact(
+    db, document_id: uuid.UUID, artifact_type: str, bucket: str, key: str, checksum: str | None
+) -> None:
+    """Record a ``DocumentArtifact`` with ``checksum`` unless an identical entry exists."""
+    # An entry counts as existing when document, artifact type and storage key all match.
+    lookup = select(DocumentArtifact).where(
+        DocumentArtifact.document_id == document_id,
+        DocumentArtifact.artifact_type == artifact_type,
+        DocumentArtifact.storage_key == key,
+    )
+    if db.execute(lookup).scalar_one_or_none():
+        return
+    # Not recorded yet: queue a new row on the caller's session.
+    artifact = DocumentArtifact(
+        document_id=document_id,
+        artifact_type=artifact_type,
+        storage_bucket=bucket,
+        storage_key=key,
+        checksum=checksum,
+    )
+    db.add(artifact)
diff --git a/app/ingestion/table_processor.py b/app/ingestion/table_processor.py
new file mode 100644
index 0000000..04135cc
--- /dev/null
+++ b/app/ingestion/table_processor.py
@@ -0,0 +1,84 @@
+"""Persists Docling tables to PostgreSQL + MinIO."""
+
+from __future__ import annotations
+
+import json
+import uuid
+
+from sqlalchemy import select
+
+from app.db.models import ArtifactType, DocumentArtifact, Table
+from app.ingestion.docling_extractor import ExtractedTable
+from app.logging_config import get_logger
+from app.storage.local_paths import key_table_json
+from app.storage.minio_client import MinioStorage
+
+logger = get_logger(__name__)
+
+
+def persist_tables(
+    db,
+    storage: MinioStorage,
+    document_id: uuid.UUID,
+    tables: list[ExtractedTable],
+    page_id_by_number: dict[int, uuid.UUID],
+) -> int:
+    """Upsert one ``Table`` row per extracted table and store its JSON in the derived bucket.
+
+    Returns the number of tables processed.
+    """
+    count = 0
+    for count, t in enumerate(tables, start=1):
+        lookup = select(Table).where(Table.document_id == document_id, Table.table_index == t.table_index)
+        row = db.execute(lookup).scalar_one_or_none()
+        if row is None:
+            row = Table(
+                document_id=document_id,
+                page_id=page_id_by_number.get(t.page_number),
+                page_number=t.page_number,
+                table_index=t.table_index,
+            )
+            db.add(row)
+        # Content fields are overwritten on every run, for new and existing rows alike.
+        row.markdown = t.markdown or ""
+        row.csv_text = t.csv_text
+        row.json_data = t.json_data
+        row.summary = _summary(t)
+        db.flush()
+        if not t.json_data:
+            continue
+        # Persist json blob to MinIO for large/inspectable copies.
+        key = key_table_json(document_id, t.table_index)
+        payload = json.dumps(t.json_data, ensure_ascii=False).encode("utf-8")
+        storage.put_bytes(
+            bucket=storage.derived_bucket, key=key,
+            data=payload, content_type="application/json",
+        )
+        _ensure_artifact(db, document_id, ArtifactType.TABLE_JSON, storage.derived_bucket, key, t.page_number)
+    return count
+
+
+def _summary(t: ExtractedTable) -> str:
+    """One-line table summary; the row count is the number of ``|`` lines minus two, floored at zero."""
+    pipe_lines = [ln for ln in (t.markdown or "").splitlines() if ln.startswith("|")]
+    return f"Table {t.table_index} on page {t.page_number} ({max(0, len(pipe_lines) - 2)} rows)."
+
+
+def _ensure_artifact(db, document_id: uuid.UUID, artifact_type: str, bucket: str, key: str, page: int | None) -> None:
+    """Register ``key`` as a ``DocumentArtifact`` of the document unless it is already registered."""
+    # A row with the same document_id and storage_key means there is nothing to do.
+    already_there = select(DocumentArtifact).where(
+        DocumentArtifact.document_id == document_id,
+        DocumentArtifact.storage_key == key,
+    )
+    if db.execute(already_there).scalar_one_or_none():
+        return
+    # ``page`` is written to the new row's page_number.
+    new_row = DocumentArtifact(
+        document_id=document_id,
+        artifact_type=artifact_type,
+        storage_bucket=bucket,
+        storage_key=key,
+        page_number=page,
+    )
+    db.add(new_row)
diff --git a/app/logging_config.py b/app/logging_config.py
new file mode 100644
index 0000000..d06e564
--- /dev/null
+++ b/app/logging_config.py
@@ -0,0 +1,61 @@
+"""Structured logging via structlog with stdlib bridge.
+
+All modules use ``get_logger(__name__)`` and emit key/value pairs.
+"""
+
+from __future__ import annotations
+
+import logging
+import sys
+from typing import Any
+
+import structlog
+
+from app.config import settings
+
+
+def configure_logging() -> None:
+    """Send structlog and stdlib log records through one JSON-rendering handler on stdout."""
+    level = getattr(logging, settings.app_log_level.upper(), logging.INFO)
+    timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)
+
+    shared_processors: list[Any] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.stdlib.add_log_level,
+        structlog.stdlib.add_logger_name,
+        timestamper,
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.format_exc_info,
+    ]
+    # structlog loggers run the shared chain, then hand off to the stdlib formatter below.
+    structlog.configure(
+        processors=shared_processors
+        + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
+        logger_factory=structlog.stdlib.LoggerFactory(),
+        wrapper_class=structlog.stdlib.BoundLogger,
+        cache_logger_on_first_use=True,
+    )
+    # Records from plain stdlib loggers get the same shared chain via foreign_pre_chain.
+    formatter = structlog.stdlib.ProcessorFormatter(
+        foreign_pre_chain=shared_processors,
+        processors=[
+            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
+            structlog.processors.JSONRenderer(),
+        ],
+    )
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+    # Replace whatever handlers the root logger already had with the single JSON handler.
+    root = logging.getLogger()
+    root.handlers.clear()
+    root.addHandler(handler)
+    root.setLevel(level)
+
+    # Quiet down noisy libs
+    for noisy in ("urllib3", "botocore", "s3transfer", "elasticsearch", "opensearch", "httpx"):
+        logging.getLogger(noisy).setLevel(logging.WARNING)
+
+
+def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
+    return structlog.get_logger(name)  # thin wrapper so modules obtain loggers from this module
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..73687a3
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,63 @@
+"""FastAPI entrypoint."""
+
+from __future__ import annotations
+
+from contextlib import asynccontextmanager
+from typing import AsyncIterator
+
+from fastapi import FastAPI
+
+from app import __version__
+from app.api import routes_health, routes_ingestion, routes_search
+from app.config import settings
+from app.logging_config import configure_logging, get_logger
+
+configure_logging()
+logger = get_logger(__name__)
+
+
+def _bootstrap_minio_buckets() -> None:
+    """Try to create the MinIO buckets; log a warning instead of raising."""
+    try:
+        # The storage module is imported only when the bootstrap actually runs.
+        from app.storage.minio_client import get_storage
+
+        get_storage().ensure_buckets()
+    except Exception as err:  # noqa: BLE001
+        logger.warning("api.startup.minio_bootstrap_failed", error=str(err))
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
+    """Log startup and shutdown around the app's lifetime and try to create buckets.
+
+    The bucket bootstrap is best-effort: a failure does not stop the API from
+    starting.
+    """
+    logger.info("api.startup", version=__version__, prefix=settings.app_api_prefix)
+    _bootstrap_minio_buckets()
+    yield
+    logger.info("api.shutdown")
+
+
+app = FastAPI(
+    title="LegacyHUB",
+    description="Hybrid lexical + semantic search over legacy PDF archives",
+    version=__version__,
+    lifespan=lifespan,
+)
+
+# Every API router is mounted under the same configured prefix.
+for _routes in (routes_health, routes_ingestion, routes_search):
+    app.include_router(_routes.router, prefix=settings.app_api_prefix)
+
+
+@app.get("/")
+def root() -> dict[str, str]:
+    """Return basic service metadata: name, version, API prefix and docs path."""
+    return {
+        "service": "LegacyHUB",
+        "version": __version__,
+        "api": settings.app_api_prefix,
+        "docs": "/docs",
+    }
diff --git a/app/storage/__init__.py b/app/storage/__init__.py
new file mode 100644
index 0000000..17d5dbb
--- /dev/null
+++ b/app/storage/__init__.py
@@ -0,0 +1,3 @@
+from app.storage.minio_client import MinioStorage, get_storage
+
+__all__ = ["MinioStorage", "get_storage"]
diff --git a/app/storage/local_paths.py b/app/storage/local_paths.py
new file mode 100644
index 0000000..cbc21e6
--- /dev/null
+++ b/app/storage/local_paths.py
@@ -0,0 +1,55 @@
+"""Storage key conventions for MinIO and local working paths."""
+
+from __future__ import annotations
+
+import uuid
+from pathlib import Path
+
+from app.config import settings
+
+
+def _doc_prefix(document_id: uuid.UUID | str) -> str:
+    """Return the ``docs/<document_id>`` prefix shared by every object key."""
+    return f"docs/{document_id}"
+
+
+def work_dir_for(document_id: uuid.UUID | str) -> Path:
+    """Return the document's directory under ``settings.app_work_dir``, creating it."""
+    work_dir = Path(settings.app_work_dir, str(document_id))
+    work_dir.mkdir(parents=True, exist_ok=True)
+    return work_dir
+
+
+def key_original_pdf(document_id: uuid.UUID | str, sha256: str) -> str:
+    """Key of the original PDF, with ``sha256`` as the file stem."""
+    return f"{_doc_prefix(document_id)}/original/{sha256}.pdf"
+
+
+def key_ocr_pdf(document_id: uuid.UUID | str) -> str:
+    """Key of the document's ``ocr/ocr.pdf`` object."""
+    return f"{_doc_prefix(document_id)}/ocr/ocr.pdf"
+
+
+def key_docling_json(document_id: uuid.UUID | str) -> str:
+    """Key of the document's ``docling/document.json`` object."""
+    return f"{_doc_prefix(document_id)}/docling/document.json"
+
+
+def key_markdown(document_id: uuid.UUID | str) -> str:
+    """Key of the document's ``docling/document.md`` object."""
+    return f"{_doc_prefix(document_id)}/docling/document.md"
+
+
+def key_page_image(document_id: uuid.UUID | str, page_number: int) -> str:
+    """Key of a page PNG; ``page_number`` is zero-padded to five digits."""
+    return f"{_doc_prefix(document_id)}/pages/p{page_number:05d}.png"
+
+
+def key_figure_crop(document_id: uuid.UUID | str, page_number: int, figure_index: int) -> str:
+    """Key of a figure PNG; page number padded to five digits, figure index to three."""
+    return f"{_doc_prefix(document_id)}/figures/p{page_number:05d}_f{figure_index:03d}.png"
+
+
+def key_table_json(document_id: uuid.UUID | str, table_index: int) -> str:
+    """Key of a table JSON; ``table_index`` is zero-padded to four digits."""
+    return f"{_doc_prefix(document_id)}/tables/t{table_index:04d}.json"
diff --git a/app/storage/minio_client.py b/app/storage/minio_client.py
new file mode 100644
index 0000000..a7e430f
--- /dev/null
+++ b/app/storage/minio_client.py
@@ -0,0 +1,129 @@
+"""Thin wrapper around the MinIO Python SDK with bucket bootstrap and retries."""
+
+from __future__ import annotations
+
+import io
+from functools import lru_cache
+from pathlib import Path
+from typing import Any
+
+from minio import Minio
+from minio.error import S3Error
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
+from app.config import settings
+from app.logging_config import get_logger
+
+logger = get_logger(__name__)
+
+# S3 error codes that exists() treats as "object is not there".
+_MISSING_OBJECT_CODES = frozenset({"NoSuchKey", "NoSuchObject"})
+
+# Upload retry policy: up to three attempts on ``S3Error`` with exponential
+# backoff bounded to 1-10 seconds; the last error is re-raised unchanged.
+_retry_upload = retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=1, max=10),
+    retry=retry_if_exception_type(S3Error),
+    reraise=True,
+)
+
+
+class MinioStorage:
+    """Object-storage facade holding a MinIO client and the two bucket names."""
+
+    def __init__(self, client: Minio | None = None) -> None:
+        """Use ``client`` when given, otherwise build one from ``settings``."""
+        if not client:
+            client = Minio(
+                endpoint=settings.minio_endpoint,
+                access_key=settings.minio_access_key,
+                secret_key=settings.minio_secret_key,
+                secure=settings.minio_secure,
+                region=settings.minio_region,
+            )
+        self.client = client
+        self.originals_bucket = settings.minio_bucket_originals
+        self.derived_bucket = settings.minio_bucket_derived
+
+    def ensure_buckets(self) -> None:
+        """Create the originals and derived buckets when they do not exist yet."""
+        for name in (self.originals_bucket, self.derived_bucket):
+            if self.client.bucket_exists(name):
+                continue
+            logger.info("minio.create_bucket", bucket=name)
+            self.client.make_bucket(name)
+
+    @_retry_upload
+    def put_file(
+        self,
+        bucket: str,
+        key: str,
+        path: Path,
+        content_type: str = "application/octet-stream",
+        metadata: dict[str, str] | None = None,
+    ) -> None:
+        """Upload the file at ``path`` to ``bucket``/``key``.
+
+        The file is stat'ed and reopened on every retry attempt.
+        """
+        byte_count = path.stat().st_size
+        with path.open("rb") as stream:
+            self.client.put_object(
+                bucket_name=bucket,
+                object_name=key,
+                data=stream,
+                length=byte_count,
+                content_type=content_type,
+                metadata=metadata or {},
+            )
+
+    @_retry_upload
+    def put_bytes(
+        self,
+        bucket: str,
+        key: str,
+        data: bytes,
+        content_type: str = "application/octet-stream",
+        metadata: dict[str, str] | None = None,
+    ) -> None:
+        """Upload the in-memory payload ``data`` to ``bucket``/``key``."""
+        payload = io.BytesIO(data)
+        self.client.put_object(
+            bucket_name=bucket,
+            object_name=key,
+            data=payload,
+            length=len(data),
+            content_type=content_type,
+            metadata=metadata or {},
+        )
+
+    def get_to_path(self, bucket: str, key: str, dest: Path) -> Path:
+        """Download ``bucket``/``key`` to ``dest`` (creating parent dirs) and return it."""
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        self.client.fget_object(bucket, key, str(dest))
+        return dest
+
+    def exists(self, bucket: str, key: str) -> bool:
+        """Report whether ``key`` exists; S3 errors other than "missing" propagate."""
+        try:
+            self.client.stat_object(bucket, key)
+        except S3Error as err:
+            if err.code in _MISSING_OBJECT_CODES:
+                return False
+            raise
+        return True
+
+    def health(self) -> dict[str, Any]:
+        """Probe MinIO by listing buckets; errors are reported in the result dict."""
+        try:
+            names = [bucket.name for bucket in self.client.list_buckets()]
+        except Exception as err:
+            return {"status": "error", "error": str(err)}
+        return {"status": "ok", "buckets": names}
+
+
+@lru_cache(maxsize=1)
+def get_storage() -> MinioStorage:
+    """Return the cached ``MinioStorage`` instance, creating it on first call."""
+    return MinioStorage()
diff --git a/app/utils/__init__.py b/app/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/utils/hashing.py b/app/utils/hashing.py
new file mode 100644
index 0000000..95afa1f
--- /dev/null
+++ b/app/utils/hashing.py
@@ -0,0 +1,28 @@
+"""Streaming SHA256 hashing utilities for large files."""
+
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+_CHUNK = 1024 * 1024  # 1 MiB
+
+
+def sha256_file(path: Path | str) -> str:
+    """Return the hex SHA256 digest of the file at ``path``.
+
+    The file is read in ``_CHUNK``-sized blocks, so memory use does not grow
+    with file size.
+    """
+    digest = hashlib.sha256()
+    with open(path, "rb") as stream:
+        while block := stream.read(_CHUNK):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def sha256_bytes(data: bytes) -> str:
+    """Return the hex SHA256 digest of ``data``."""
+    digest = hashlib.sha256()
+    digest.update(data)
+    return digest.hexdigest()
diff --git a/app/utils/language.py b/app/utils/language.py
new file mode 100644
index 0000000..b5f18cc
--- /dev/null
+++ b/app/utils/language.py
@@ -0,0 +1,32 @@
+"""Language detection helper - tolerant to short / mixed text."""
+
+from __future__ import annotations
+
+from langdetect import DetectorFactory, LangDetectException, detect_langs
+
+# Fixed seed so langdetect gives the same answer for the same text.
+DetectorFactory.seed = 42
+
+
+def detect_language(text: str, min_chars: int = 40) -> str | None:
+    """Return the top-ranked language code for ``text``, or ``None``.
+
+    ``None`` is returned when the stripped text is shorter than ``min_chars``,
+    when langdetect raises ``LangDetectException``, or when it ranks nothing.
+    """
+    too_short = not text or len(text.strip()) < min_chars
+    if too_short:
+        return None
+    try:
+        candidates = detect_langs(text)
+    except LangDetectException:
+        return None
+    return candidates[0].lang if candidates else None
+
+
+def has_cyrillic(text: str) -> bool:
+    """Return True if any character lies in the range ``Ѐ``-``ӿ`` (U+0400-U+04FF)."""
+    for ch in text:
+        if "Ѐ" <= ch <= "ӿ":
+            return True
+    return False
diff --git a/app/utils/pdf.py b/app/utils/pdf.py
new file mode 100644
index 0000000..5573c76
--- /dev/null
+++ b/app/utils/pdf.py
@@ -0,0 +1,46 @@
+"""PDF inspection helpers - decide whether OCR is required."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pikepdf
+from pdfminer.high_level import extract_text
+
+_PDF_MAGIC = b"%PDF-"
+
+
+def page_count(path: Path | str) -> int:
+    """Return the number of pages pikepdf reports for the PDF at ``path``."""
+    with pikepdf.open(str(path)) as document:
+        total = len(document.pages)
+    return total
+
+
+def has_searchable_text(path: Path | str, sample_pages: int = 3, min_chars: int = 80) -> bool:
+    """Cheaply check whether the PDF already carries extractable text.
+
+    Extraction is limited to ``sample_pages`` pages via ``maxpages`` and must
+    yield at least ``min_chars`` characters after stripping. Any extraction
+    error counts as "no text": it is safer to OCR than to skip.
+    """
+    try:
+        sample = extract_text(str(path), maxpages=sample_pages)
+    except Exception:
+        return False
+    return len((sample or "").strip()) >= min_chars
+
+
+def is_pdf(path: Path | str) -> bool:
+    """Return True for an existing ``.pdf`` file whose first bytes are ``%PDF-``."""
+    candidate = Path(path)
+    if not candidate.is_file():
+        return False
+    if candidate.suffix.lower() != ".pdf":
+        return False
+    try:
+        with open(candidate, "rb") as stream:
+            header = stream.read(len(_PDF_MAGIC))
+    except OSError:
+        return False
+    return header == _PDF_MAGIC
diff --git a/app/utils/text_cleaning.py b/app/utils/text_cleaning.py
new file mode 100644
index 0000000..5cdec99
--- /dev/null
+++ b/app/utils/text_cleaning.py
@@ -0,0 +1,69 @@
+"""Conservative OCR text cleaning.
+
+Goals:
+- Drop hyphenation across line breaks (``инвен-\\nтарный`` -> ``инвентарный``).
+- Collapse runs of whitespace.
+- Strip control chars.
+- Preserve all non-letter characters that may carry meaning in legacy/technical
+  documents: digits, punctuation, slashes, dashes, dots, parentheses, etc.
+
+We do NOT lowercase, transliterate, or strip punctuation here. ``normalize_for_search``
+produces a more aggressive form for indexing, but the original ``text`` is always
+kept untouched for citation/display.
+"""
+
+from __future__ import annotations
+
+import re
+import unicodedata
+
+_CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")
+_SOFT_HYPHEN = "­"
+_MULTI_WS = re.compile(r"[ \t ]+")
+_MULTI_NL = re.compile(r"\n{3,}")
+_HYPHEN_LINEBREAK = re.compile(r"(\w)[-‐‑‒–]\n(\w)")
+_TRAILING_WS = re.compile(r"[ \t]+\n")
+
+
+def clean_ocr_text(text: str) -> str:
+    """Conservatively clean OCR text; empty input yields an empty string."""
+    if not text:
+        return ""
+    # NFC merges combining marks; soft hyphens and control chars are dropped.
+    text = unicodedata.normalize("NFC", text).replace(_SOFT_HYPHEN, "")
+    text = _CONTROL_CHARS.sub("", text)
+    # Strip trailing blanks first so "word- \nrest" is still de-hyphenated.
+    text = _TRAILING_WS.sub("\n", text)
+    text = _HYPHEN_LINEBREAK.sub(r"\1\2", text)
+    text = _MULTI_NL.sub("\n\n", _MULTI_WS.sub(" ", text))
+    return text.strip()
+
+
+_PUNCT_RUN = re.compile(r"[^\w\s/\-.,№#:()\[\]]+", flags=re.UNICODE)
+_WS_RUN = re.compile(r"\s+")
+
+
+def normalize_for_search(text: str) -> str:
+    """Produce the lowercased, lightly normalized form used for full-text indexing.
+
+    Runs ``clean_ocr_text``, lowercases, replaces every run of characters outside
+    the ``_PUNCT_RUN`` allow-list with a space, then collapses whitespace. Word
+    characters plus ``/ - . , № # : ( ) [ ]`` are kept for document/serial codes.
+    """
+    if not text:
+        return ""
+    lowered = clean_ocr_text(text).lower()
+    spaced = _PUNCT_RUN.sub(" ", lowered)
+    # Any whitespace run, newlines included, collapses to a single space.
+    return _WS_RUN.sub(" ", spaced).strip()
+
+
+def looks_garbled(text: str, threshold: float = 0.35) -> bool:
+    """Return True when the share of unexpected symbols in ``text`` exceeds ``threshold``."""
+    # Empty inputs and inputs under 20 characters are never flagged.
+    if not text or len(text) < 20:
+        return False
+    expected_punct = ".,;:!?-/()[]№#"
+    # Letters, digits, whitespace and the punctuation above count as normal.
+    noise = sum(not (ch.isalnum() or ch.isspace() or ch in expected_punct) for ch in text)
+    return noise / len(text) > threshold
diff --git a/app/workers/__init__.py b/app/workers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/workers/celery_app.py b/app/workers/celery_app.py
new file mode 100644
index 0000000..cf6ddf3
--- /dev/null
+++ b/app/workers/celery_app.py
@@ -0,0 +1,34 @@
+"""Celery application instance."""
+
+from __future__ import annotations
+
+from celery import Celery
+
+from app.config import settings
+from app.logging_config import configure_logging
+
+# Install the JSON logging handler before the Celery app is created.
+configure_logging()
+
+celery_app = Celery(
+    "legacyhub",
+    broker=settings.redis_url,
+    backend=settings.redis_url,
+    include=["app.workers.tasks"],
+)
+
+celery_app.conf.update(
+    # Acknowledge after execution and requeue when a worker dies mid-task.
+    task_acks_late=True,
+    task_reject_on_worker_lost=True,
+    task_track_started=True,
+    worker_prefetch_multiplier=1,
+    # Keep the root handler installed by configure_logging(); by default the
+    # worker removes existing root handlers and installs its own.
+    worker_hijack_root_logger=False,
+    # Soft limit at 3x and hard limit at 4x the per-document timeout.
+    task_time_limit=settings.max_document_timeout_seconds * 4,
+    task_soft_time_limit=settings.max_document_timeout_seconds * 3,
+    timezone="UTC",
+    enable_utc=True,
+)
diff --git a/app/workers/tasks.py b/app/workers/tasks.py
new file mode 100644
index 0000000..3d25a07
--- /dev/null
+++ b/app/workers/tasks.py
@@ -0,0 +1,32 @@
+"""Celery tasks - thin wrappers over pipeline functions."""
+
+from __future__ import annotations
+
+import uuid
+
+from celery.utils.log import get_task_logger
+
+from app.workers.celery_app import celery_app
+
+logger = get_task_logger(__name__)
+
+
+@celery_app.task(name="legacyhub.process_document", bind=True, max_retries=2, default_retry_delay=30)
+def process_document(self, document_id: str, run_id: str | None = None) -> dict:
+    """Run the ingestion pipeline for one document and return its result.
+
+    ``document_id`` and the optional ``run_id`` are UUID strings. They are
+    parsed before the retry block, so a malformed ID raises ``ValueError``
+    immediately instead of being retried. Any exception from the pipeline is
+    logged and re-submitted through ``self.retry``.
+    """
+    # The pipeline import is deferred until the task actually runs.
+    from app.ingestion.pipeline import process_document_id
+
+    doc_uuid = uuid.UUID(document_id)
+    run_uuid = uuid.UUID(run_id) if run_id else None
+    try:
+        return process_document_id(doc_uuid, run_uuid)
+    except Exception as exc:  # noqa: BLE001
+        logger.exception("worker.process_failed", extra={"document_id": document_id})
+        raise self.retry(exc=exc) from exc
diff --git a/data/input/.gitkeep b/data/input/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/data/work/.gitkeep b/data/work/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..cc70e0e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,186 @@
+name: legacyhub
+
+x-common-env: &common-env
+  POSTGRES_HOST: ${POSTGRES_HOST:-postgres}
+  POSTGRES_PORT: ${POSTGRES_PORT:-5432}
+  POSTGRES_DB: ${POSTGRES_DB:-legacyhub}
+  POSTGRES_USER: ${POSTGRES_USER:-legacyhub}
+  POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-legacyhub}
+  MINIO_ENDPOINT: ${MINIO_ENDPOINT:-minio:9000}
+  MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-legacyhub}
+  MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-legacyhub-secret}
+  MINIO_BUCKET_ORIGINALS: ${MINIO_BUCKET_ORIGINALS:-legacyhub-originals}
+  MINIO_BUCKET_DERIVED: ${MINIO_BUCKET_DERIVED:-legacyhub-derived}
+  MINIO_SECURE: ${MINIO_SECURE:-false}
+  OPENSEARCH_HOST: ${OPENSEARCH_HOST:-opensearch}
+  OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200}
+  OPENSEARCH_USE_SSL: ${OPENSEARCH_USE_SSL:-false}
+  OPENSEARCH_VERIFY_CERTS: ${OPENSEARCH_VERIFY_CERTS:-false}
+  OPENSEARCH_INDEX_CHUNKS: ${OPENSEARCH_INDEX_CHUNKS:-legacy_chunks}
+  QDRANT_HOST: ${QDRANT_HOST:-qdrant}
+  QDRANT_PORT: ${QDRANT_PORT:-6333}
+  QDRANT_COLLECTION_CHUNKS: ${QDRANT_COLLECTION_CHUNKS:-legacy_chunks}
+  REDIS_URL: ${REDIS_URL:-redis://redis:6379/0}
+  OCR_LANGUAGES: ${OCR_LANGUAGES:-rus+eng}
+  OCR_ENABLED: ${OCR_ENABLED:-true}
+  DOCLING_OCR_ENABLED: ${DOCLING_OCR_ENABLED:-false}
+  MAX_DOCUMENT_TIMEOUT_SECONDS: ${MAX_DOCUMENT_TIMEOUT_SECONDS:-180}
+  EMBEDDING_MODEL: ${EMBEDDING_MODEL:-BAAI/bge-m3}
+  EMBEDDING_DEVICE: ${EMBEDDING_DEVICE:-cpu}
+  RERANKER_MODEL: ${RERANKER_MODEL:-BAAI/bge-reranker-v2-m3}
+  RERANKER_DEVICE: ${RERANKER_DEVICE:-cpu}
+  APP_LOG_LEVEL: ${APP_LOG_LEVEL:-INFO}
+  APP_INPUT_DIR: /data/input
+  APP_WORK_DIR: /data/work
+
+services:
+  postgres:
+    image: postgres:16-alpine
+    restart: unless-stopped
+    environment:
+      POSTGRES_DB: ${POSTGRES_DB:-legacyhub}
+      POSTGRES_USER: ${POSTGRES_USER:-legacyhub}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-legacyhub}
+    ports:
+      - "5432:5432"
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-legacyhub} -d ${POSTGRES_DB:-legacyhub}"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+
+  minio:
+    image: minio/minio:RELEASE.2024-08-29T01-40-52Z
+    restart: unless-stopped
+    command: server /data --console-address ":9001"
+    environment:
+      MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-legacyhub}
+      MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-legacyhub-secret}
+    ports:
+      - "9000:9000"
+      - "9001:9001"
+    volumes:
+      - minio_data:/data
+    healthcheck:
+      test: ["CMD", "mc", "ready", "local"]  # mc is bundled with the image; curl is not guaranteed to be
+      interval: 10s
+      timeout: 5s
+      retries: 10
+
+  opensearch:
+    image: opensearchproject/opensearch:2.15.0
+    restart: unless-stopped
+    environment:
+      - discovery.type=single-node
+      - bootstrap.memory_lock=true
+      - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g"
+      - DISABLE_SECURITY_PLUGIN=true
+      - DISABLE_INSTALL_DEMO_CONFIG=true
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+      nofile:
+        soft: 65536
+        hard: 65536
+    ports:
+      - "9200:9200"
+      - "9600:9600"
+    volumes:
+      - opensearch_data:/usr/share/opensearch/data
+    healthcheck:
+      test: ["CMD-SHELL", "curl -fsS http://localhost:9200/_cluster/health | grep -q '\"status\":\"\\(green\\|yellow\\)\"'"]
+      interval: 15s
+      timeout: 10s
+      retries: 20
+
+  qdrant:
+    image: qdrant/qdrant:v1.11.3
+    restart: unless-stopped
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+    volumes:
+      - qdrant_data:/qdrant/storage
+    healthcheck:
+      test: ["CMD-SHELL", "bash -c '</dev/tcp/127.0.0.1/6333'"]
+      interval: 15s
+      timeout: 5s
+      retries: 10
+
+  redis:
+    image: redis:7-alpine
+    restart: unless-stopped
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis_data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+
+  api:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile
+    image: legacyhub/api:latest
+    restart: unless-stopped
+    environment:
+      <<: *common-env
+      APP_HOST: 0.0.0.0
+      APP_PORT: 8000
+    command: ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+    ports:
+      - "8000:8000"
+    depends_on:
+      postgres:
+        condition: service_healthy
+      minio:
+        condition: service_healthy
+      opensearch:
+        condition: service_healthy
+      qdrant:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    volumes:
+      - ./data/input:/data/input
+      - ./data/work:/data/work
+      - hf_cache:/root/.cache/huggingface
+
+  worker:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile
+    image: legacyhub/api:latest
+    restart: unless-stopped
+    environment:
+      <<: *common-env
+    command: ["celery", "-A", "app.workers.celery_app", "worker", "--loglevel=INFO", "--concurrency=2"]
+    depends_on:
+      postgres:
+        condition: service_healthy
+      minio:
+        condition: service_healthy
+      opensearch:
+        condition: service_healthy
+      qdrant:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    volumes:
+      - ./data/input:/data/input
+      - ./data/work:/data/work
+      - hf_cache:/root/.cache/huggingface
+
+volumes:
+  postgres_data:
+  minio_data:
+  opensearch_data:
+  qdrant_data:
+  redis_data:
+  hf_cache:
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..59b5b1e
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,49 @@
+FROM python:3.11-slim-bookworm
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    DEBIAN_FRONTEND=noninteractive
+
+# System deps for OCRmyPDF + Tesseract (rus+eng) + Ghostscript + qpdf + image libs
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        ca-certificates \
+        ghostscript \
+        qpdf \
+        unpaper \
+        pngquant \
+        jbig2dec \
+        libxml2-dev \
+        libxslt1-dev \
+        libffi-dev \
+        libjpeg-dev \
+        libopenjp2-7 \
+        libtiff5-dev \
+        zlib1g-dev \
+        poppler-utils \
+        libmagic1 \
+        tesseract-ocr \
+        tesseract-ocr-eng \
+        tesseract-ocr-rus \
+        tesseract-ocr-osd \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY pyproject.toml /app/pyproject.toml
+
+RUN pip install --upgrade pip wheel setuptools && \
+    pip install -e .
+
+COPY app /app/app
+COPY scripts /app/scripts
+COPY alembic.ini /app/alembic.ini
+
+RUN mkdir -p /data/input /data/work
+
+EXPOSE 8000
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/frontend/.env.example b/frontend/.env.example
new file mode 100644
index 0000000..110ece7
--- /dev/null
+++ b/frontend/.env.example
@@ -0,0 +1,4 @@
+# Frontend environment
+VITE_API_BASE_URL=/api/v1
+VITE_USE_MOCK=true
+VITE_APP_NAME=LegacyHUB
diff --git a/frontend/.gitignore b/frontend/.gitignore
new file mode 100644
index 0000000..febf62d
--- /dev/null
+++ b/frontend/.gitignore
@@ -0,0 +1,7 @@
+node_modules
+dist
+.env
+.env.local
+.vite
+.DS_Store
+*.log
diff --git a/frontend/README.md b/frontend/README.md
new file mode 100644
index 0000000..e3833b1
--- /dev/null
+++ b/frontend/README.md
@@ -0,0 +1,140 @@
+# LegacyHUB · Frontend
+
+React + TypeScript + Vite frontend for **LegacyHUB**, the legacy-document
+indexing and AI search module of the **TeamHUB Suite**.
+
+This package ships:
+
+- the application shell (collapsible sidebar, top toolbar, breadcrumb nav,
+  global ⌘K command palette, light/dark theme, notification center,
+  user/profile menu);
+- nine pages: Dashboard, Documents, Ingestion Jobs, Search, Document Viewer,
+  Tables & Figures, Quality Control, System Health, Settings;
+- a hybrid AI search workspace with semantic / lexical / hybrid modes, live
+  suggestions, expandable filters, highlighted matches, reranker score
+  visualization and side-by-side chunk preview;
+- typed service layer (`src/services/*`) with Axios + TanStack Query and a
+  mock data backend you can toggle off when the backend is reachable.
+
+## Stack
+
+| Concern        | Library                                |
+|----------------|-----------------------------------------|
+| Bundler        | Vite 5                                  |
+| Language       | TypeScript 5.6                          |
+| UI             | React 18                                |
+| Styling        | TailwindCSS 3 + custom design tokens    |
+| Components     | shadcn/ui primitives (Radix + cva)      |
+| Animation      | Framer Motion                           |
+| Charts         | Recharts                                |
+| Server state   | TanStack Query                          |
+| Client state   | Zustand                                 |
+| Routing        | React Router v6                         |
+| HTTP           | Axios                                   |
+| Icons          | lucide-react                            |
+| Toasts         | sonner                                  |
+| Virtualization | @tanstack/react-virtual                 |
+
+## Quick start
+
+```bash
+cd frontend
+cp .env.example .env       # VITE_USE_MOCK=true for offline UI development
+npm install
+npm run dev                # http://localhost:5173
+```
+
+When the FastAPI backend is running, set `VITE_USE_MOCK=false` and keep
+`VITE_API_BASE_URL=/api/v1`; the Vite dev server proxies `/api` requests to
+the backend on port 8000. All API calls are isolated through `src/services/*.ts`.
+
+## Architecture
+
+```
+frontend/src/
+  app/         RouterProvider, QueryClient, TooltipProvider, theme bootstrap
+  pages/       One file per route — composed of widgets + primitives
+  layouts/     AppShell, Sidebar (collapsible), Topbar, Breadcrumbs, ⌘K palette
+  widgets/     Domain-specific composite components (KpiCard, Charts, Result cards,
+               PdfPreviewPane, ChunkPreview, ServiceHealthCard, Timeline)
+  components/
+    ui/        shadcn-style primitives — Button, Card, Tabs, Dialog, Select,
+               Tooltip, Popover, ScrollArea, Command, Skeleton, Progress, …
+    common/    Domain primitives — Logo, StatusChip, ConfidenceMeter,
+               QualityFlag, BlockTypeIcon, Highlight, EmptyState, PageHeader,
+               ThemeToggle
+  services/    Typed API layer (Axios) + TanStack hooks (one file per resource)
+    mock/      Deterministic mock data + simulated latency
+  hooks/       Wrappers around services exposing TanStack Query hooks
+  stores/      Zustand stores: uiStore (theme, sidebar, palette), searchStore
+  styles/      Tailwind layer + design tokens (HSL CSS variables)
+  lib/         cn(), formatBytes/Number/Percent/Duration, relativeTime, etc.
+```
+
+### Design system
+
+- **Palette** — white / light-gray surfaces with a single restrained green
+  accent (`--primary: 158 64% 32%`) matching QMS Hub.
+- **Surfaces** — three tiers: sunken (page background), default card, raised
+  (popovers / dialogs). Glass surfaces via `backdrop-blur` for the topbar.
+- **Corners** — `--radius: 14px` produces soft, premium edges across every
+  component.
+- **Shadows** — `shadow-soft` and `shadow-elevated` only. No harsh drop
+  shadows.
+- **Typography** — Inter variable, optical sizes, tabular numbers for data
+  cells, JetBrains Mono for IDs / paths / hashes.
+- **Motion** — Framer Motion `layoutId` for the active sidebar pill,
+  `fade-in-up` for KPI cards, animated tabs and result expansion.
+- **States** — skeleton shimmer instead of spinners wherever possible.
+
+### Key flows
+
+- **Hybrid search (`/search`)** — Debounced query → TanStack hook hits the
+  backend (or mock). Results are virtualized, scored, optionally reranked.
+  Picking a result hydrates a side-by-side ChunkPreview with the highlighted
+  excerpt, a page thumbnail, citation metadata, and quality flags.
+- **Documents (`/documents`)** — Virtualized table (TanStack Virtual)
+  supports thousands of rows. Filters: status, OCR threshold, "needs review",
+  free-text search. Clicking a row opens the viewer.
+- **Document Viewer (`/viewer/:id`)** — Split layout. Left pane: PDF page
+  thumbnails + synchronized large page preview with highlighted OCR blocks.
+  Right pane: extracted chunks / tables / figures / metadata, kept in lock-step
+  with the active page. Below: full pipeline timeline.
+- **Ingestion (`/ingestion`)** — Submit a folder path with `recursive` /
+  `force` toggles → optimistic queue, run history table with live progress
+  bars.
+- **Quality control (`/quality`)** — Three review queues (low confidence,
+  handwriting, failed extraction) with reviewer actions and an audit log.
+
+### Mock vs real backend
+
+`src/services/apiClient.ts` exports a constant `USE_MOCK`. When `true`, every
+service module short-circuits to `src/services/mock/mockData.ts` which
+generates deterministic, seeded data: 280 documents, dashboards, ingestion
+runs, search results, health and queue snapshots, and per-document detail
+(pages, chunks, tables, figures, timeline events).
+
+This lets the frontend be developed and demoed without the Python services
+running.
+
+### Accessibility
+
+- All interactive elements use `ring-focus` (visible 2px primary ring).
+- Sidebar nav exposes tooltips when collapsed.
+- Keyboard: `Ctrl/Cmd + K` opens the global command palette.
+
+### Responsive layout
+
+- ≥ 1280 px (xl, ultrawide) — three-column dashboards, side-by-side search.
+- 1024–1279 px (laptop) — two-column dashboards, stacked search.
+- < 1024 px — single column; sidebar collapses to icons only.
+
+## Scripts
+
+```bash
+npm run dev        # Vite dev server with /api proxy → :8000
+npm run build      # type-check + production bundle
+npm run preview    # preview build
+npm run lint
+npm run format
+```
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..952fea6
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,19 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="theme-color" content="#059669" />
+    <title>LegacyHUB · TeamHUB Suite</title>
+    <link
+      rel="stylesheet"
+      href="https://rsms.me/inter/inter.css"
+      crossorigin="anonymous"
+    />
+  </head>
+  <body class="bg-background text-foreground antialiased">
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
diff --git a/frontend/package.json b/frontend/package.json
new file mode 100644
index 0000000..03053d8
--- /dev/null
+++ b/frontend/package.json
@@ -0,0 +1,56 @@
+{
+  "name": "legacyhub-frontend",
+  "private": true,
+  "version": "0.1.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "preview": "vite preview --port 4173",
+    "lint": "eslint . --ext .ts,.tsx",
+    "format": "prettier --write \"src/**/*.{ts,tsx,css}\""
+  },
+  "dependencies": {
+    "@radix-ui/react-dialog": "^1.1.2",
+    "@radix-ui/react-dropdown-menu": "^2.1.2",
+    "@radix-ui/react-popover": "^1.1.2",
+    "@radix-ui/react-progress": "^1.1.0",
+    "@radix-ui/react-scroll-area": "^1.2.0",
+    "@radix-ui/react-select": "^2.1.2",
+    "@radix-ui/react-separator": "^1.1.0",
+    "@radix-ui/react-slot": "^1.1.0",
+    "@radix-ui/react-switch": "^1.1.1",
+    "@radix-ui/react-tabs": "^1.1.1",
+    "@radix-ui/react-tooltip": "^1.1.3",
+    "@tanstack/react-query": "^5.51.0",
+    "@tanstack/react-virtual": "^3.10.6",
+    "axios": "^1.7.7",
+    "class-variance-authority": "^0.7.0",
+    "clsx": "^2.1.1",
+    "cmdk": "^1.0.0",
+    "date-fns": "^3.6.0",
+    "framer-motion": "^11.5.4",
+    "lucide-react": "^0.451.0",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router-dom": "^6.26.2",
+    "recharts": "^2.13.0",
+    "sonner": "^1.5.0",
+    "tailwind-merge": "^2.5.2",
+    "tailwindcss-animate": "^1.0.7",
+    "zustand": "^4.5.5"
+  },
+  "devDependencies": {
+    "@types/node": "^22.7.4",
+    "@types/react": "^18.3.11",
+    "@types/react-dom": "^18.3.0",
+    "@vitejs/plugin-react": "^4.3.2",
+    "autoprefixer": "^10.4.20",
+    "eslint": "^9.11.1",
+    "postcss": "^8.4.47",
+    "prettier": "^3.3.3",
+    "tailwindcss": "^3.4.13",
+    "typescript": "^5.6.2",
+    "vite": "^5.4.8"
+  }
+}
diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js
new file mode 100644
index 0000000..2aa7205
--- /dev/null
+++ b/frontend/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+};
diff --git a/frontend/public/favicon.svg b/frontend/public/favicon.svg
new file mode 100644
index 0000000..eca6a7e
--- /dev/null
+++ b/frontend/public/favicon.svg
@@ -0,0 +1,10 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
+  <defs>
+    <linearGradient id="g" x1="0" x2="32" y1="0" y2="32" gradientUnits="userSpaceOnUse">
+      <stop offset="0" stop-color="#10b981"/>
+      <stop offset="1" stop-color="#047857"/>
+    </linearGradient>
+  </defs>
+  <rect width="32" height="32" rx="8" fill="url(#g)"/>
+  <path d="M9 9.5h6.2c2.9 0 4.6 1.5 4.6 4 0 2-1.1 3.3-3 3.8l3.6 5.2h-3l-3.3-5h-2.5v5H9V9.5zm5.9 5.7c1.5 0 2.4-.7 2.4-2 0-1.2-.9-1.9-2.4-1.9h-3.2v3.9h3.2z" fill="#fff"/>
+</svg>
diff --git a/frontend/src/app/App.tsx b/frontend/src/app/App.tsx
new file mode 100644
index 0000000..d62dfbe
--- /dev/null
+++ b/frontend/src/app/App.tsx
@@ -0,0 +1,11 @@
+import { RouterProvider } from "react-router-dom";
+import { AppProviders } from "@/app/providers";
+import { router } from "@/app/router";
+
+/**
+ * Application root: renders the router inside the shared provider tree.
+ */
+export function App() {
+  const routedTree = <RouterProvider router={router} />;
+  return <AppProviders>{routedTree}</AppProviders>;
+}
diff --git a/frontend/src/app/providers.tsx b/frontend/src/app/providers.tsx
new file mode 100644
index 0000000..468c227
--- /dev/null
+++ b/frontend/src/app/providers.tsx
@@ -0,0 +1,34 @@
+import type { ReactNode } from "react";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { TooltipProvider } from "@/components/ui/tooltip";
+import { Toaster } from "sonner";
+
+// Shared React Query client: one retry per failed query, no refetch on window focus, staleTime of 30 000 ms.
+const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: { retry: 1, refetchOnWindowFocus: false, staleTime: 30_000 },
+  },
+});
+
+// Class names handed to sonner for every toast.
+const TOAST_CLASS_NAMES = {
+  toast: "rounded-xl border border-border/70 bg-card text-foreground shadow-elevated",
+};
+
+/**
+ * Wraps `children` in the app-wide providers (React Query client, tooltip
+ * provider) and mounts the toast container after them.
+ */
+export function AppProviders({ children }: { children: ReactNode }) {
+  const toaster = (
+    <Toaster position="bottom-right" richColors toastOptions={{ classNames: TOAST_CLASS_NAMES }} />
+  );
+  return (
+    <QueryClientProvider client={queryClient}>
+      <TooltipProvider delayDuration={150}>
+        {children}
+        {toaster}
+      </TooltipProvider>
+    </QueryClientProvider>
+  );
+}
diff --git a/frontend/src/app/router.tsx b/frontend/src/app/router.tsx
new file mode 100644
index 0000000..32f6278
--- /dev/null
+++ b/frontend/src/app/router.tsx
@@ -0,0 +1,31 @@
+import { createBrowserRouter, Navigate } from "react-router-dom";
+
+import { AppShell } from "@/layouts/AppShell";
+import { DashboardPage } from "@/pages/DashboardPage";
+import { DocumentsPage } from "@/pages/DocumentsPage";
+import { IngestionJobsPage } from "@/pages/IngestionJobsPage";
+import { SearchPage } from "@/pages/SearchPage";
+import { DocumentViewerPage } from "@/pages/DocumentViewerPage";
+import { TablesFiguresPage } from "@/pages/TablesFiguresPage";
+import { QualityControlPage } from "@/pages/QualityControlPage";
+import { SystemHealthPage } from "@/pages/SystemHealthPage";
+import { SettingsPage } from "@/pages/SettingsPage";
+
+// Pages rendered inside <AppShell />. "/viewer" is reachable both with and without a document id.
+const shellRoutes = [
+  { path: "/", element: <DashboardPage /> },
+  { path: "/documents", element: <DocumentsPage /> },
+  { path: "/ingestion", element: <IngestionJobsPage /> },
+  { path: "/search", element: <SearchPage /> },
+  { path: "/viewer", element: <DocumentViewerPage /> },
+  { path: "/viewer/:id", element: <DocumentViewerPage /> },
+  { path: "/tables-figures", element: <TablesFiguresPage /> },
+  { path: "/quality", element: <QualityControlPage /> },
+  { path: "/health", element: <SystemHealthPage /> },
+  { path: "/settings", element: <SettingsPage /> },
+  { path: "*", element: <Navigate to="/" replace /> },
+];
+
+// Browser router: every page is a child of the pathless <AppShell /> layout route,
+// and any path not listed above is redirected to "/" with history replacement.
+export const router = createBrowserRouter([{ element: <AppShell />, children: shellRoutes }]);
diff --git a/frontend/src/components/common/BlockTypeIcon.tsx b/frontend/src/components/common/BlockTypeIcon.tsx
new file mode 100644
index 0000000..08daf5f
--- /dev/null
+++ b/frontend/src/components/common/BlockTypeIcon.tsx
@@ -0,0 +1,44 @@
+import {
+  AlignLeft,
+  Heading,
+  List,
+  Table as TableIcon,
+  Image as ImageIcon,
+  PenLine,
+  Hash,
+  HelpCircle,
+} from "lucide-react";
+import { cn } from "@/lib/utils";
+
+// Icon component and Tailwind text-colour class for each known block type.
+const MAP: Record<string, { icon: typeof AlignLeft; tone: string }> = {
+  title: { icon: Hash, tone: "text-primary" },
+  heading: { icon: Heading, tone: "text-primary" },
+  paragraph: { icon: AlignLeft, tone: "text-muted-foreground" },
+  list: { icon: List, tone: "text-muted-foreground" },
+  table: { icon: TableIcon, tone: "text-warning" },
+  figure_caption: { icon: ImageIcon, tone: "text-primary-600" },
+  figure_description: { icon: ImageIcon, tone: "text-primary-600" },
+  handwriting: { icon: PenLine, tone: "text-destructive" },
+  unknown: { icon: HelpCircle, tone: "text-muted-foreground" },
+};
+
+/**
+ * Decorative (aria-hidden) icon for a block `type`. Any type that is not an
+ * own key of MAP — including inherited names like "constructor" — uses `unknown`.
+ */
+export function BlockTypeIcon({ type, className }: { type: string; className?: string }) {
+  // A bare MAP[type] lookup finds Object.prototype members, which are non-nullish and would skip a `??` fallback.
+  const m = Object.prototype.hasOwnProperty.call(MAP, type) ? MAP[type] : MAP.unknown;
+  const Icon = m.icon;
+  return <Icon className={cn("h-3.5 w-3.5", m.tone, className)} aria-hidden />;
+}
+
+/** Pill showing the block-type icon followed by the type name, with underscores shown as spaces. */
+export function BlockTypeLabel({ type }: { type: string }) {
+  const readable = type.replace(/_/g, " ");
+  const pill = "inline-flex items-center gap-1 rounded-md border border-border/70 bg-muted/30 px-1.5 py-0.5 text-[10px] font-medium uppercase tracking-wide text-muted-foreground";
+  return (
+    <span className={pill}><BlockTypeIcon type={type} />{readable}</span>
+  );
+}
diff --git a/frontend/src/components/common/ConfidenceMeter.tsx b/frontend/src/components/common/ConfidenceMeter.tsx
new file mode 100644
index 0000000..9d17bab
--- /dev/null
+++ b/frontend/src/components/common/ConfidenceMeter.tsx
@@ -0,0 +1,38 @@
+import { cn } from "@/lib/utils";
+
+/**
+ * Horizontal confidence bar with an optional percentage label.
+ * `value` is a fraction in [0, 1]; it is clamped to that range so an out-of-range
+ * score cannot print e.g. "120%". null, undefined and NaN render as a full
+ * neutral bar with an em-dash label.
+ */
+export function ConfidenceMeter({
+  value,
+  showLabel = true,
+  className,
+}: {
+  value: number | null | undefined;
+  showLabel?: boolean;
+  className?: string;
+}) {
+  const pct = value == null || Number.isNaN(value) ? null : Math.round(Math.min(1, Math.max(0, value)) * 100);
+  // Colour bands: >= 85 success, >= 65 primary, >= 45 warning, anything lower destructive.
+  const tone =
+    pct == null ? "bg-muted-foreground/30"
+    : pct >= 85 ? "bg-success"
+    : pct >= 65 ? "bg-primary"
+    : pct >= 45 ? "bg-warning"
+    : "bg-destructive";
+  const label = pct == null ? "—" : `${pct}%`;
+  return (
+    <div className={cn("flex items-center gap-2", className)}>
+      <div className="h-1.5 w-16 overflow-hidden rounded-full bg-muted">
+        <div
+          className={cn("h-full transition-all", tone)}
+          style={{ width: pct == null ? "100%" : label }}
+        />
+      </div>
+      {showLabel && <span className="font-mono text-xs tabular-nums text-muted-foreground">{label}</span>}
+    </div>
+  );
+}
diff --git a/frontend/src/components/common/EmptyState.tsx b/frontend/src/components/common/EmptyState.tsx
new file mode 100644
index 0000000..3b1a935
--- /dev/null
+++ b/frontend/src/components/common/EmptyState.tsx
@@ -0,0 +1,38 @@
+import type { ReactNode } from "react";
+import { cn } from "@/lib/utils";
+
+/**
+ * Centered placeholder panel for empty views.
+ *
+ * Renders, top to bottom: the optional `icon` in a tinted badge, the `title`,
+ * the optional `description`, and finally the optional `action` node.
+ */
+export function EmptyState({ icon, title, description, action, className }: {
+  icon?: ReactNode;
+  title: string;
+  description?: string;
+  action?: ReactNode;
+  className?: string;
+}) {
+  const rootClasses = cn(
+    "panel flex flex-col items-center justify-center gap-3 px-8 py-14 text-center",
+    className
+  );
+  // Optional parts are gated with `&&`, so a falsy icon/description is handed to React as-is.
+  const iconBadge = icon && (
+    <div className="rounded-2xl border border-border/70 bg-accent/40 p-3 text-primary">{icon}</div>
+  );
+  const details = description && (
+    <div className="max-w-sm text-xs text-muted-foreground">{description}</div>
+  );
+  return (
+    <div className={rootClasses}>
+      {iconBadge}
+      <div className="space-y-1">
+        <div className="text-sm font-semibold">{title}</div>
+        {details}
+      </div>
+      {action}
+    </div>
+  );
+}
diff --git a/frontend/src/components/common/Highlight.tsx b/frontend/src/components/common/Highlight.tsx
new file mode 100644
index 0000000..905d86f
--- /dev/null
+++ b/frontend/src/components/common/Highlight.tsx
@@ -0,0 +1,45 @@
+import { useMemo } from "react";
+
+/**
+ * Renders `text` with every run matching a `query` token wrapped in <mark>;
+ * the remaining runs are plain <span>s. The split is memoised on (text, query).
+ */
+export function Highlight({
+  text,
+  query,
+}: {
+  text: string;
+  query: string;
+}) {
+  const segments = useMemo(() => splitHighlight(text, query), [text, query]);
+  const markClasses = "rounded-[3px] bg-primary/20 px-0.5 text-primary-700 dark:text-primary-100";
+  // Segments are keyed by their position in the split.
+  const rendered = segments.map((segment, idx) => {
+    if (!segment.match) return <span key={idx}>{segment.text}</span>;
+    return (
+      <mark key={idx} className={markClasses}>
+        {segment.text}
+      </mark>
+    );
+  });
+
+  return <>{rendered}</>;
+}
+
+// Splits `text` into { text, match } runs; a run matches when it equals any query token of 2+ chars, ignoring case.
+function splitHighlight(text: string, query: string): { text: string; match: boolean }[] {
+  const tokens = Array.from(new Set(query.trim().split(/\s+/).filter((t) => t.length >= 2)));
+  if (tokens.length === 0) return [{ text, match: false }];
+  // Longest first: alternation takes the first alternative that matches, so "cat" must not pre-empt "category".
+  tokens.sort((a, b) => b.length - a.length);
+  const escaped = tokens.map((t) => t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
+  const re = new RegExp(`(${escaped.join("|")})`, "gi");
+  const out: { text: string; match: boolean }[] = [];
+  let last = 0;
+  for (const m of text.matchAll(re)) {
+    if (m.index! > last) out.push({ text: text.slice(last, m.index), match: false });
+    out.push({ text: m[0], match: true });
+    last = m.index! + m[0].length;
+  }
+  return last < text.length ? [...out, { text: text.slice(last), match: false }] : out;
+}
diff --git a/frontend/src/components/common/Logo.tsx b/frontend/src/components/common/Logo.tsx
new file mode 100644
index 0000000..972286b
--- /dev/null
+++ b/frontend/src/components/common/Logo.tsx
@@ -0,0 +1,23 @@
+import { cn } from "@/lib/utils";
+
+/** Brand mark: gradient tile with the letter "L", followed by the product and suite names unless `compact`. */
+export function Logo({ className, compact = false }: { className?: string; compact?: boolean }) {
+  const tile = (
+    <div className="relative h-8 w-8 shrink-0 overflow-hidden rounded-lg shadow-soft">
+      <div className="absolute inset-0 bg-gradient-to-br from-primary-500 to-primary-700" />
+      <div className="absolute inset-0 grid place-items-center text-[15px] font-semibold tracking-tight text-white">
+        L
+      </div>
+      <div className="pointer-events-none absolute inset-0 ring-1 ring-inset ring-white/15" />
+    </div>
+  );
+  const wordmark = !compact && (
+    <div className="leading-tight">
+      <div className="text-sm font-semibold tracking-tight text-foreground">LegacyHUB</div>
+      <div className="text-[10px] uppercase tracking-[0.18em] text-muted-foreground">TeamHUB Suite</div>
+    </div>
+  );
+  return (
+    <div className={cn("flex items-center gap-2.5", className)}>{tile}{wordmark}</div>
+  );
+}
diff --git a/frontend/src/components/common/PageHeader.tsx b/frontend/src/components/common/PageHeader.tsx
new file mode 100644
index 0000000..e8f2840
--- /dev/null
+++ b/frontend/src/components/common/PageHeader.tsx
@@ -0,0 +1,28 @@
+import type { ReactNode } from "react";
+import { cn } from "@/lib/utils";
+
+/**
+ * Page heading row: title and optional description on one side, optional
+ * `actions` on the other (stacked vertically below the `sm` breakpoint).
+ */
+export function PageHeader({ title, description, actions, className }: {
+  title: string;
+  description?: string;
+  actions?: ReactNode;
+  className?: string;
+}) {
+  const layout = cn("flex flex-col gap-2 sm:flex-row sm:items-end sm:justify-between", className);
+  const blurb = description && <p className="max-w-2xl text-sm text-muted-foreground">{description}</p>;
+  const actionBar = actions && <div className="flex flex-wrap items-center gap-2">{actions}</div>;
+  return (
+    <header className={layout}>
+      <div className="space-y-1">
+        <h1 className="text-2xl font-semibold tracking-tight text-foreground text-balance">
+          {title}
+        </h1>
+        {blurb}
+      </div>
+      {actionBar}
+    </header>
+  );
+}
diff --git a/frontend/src/components/common/QualityFlag.tsx b/frontend/src/components/common/QualityFlag.tsx
new file mode 100644
index 0000000..5ab35db
--- /dev/null
+++ b/frontend/src/components/common/QualityFlag.tsx
@@ -0,0 +1,74 @@
+import { AlertTriangle, CheckCircle2, FileWarning, Hash, Image, PenLine, Table } from "lucide-react";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
+import { cn } from "@/lib/utils";
+
+type FlagMeta = { label: string; icon: typeof AlertTriangle; tone: string };
+
+// Display metadata (label, icon, text-colour class) for each recognised quality flag key.
+const FLAGS: Record<string, FlagMeta> = {
+  low_ocr_confidence: { label: "Low OCR confidence", icon: AlertTriangle, tone: "text-warning" },
+  very_short_text: { label: "Very short text", icon: Hash, tone: "text-muted-foreground" },
+  possible_garbled_text: { label: "Possible garbled text", icon: FileWarning, tone: "text-destructive" },
+  table_detected: { label: "Table detected", icon: Table, tone: "text-primary-600" },
+  figure_detected: { label: "Figure detected", icon: Image, tone: "text-primary-600" },
+  handwriting_detected: { label: "Handwriting detected", icon: PenLine, tone: "text-destructive" },
+  needs_manual_review: { label: "Needs manual review", icon: AlertTriangle, tone: "text-warning" },
+};
+
+/**
+ * Lists the quality flags that are set on an item.
+ *
+ * Only truthy entries whose key is an own key of FLAGS are shown; unknown keys
+ * (and inherited names such as "constructor") are ignored. When nothing is
+ * left to show, a "Clean" marker is rendered instead. `compact` swaps the
+ * labelled pills for icon-only circles with the label in a tooltip.
+ */
+export function QualityFlags({ flags, compact = false, className }: {
+  flags: Record<string, boolean | undefined> | null | undefined;
+  compact?: boolean;
+  className?: string;
+}) {
+  // Own-key test: a bare FLAGS[k] lookup is truthy for Object.prototype members, whose `icon` is undefined.
+  const active = Object.entries(flags ?? {})
+    .filter(([k, v]) => v && Object.prototype.hasOwnProperty.call(FLAGS, k))
+    .map(([k]) => k);
+
+  if (active.length === 0) {
+    return (
+      <span className={cn("inline-flex items-center gap-1 text-xs text-success", className)}>
+        <CheckCircle2 className="h-3.5 w-3.5" />
+        Clean
+      </span>
+    );
+  }
+
+  return (
+    <div className={cn("flex flex-wrap items-center gap-1.5", className)}>
+      {active.map((key) => {
+        const f = FLAGS[key];
+        const Icon = f.icon;
+        if (compact) {
+          return (
+            <Tooltip key={key}>
+              <TooltipTrigger asChild>
+                <span className={cn("inline-flex h-6 w-6 items-center justify-center rounded-full border border-border/60 bg-card", f.tone)}>
+                  <Icon className="h-3.5 w-3.5" />
+                </span>
+              </TooltipTrigger>
+              <TooltipContent>{f.label}</TooltipContent>
+            </Tooltip>
+          );
+        }
+        return (
+          <span
+            key={key}
+            className="inline-flex items-center gap-1 rounded-md border border-border/60 bg-muted/40 px-2 py-0.5 text-[11px] font-medium"
+          >
+            <Icon className={cn("h-3 w-3", f.tone)} />
+            <span className="text-muted-foreground">{f.label}</span>
+          </span>
+        );
+      })}
+    </div>
+  );
+}
diff --git a/frontend/src/components/common/StatusChip.tsx b/frontend/src/components/common/StatusChip.tsx
new file mode 100644
index 0000000..4aa35aa
--- /dev/null
+++ b/frontend/src/components/common/StatusChip.tsx
@@ -0,0 +1,48 @@
+import { cn } from "@/lib/utils";
+
+// Dot, text and background classes for each tone.
+const TONE: Record<string, { dot: string; text: string; bg: string }> = {
+  ok: { dot: "bg-success", text: "text-success", bg: "bg-success/10" },
+  active: { dot: "bg-primary", text: "text-primary-700 dark:text-primary-100", bg: "bg-primary/10" },
+  warning: { dot: "bg-warning", text: "text-warning", bg: "bg-warning/10" },
+  error: { dot: "bg-destructive", text: "text-destructive", bg: "bg-destructive/10" },
+  muted: { dot: "bg-muted-foreground", text: "text-muted-foreground", bg: "bg-muted/60" },
+};
+
+// NOTE: TONE is typed as Record<string, …>, so this alias is not limited to the keys above (any string type-checks).
+export type StatusTone = keyof typeof TONE;
+
+/**
+ * Rounded status pill (coloured dot + `label`); a `tone` with no TONE entry falls back to "muted".
+ */
+export function StatusChip({
+  tone = "muted",
+  label,
+  className,
+}: {
+  tone?: StatusTone;
+  label: string;
+  className?: string;
+}) {
+  // StatusTone admits any string, so guard the lookup rather than dereference undefined below.
+  const t = TONE[tone] ?? TONE.muted;
+  const base = "inline-flex items-center gap-1.5 rounded-full px-2.5 py-0.5 text-xs font-medium";
+  return (
+    <span className={cn(base, t.bg, t.text, className)}>
+      <span className={cn("h-1.5 w-1.5 rounded-full", t.dot)} />
+      {label}
+    </span>
+  );
+}
+
+/** Maps a status string (compared upper-cased) to a chip tone; nullish, empty and unrecognised statuses are "muted". */
+export function statusToTone(status: string | null | undefined): StatusTone {
+  const s = status?.toUpperCase();
+  if (!s) return "muted";
+  if (s.includes("FAILED") || s === "ERROR") return "error";
+  if (s === "INDEXING_COMPLETED" || s === "OK") return "ok";
+  if (s === "DEGRADED") return "warning";
+  const midway = ["OCR_COMPLETED", "EXTRACTION_COMPLETED", "CHUNKING_COMPLETED"];
+  if (s === "DISCOVERED" || s === "PENDING" || s.endsWith("_STARTED") || midway.includes(s)) return "active";
+  return "muted";
+}
diff --git a/frontend/src/components/common/ThemeToggle.tsx b/frontend/src/components/common/ThemeToggle.tsx
new file mode 100644
index 0000000..c169f35
--- /dev/null
+++ b/frontend/src/components/common/ThemeToggle.tsx
@@ -0,0 +1,28 @@
+import { Moon, Sun, MonitorSmartphone } from "lucide-react";
+import { Button } from "@/components/ui/button";
+import { useUiStore } from "@/stores/uiStore";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
+
+/**
+ * Icon button that cycles the stored theme: light → dark → system → light.
+ * The icon and tooltip describe the current theme; clicking applies the next one.
+ */
+export function ThemeToggle() {
+  const theme = useUiStore((s) => s.theme);
+  const setTheme = useUiStore((s) => s.setTheme);
+  // [icon for the current theme, theme applied on click]; any value other than light/dark is treated as system.
+  const [Icon, next] =
+    theme === "light" ? ([Sun, "dark"] as const)
+    : theme === "dark" ? ([Moon, "system"] as const)
+    : ([MonitorSmartphone, "light"] as const);
+  return (
+    <Tooltip>
+      <TooltipTrigger asChild>
+        <Button variant="ghost" size="icon-sm" aria-label="Toggle theme" onClick={() => setTheme(next)}>
+          <Icon className="h-4 w-4" />
+        </Button>
+      </TooltipTrigger>
+      <TooltipContent>Theme: {theme}</TooltipContent>
+    </Tooltip>
+  );
+}
diff --git a/frontend/src/components/ui/badge.tsx b/frontend/src/components/ui/badge.tsx
new file mode 100644
index 0000000..8b80563
--- /dev/null
+++ b/frontend/src/components/ui/badge.tsx
@@ -0,0 +1,29 @@
+import * as React from "react";
+import { cva, type VariantProps } from "class-variance-authority";
+import { cn } from "@/lib/utils";
+
+// Badge class recipe. The default tint uses `/[0.12]`: 12 is not a step on Tailwind v3's default opacity scale, so plain `/12` only compiles if the config extends it.
+const badgeVariants = cva(
+  "inline-flex items-center gap-1 rounded-full border px-2.5 py-0.5 text-xs font-medium transition-colors",
+  {
+    variants: {
+      variant: {
+        default: "border-transparent bg-primary/[0.12] text-primary-700 dark:text-primary-100",
+        outline: "border-border bg-transparent text-foreground",
+        muted: "border-transparent bg-muted text-muted-foreground",
+        success: "border-transparent bg-success/15 text-success",
+        warning: "border-transparent bg-warning/15 text-warning",
+        destructive: "border-transparent bg-destructive/15 text-destructive",
+        accent: "border-transparent bg-accent text-accent-foreground",
+      },
+    },
+    defaultVariants: { variant: "default" },
+  }
+);
+
+export interface BadgeProps extends React.HTMLAttributes<HTMLDivElement>, VariantProps<typeof badgeVariants> {}
+
+/** Renders a <div> styled by `variant` (default: "default"); `className` is passed to `cn` after the variant classes. */
+export function Badge({ className, variant, ...props }: BadgeProps) {
+  return <div className={cn(badgeVariants({ variant }), className)} {...props} />;
+}
diff --git a/frontend/src/components/ui/button.tsx b/frontend/src/components/ui/button.tsx
new file mode 100644
index 0000000..a06125a
--- /dev/null
+++ b/frontend/src/components/ui/button.tsx
@@ -0,0 +1,56 @@
+import * as React from "react";
+import { Slot } from "@radix-ui/react-slot";
+import { cva, type VariantProps } from "class-variance-authority";
+import { cn } from "@/lib/utils";
+
+// Classes every button gets regardless of variant or size.
+const BUTTON_BASE =
+  "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-lg text-sm font-medium ring-focus transition-all disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0";
+
+// Colour treatment per `variant`.
+const BUTTON_VARIANTS = {
+  default:
+    "bg-primary text-primary-foreground shadow-soft hover:bg-primary-700 active:translate-y-[0.5px]",
+  secondary:
+    "bg-secondary text-secondary-foreground hover:bg-secondary/80 border border-border/70",
+  outline:
+    "border border-border bg-transparent hover:bg-muted text-foreground",
+  ghost: "hover:bg-muted text-foreground",
+  subtle: "bg-accent text-accent-foreground hover:bg-accent/70",
+  destructive:
+    "bg-destructive text-destructive-foreground hover:bg-destructive/90",
+  link: "text-primary underline-offset-4 hover:underline",
+};
+
+// Height, padding and text size per `size`; the two icon sizes are square.
+const BUTTON_SIZES = {
+  sm: "h-8 px-3 text-xs",
+  default: "h-9 px-4",
+  lg: "h-11 px-6 text-base rounded-xl",
+  icon: "h-9 w-9",
+  "icon-sm": "h-8 w-8",
+};
+
+// cva recipe over the tables above; "default" applies on either axis when the prop is omitted.
+const buttonVariants = cva(BUTTON_BASE, {
+  variants: { variant: BUTTON_VARIANTS, size: BUTTON_SIZES },
+  defaultVariants: { variant: "default", size: "default" },
+});
+
+/** Native <button> attributes plus the `variant` / `size` axes; `asChild` renders through Radix `Slot` instead of a <button>. */
+export interface ButtonProps
+  extends React.ButtonHTMLAttributes<HTMLButtonElement>,
+    VariantProps<typeof buttonVariants> {
+  asChild?: boolean;
+}
+
+// Ref-forwarding button. The caller's `className` is passed to `cn` after the
+// variant classes, and all remaining props are spread onto the rendered element.
+export const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(function Button(props, forwardedRef) {
+  const { className, variant, size, asChild = false, ...rest } = props;
+  const Tag = asChild ? Slot : "button";
+  return <Tag ref={forwardedRef} className={cn(buttonVariants({ variant, size }), className)} {...rest} />;
+});
+Button.displayName = "Button";
+
+export { buttonVariants };
diff --git a/frontend/src/components/ui/card.tsx b/frontend/src/components/ui/card.tsx
new file mode 100644
index 0000000..799e30a
--- /dev/null
+++ b/frontend/src/components/ui/card.tsx
@@ -0,0 +1,53 @@
+import * as React from "react";
+import { cn } from "@/lib/utils";
+
+// Card building blocks. Every part is a ref-forwarding wrapper around a single element
+// that passes the caller's `className` to `cn` after its own default classes.
+type DivProps = React.HTMLAttributes<HTMLDivElement>;
+
+// Outer surface; styled by the shared "panel" class.
+export const Card = React.forwardRef<HTMLDivElement, DivProps>(({ className, ...rest }, ref) => {
+  return <div ref={ref} className={cn("panel", className)} {...rest} />;
+});
+Card.displayName = "Card";
+
+// Top section: vertical stack with padding on all sides.
+export const CardHeader = React.forwardRef<HTMLDivElement, DivProps>(({ className, ...rest }, ref) => {
+  return <div ref={ref} className={cn("flex flex-col gap-1 p-5", className)} {...rest} />;
+});
+CardHeader.displayName = "CardHeader";
+
+// Title text; rendered as a <div>, not a heading element.
+export const CardTitle = React.forwardRef<HTMLDivElement, DivProps>(({ className, ...rest }, ref) => {
+  const classes = cn(
+    "text-base font-semibold tracking-tight text-foreground",
+    className
+  );
+  return <div ref={ref} className={classes} {...rest} />;
+});
+CardTitle.displayName = "CardTitle";
+
+// Muted secondary text, rendered as a <p>.
+export const CardDescription = React.forwardRef<
+  HTMLParagraphElement,
+  React.HTMLAttributes<HTMLParagraphElement>
+>(({ className, ...rest }, ref) => {
+  return <p ref={ref} className={cn("text-sm text-muted-foreground", className)} {...rest} />;
+});
+CardDescription.displayName = "CardDescription";
+
+// Body section: horizontal and bottom padding only.
+export const CardContent = React.forwardRef<HTMLDivElement, DivProps>(({ className, ...rest }, ref) => {
+  return <div ref={ref} className={cn("px-5 pb-5", className)} {...rest} />;
+});
+CardContent.displayName = "CardContent";
+
+// Bottom row with a top border; `justify-between` pushes children to opposite ends.
+export const CardFooter = React.forwardRef<HTMLDivElement, DivProps>(({ className, ...rest }, ref) => {
+  const classes = cn(
+    "flex items-center justify-between gap-2 border-t border-border/60 px-5 py-3",
+    className
+  );
+  return <div ref={ref} className={classes} {...rest} />;
+});
+CardFooter.displayName = "CardFooter";
diff --git a/frontend/src/components/ui/command.tsx b/frontend/src/components/ui/command.tsx
new file mode 100644
index 0000000..92539ef
--- /dev/null
+++ b/frontend/src/components/ui/command.tsx
@@ -0,0 +1,90 @@
+import * as React from "react";
+import { Command as CommandPrimitive } from "cmdk";
+import { Search } from "lucide-react";
+import { cn } from "@/lib/utils";
+
+// Styled wrappers around the cmdk primitives. Each forwards its ref and passes the
+// caller's `className` to `cn` after the default classes.
+
+// Root container of the command palette.
+export const Command = React.forwardRef<
+  React.ElementRef<typeof CommandPrimitive>,
+  React.ComponentPropsWithoutRef<typeof CommandPrimitive>
+>(({ className, ...rest }, ref) => {
+  const classes = cn("flex h-full w-full flex-col overflow-hidden rounded-xl bg-popover", className);
+  return <CommandPrimitive ref={ref} className={classes} {...rest} />;
+});
+Command.displayName = "Command";
+
+// Search field: a magnifier icon and the cmdk input inside a bordered row. The ref and
+// any extra props go to the input itself, not to the wrapping <div>.
+export const CommandInput = React.forwardRef<
+  React.ElementRef<typeof CommandPrimitive.Input>,
+  React.ComponentPropsWithoutRef<typeof CommandPrimitive.Input>
+>(({ className, ...rest }, ref) => {
+  const inputClasses = cn(
+    "flex h-10 w-full bg-transparent text-sm outline-none placeholder:text-muted-foreground/70",
+    className
+  );
+  return (
+    <div className="flex items-center gap-2 border-b border-border/70 px-3" cmdk-input-wrapper="">
+      <Search className="h-4 w-4 text-muted-foreground" />
+      <CommandPrimitive.Input ref={ref} className={inputClasses} {...rest} />
+    </div>
+  );
+});
+CommandInput.displayName = "CommandInput";
+
+// Vertically scrollable result list, capped at 320px tall.
+export const CommandList = React.forwardRef<
+  React.ElementRef<typeof CommandPrimitive.List>,
+  React.ComponentPropsWithoutRef<typeof CommandPrimitive.List>
+>(({ className, ...rest }, ref) => {
+  const classes = cn("max-h-[320px] overflow-y-auto overflow-x-hidden p-1 scrollbar-thin", className);
+  return <CommandPrimitive.List ref={ref} className={classes} {...rest} />;
+});
+CommandList.displayName = "CommandList";
+
+// Shown when nothing matches; a caller `className` is merged with the defaults via `cn`, as in the sibling wrappers.
+export const CommandEmpty = React.forwardRef<
+  React.ElementRef<typeof CommandPrimitive.Empty>, React.ComponentPropsWithoutRef<typeof CommandPrimitive.Empty>
+>(({ className, ...props }, ref) => (
+  <CommandPrimitive.Empty ref={ref} className={cn("py-6 text-center text-sm text-muted-foreground", className)} {...props} />
+));
+CommandEmpty.displayName = "CommandEmpty";
+
+// Group of items. The long selector list styles descendant elements that carry the
+// cmdk-group-heading attribute (small, uppercase, muted heading text).
+export const CommandGroup = React.forwardRef<
+  React.ElementRef<typeof CommandPrimitive.Group>,
+  React.ComponentPropsWithoutRef<typeof CommandPrimitive.Group>
+>(({ className, ...rest }, ref) => {
+  const classes = cn(
+    "overflow-hidden p-1 text-foreground [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group-heading]]:uppercase [&_[cmdk-group-heading]]:tracking-wide [&_[cmdk-group-heading]]:text-muted-foreground",
+    className
+  );
+  return <CommandPrimitive.Group ref={ref} className={classes} {...rest} />;
+});
+CommandGroup.displayName = "CommandGroup";
+
+// Selectable row. Highlight and disabled looks are driven by the element's
+// data-selected and aria-disabled attributes rather than by props handled here.
+export const CommandItem = React.forwardRef<
+  React.ElementRef<typeof CommandPrimitive.Item>,
+  React.ComponentPropsWithoutRef<typeof CommandPrimitive.Item>
+>(({ className, ...rest }, ref) => {
+  const classes = cn(
+    "relative flex cursor-pointer select-none items-center gap-2 rounded-md px-2.5 py-1.5 text-sm outline-none transition-colors",
+    "data-[selected=true]:bg-muted data-[selected=true]:text-foreground",
+    "aria-disabled:pointer-events-none aria-disabled:opacity-50",
+    className
+  );
+  return <CommandPrimitive.Item ref={ref} className={classes} {...rest} />;
+});
+CommandItem.displayName = "CommandItem";
+
+// Right-aligned hint text inside an item (e.g. a keyboard shortcut).
+export const CommandShortcut = ({ className, ...rest }: React.HTMLAttributes<HTMLSpanElement>) => {
+  const classes = cn("ml-auto text-xs tracking-widest text-muted-foreground", className);
+  return <span className={classes} {...rest} />;
+};
diff --git a/frontend/src/components/ui/dialog.tsx b/frontend/src/components/ui/dialog.tsx
new file mode 100644
index 0000000..2c8385b
--- /dev/null
+++ b/frontend/src/components/ui/dialog.tsx
@@ -0,0 +1,76 @@
+import * as React from "react";
+import * as DialogPrimitive from "@radix-ui/react-dialog";
+import { X } from "lucide-react";
+import { cn } from "@/lib/utils";
+
+export const Dialog = DialogPrimitive.Root;
+export const DialogTrigger = DialogPrimitive.Trigger;
+export const DialogPortal = DialogPrimitive.Portal;
+export const DialogClose = DialogPrimitive.Close;
+
+// Full-screen backdrop behind the dialog: fixed, dimmed (black at 40%) and blurred.
+// Fade-in / fade-out classes are keyed off the element's data-state attribute.
+// Forwards its ref; the caller's `className` is passed to `cn` after the defaults.
+export const DialogOverlay = React.forwardRef<
+  React.ElementRef<typeof DialogPrimitive.Overlay>,
+  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay>
+>(({ className, ...rest }, ref) => {
+  const classes = cn(
+    "fixed inset-0 z-50 bg-black/40 backdrop-blur-sm",
+    "data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=closed]:animate-out data-[state=closed]:fade-out-0",
+    className
+  );
+  return <DialogPrimitive.Overlay ref={ref} className={classes} {...rest} />;
+});
+DialogOverlay.displayName = DialogPrimitive.Overlay.displayName;
+
+// Centered modal panel: rendered in a portal with <DialogOverlay />, and always followed by a close button (X icon + sr-only "Close").
+export const DialogContent = React.forwardRef<
+  React.ElementRef<typeof DialogPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content>
+>(({ className, children, ...rest }, ref) => {
+  const panelClasses = cn(
+    "fixed left-1/2 top-1/2 z-50 grid w-full max-w-lg -translate-x-1/2 -translate-y-1/2 gap-4 panel-raised p-6",
+    "data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95",
+    className
+  );
+  return (
+    <DialogPortal>
+      <DialogOverlay />
+      <DialogPrimitive.Content ref={ref} className={panelClasses} {...rest}>
+        {children}
+        <DialogPrimitive.Close className="absolute right-4 top-4 rounded-md p-1 text-muted-foreground hover:bg-muted ring-focus">
+          <X className="h-4 w-4" />
+          <span className="sr-only">Close</span>
+        </DialogPrimitive.Close>
+      </DialogPrimitive.Content>
+    </DialogPortal>
+  );
+});
+DialogContent.displayName = DialogPrimitive.Content.displayName;
+
+// Vertical stack intended for the dialog's title and description.
+// Plain function component: no ref is forwarded.
+export const DialogHeader = ({ className, ...rest }: React.HTMLAttributes<HTMLDivElement>) => (
+  <div className={cn("flex flex-col gap-1.5", className)} {...rest} />
+);
+
+// Radix dialog title with the heading text styles applied.
+export const DialogTitle = React.forwardRef<
+  React.ElementRef<typeof DialogPrimitive.Title>,
+  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Title>
+>(({ className, ...rest }, ref) => {
+  const classes = cn("text-base font-semibold tracking-tight", className);
+  return <DialogPrimitive.Title ref={ref} className={classes} {...rest} />;
+});
+DialogTitle.displayName = DialogPrimitive.Title.displayName;
+
+// Radix dialog description in small muted text.
+export const DialogDescription = React.forwardRef<
+  React.ElementRef<typeof DialogPrimitive.Description>,
+  React.ComponentPropsWithoutRef<typeof DialogPrimitive.Description>
+>(({ className, ...rest }, ref) => {
+  const classes = cn("text-sm text-muted-foreground", className);
+  return <DialogPrimitive.Description ref={ref} className={classes} {...rest} />;
+});
+DialogDescription.displayName = DialogPrimitive.Description.displayName;
diff --git a/frontend/src/components/ui/dropdown-menu.tsx b/frontend/src/components/ui/dropdown-menu.tsx
new file mode 100644
index 0000000..41c0485
--- /dev/null
+++ b/frontend/src/components/ui/dropdown-menu.tsx
@@ -0,0 +1,113 @@
+import * as React from "react";
+import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu";
+import { Check, ChevronRight } from "lucide-react";
+import { cn } from "@/lib/utils";
+
+export const DropdownMenu = DropdownMenuPrimitive.Root; // Radix root, re-exported unchanged
+export const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger; // Radix trigger, re-exported unchanged
+export const DropdownMenuGroup = DropdownMenuPrimitive.Group; // Radix group, re-exported unchanged
+export const DropdownMenuPortal = DropdownMenuPrimitive.Portal; // Radix portal, re-exported unchanged
+/** One-pixel divider line for the dropdown menu; forwards its ref to the Radix separator. */
+export const DropdownMenuSeparator = React.forwardRef<
+  React.ElementRef<typeof DropdownMenuPrimitive.Separator>,
+  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Separator>
+>(({ className, ...rest }, forwardedRef) => {
+  // The caller's className is handed to cn() after the default divider classes.
+  const classes = cn("-mx-1 my-1 h-px bg-border/70", className);
+  return <DropdownMenuPrimitive.Separator ref={forwardedRef} className={classes} {...rest} />;
+});
+// Reuse the primitive's display name.
+DropdownMenuSeparator.displayName = DropdownMenuPrimitive.Separator.displayName;
+
+/** Dropdown panel rendered inside a Radix Portal; `sideOffset` defaults to 6. */
+export const DropdownMenuContent = React.forwardRef<
+  React.ElementRef<typeof DropdownMenuPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
+>(({ className, sideOffset = 6, ...rest }, forwardedRef) => {
+  const panelClasses = cn(
+    "z-50 min-w-[12rem] overflow-hidden rounded-xl border border-border/70 bg-popover p-1 text-popover-foreground shadow-elevated",
+    "data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95",
+    className
+  );
+  return (
+    <DropdownMenuPrimitive.Portal>
+      <DropdownMenuPrimitive.Content ref={forwardedRef} sideOffset={sideOffset} className={panelClasses} {...rest} />
+    </DropdownMenuPrimitive.Portal>
+  );
+});
+// Reuse the primitive's display name.
+DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName;
+
+/** Styled Radix dropdown row; the optional `inset` flag adds left padding (pl-8). */
+export const DropdownMenuItem = React.forwardRef<
+  React.ElementRef<typeof DropdownMenuPrimitive.Item>,
+  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & { inset?: boolean }
+>(({ className, inset, ...props }, ref) => {
+  const classes = cn(
+    "relative flex cursor-pointer select-none items-center gap-2 rounded-md px-2.5 py-1.5 text-sm outline-none transition-colors",
+    "hover:bg-muted focus:bg-muted",
+    // Disabled rows are dimmed and ignore the pointer, matching the Select item styling.
+    "data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
+    inset && "pl-8",
+    className
+  );
+  return <DropdownMenuPrimitive.Item ref={ref} className={classes} {...props} />;
+});
+DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName;
+
+/** Small uppercase caption for a dropdown section; forwards its ref to the Radix label. */
+export const DropdownMenuLabel = React.forwardRef<
+  React.ElementRef<typeof DropdownMenuPrimitive.Label>,
+  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Label>
+>(({ className, ...rest }, forwardedRef) => {
+  // The caller's className is handed to cn() after the default caption classes.
+  const classes = cn("px-2.5 py-1.5 text-xs font-medium uppercase tracking-wide text-muted-foreground", className);
+  return <DropdownMenuPrimitive.Label ref={forwardedRef} className={classes} {...rest} />;
+});
+// Reuse the primitive's display name.
+DropdownMenuLabel.displayName = DropdownMenuPrimitive.Label.displayName;
+
+/** Checkbox row for the dropdown menu. focus:bg-muted keeps keyboard focus visible because the outline is removed. */
+export const DropdownMenuCheckboxItem = React.forwardRef<
+  React.ElementRef<typeof DropdownMenuPrimitive.CheckboxItem>,
+  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.CheckboxItem>
+>(({ className, children, checked, ...props }, ref) => (
+  <DropdownMenuPrimitive.CheckboxItem
+    ref={ref}
+    checked={checked}
+    className={cn(
+      "relative flex cursor-pointer select-none items-center rounded-md py-1.5 pl-8 pr-2 text-sm outline-none transition-colors hover:bg-muted",
+      "focus:bg-muted data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
+      className
+    )}
+    {...props}
+  >
+    <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
+      <DropdownMenuPrimitive.ItemIndicator><Check className="h-3.5 w-3.5" /></DropdownMenuPrimitive.ItemIndicator>
+    </span>
+    {children}
+  </DropdownMenuPrimitive.CheckboxItem>
+));
+DropdownMenuCheckboxItem.displayName = DropdownMenuPrimitive.CheckboxItem.displayName;
+
+// Inline <span> pushed to the row's trailing edge via ml-auto; remaining span props pass through.
+export const DropdownMenuShortcut = ({ className, ...rest }: React.HTMLAttributes<HTMLSpanElement>) =>
+  <span className={cn("ml-auto text-xs tracking-widest text-muted-foreground", className)} {...rest} />;
+
+/** Submenu trigger row with a trailing chevron; highlighted on hover, on keyboard focus and while its submenu is open. */
+export const DropdownMenuSubTrigger = React.forwardRef<
+  React.ElementRef<typeof DropdownMenuPrimitive.SubTrigger>,
+  React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.SubTrigger>
+>(({ className, children, ...props }, ref) => {
+  const classes = cn(
+    "flex cursor-pointer select-none items-center rounded-md px-2.5 py-1.5 text-sm outline-none hover:bg-muted focus:bg-muted data-[state=open]:bg-muted data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
+    className
+  );
+  return (
+    <DropdownMenuPrimitive.SubTrigger ref={ref} className={classes} {...props}>
+      {children}
+      <ChevronRight className="ml-auto h-4 w-4" />
+    </DropdownMenuPrimitive.SubTrigger>
+  );
+});
+DropdownMenuSubTrigger.displayName = DropdownMenuPrimitive.SubTrigger.displayName;
diff --git a/frontend/src/components/ui/input.tsx b/frontend/src/components/ui/input.tsx
new file mode 100644
index 0000000..6d61483
--- /dev/null
+++ b/frontend/src/components/ui/input.tsx
@@ -0,0 +1,21 @@
+import * as React from "react";
+import { cn } from "@/lib/utils";
+
+/** Props accepted by `Input`: the standard attributes of a native <input>. */
+export type InputProps = React.InputHTMLAttributes<HTMLInputElement>;
+
+/** Styled native input; `type` falls back to "text" and the ref is forwarded to the element. */
+export const Input = React.forwardRef<HTMLInputElement, InputProps>(
+  ({ className, type = "text", ...rest }, forwardedRef) => {
+    // The caller's className is handed to cn() after the default classes.
+    const classes = cn(
+      "flex h-9 w-full rounded-lg border border-input bg-surface px-3 py-1.5 text-sm shadow-sm transition-colors",
+      "placeholder:text-muted-foreground/70",
+      "ring-focus disabled:cursor-not-allowed disabled:opacity-50",
+      className
+    );
+    return <input ref={forwardedRef} type={type} className={classes} {...rest} />;
+  }
+);
+// Fixed display name (there is no primitive to borrow one from).
+Input.displayName = "Input";
diff --git a/frontend/src/components/ui/popover.tsx b/frontend/src/components/ui/popover.tsx
new file mode 100644
index 0000000..508b7bd
--- /dev/null
+++ b/frontend/src/components/ui/popover.tsx
@@ -0,0 +1,27 @@
+import * as React from "react";
+import * as PopoverPrimitive from "@radix-ui/react-popover";
+import { cn } from "@/lib/utils";
+
+export const Popover = PopoverPrimitive.Root; // Radix root, re-exported unchanged
+export const PopoverTrigger = PopoverPrimitive.Trigger; // Radix trigger, re-exported unchanged
+export const PopoverAnchor = PopoverPrimitive.Anchor; // Radix anchor, re-exported unchanged
+
+/**
+ * Popover panel rendered inside a Radix Portal; `align` defaults to "center" and `sideOffset` to 6.
+ */
+export const PopoverContent = React.forwardRef<
+  React.ElementRef<typeof PopoverPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof PopoverPrimitive.Content>
+>(({ className, align = "center", sideOffset = 6, ...rest }, forwardedRef) => {
+  const panelClasses = cn(
+    "z-50 w-72 rounded-xl border border-border/70 bg-popover p-3 text-popover-foreground shadow-elevated",
+    "data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95",
+    className
+  );
+  return (
+    <PopoverPrimitive.Portal>
+      <PopoverPrimitive.Content ref={forwardedRef} align={align} sideOffset={sideOffset} className={panelClasses} {...rest} />
+    </PopoverPrimitive.Portal>
+  );
+});
+PopoverContent.displayName = PopoverPrimitive.Content.displayName;
diff --git a/frontend/src/components/ui/progress.tsx b/frontend/src/components/ui/progress.tsx
new file mode 100644
index 0000000..27fe701
--- /dev/null
+++ b/frontend/src/components/ui/progress.tsx
@@ -0,0 +1,22 @@
+import * as React from "react";
+import * as ProgressPrimitive from "@radix-ui/react-progress";
+import { cn } from "@/lib/utils";
+
+/** Progress bar on Radix Progress; `value` is forwarded to the root so it is exposed as aria-valuenow. */
+export const Progress = React.forwardRef<
+  React.ElementRef<typeof ProgressPrimitive.Root>,
+  React.ComponentPropsWithoutRef<typeof ProgressPrimitive.Root> & {
+    indicatorClassName?: string;
+  }
+>(({ className, value, indicatorClassName, ...props }, ref) => {
+  // Clamp to 0–100 so an out-of-range value cannot slide the fill outside the track.
+  const percent = Math.min(100, Math.max(0, value ?? 0));
+  const trackClasses = cn("relative h-2 w-full overflow-hidden rounded-full bg-muted", className);
+  const fillClasses = cn("h-full w-full flex-1 bg-primary transition-all", indicatorClassName);
+  return (
+    <ProgressPrimitive.Root ref={ref} value={value} className={trackClasses} {...props}>
+      <ProgressPrimitive.Indicator className={fillClasses} style={{ transform: `translateX(-${100 - percent}%)` }} />
+    </ProgressPrimitive.Root>
+  );
+});
+Progress.displayName = "Progress";
diff --git a/frontend/src/components/ui/scroll-area.tsx b/frontend/src/components/ui/scroll-area.tsx
new file mode 100644
index 0000000..754a8ae
--- /dev/null
+++ b/frontend/src/components/ui/scroll-area.tsx
@@ -0,0 +1,36 @@
+import * as React from "react";
+import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area";
+import { cn } from "@/lib/utils";
+
+// Radix scroll area: children go into the viewport, followed by the default
+// ScrollBar and the corner element. The ref is forwarded to the root.
+export const ScrollArea = React.forwardRef<
+  React.ElementRef<typeof ScrollAreaPrimitive.Root>,
+  React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.Root>
+>(({ className, children, ...rest }, forwardedRef) => (
+  <ScrollAreaPrimitive.Root ref={forwardedRef} className={cn("relative overflow-hidden", className)} {...rest}>
+    <ScrollAreaPrimitive.Viewport className="h-full w-full rounded-[inherit]">{children}</ScrollAreaPrimitive.Viewport>
+    <ScrollBar />
+    <ScrollAreaPrimitive.Corner />
+  </ScrollAreaPrimitive.Root>
+));
+ScrollArea.displayName = "ScrollArea";
+
+/** Scrollbar track plus thumb; `orientation` defaults to "vertical" and picks the sizing classes. */
+const ScrollBar = React.forwardRef<
+  React.ElementRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>,
+  React.ComponentPropsWithoutRef<typeof ScrollAreaPrimitive.ScrollAreaScrollbar>
+>(({ className, orientation = "vertical", ...rest }, forwardedRef) => {
+  const isVertical = orientation === "vertical";
+  const barClasses = cn(
+    "flex touch-none select-none transition-colors",
+    isVertical ? "h-full w-2 p-0.5" : "h-2 flex-col p-0.5",
+    className
+  );
+  return (
+    <ScrollAreaPrimitive.ScrollAreaScrollbar ref={forwardedRef} orientation={orientation} className={barClasses} {...rest}>
+      <ScrollAreaPrimitive.ScrollAreaThumb className="relative flex-1 rounded-full bg-muted-foreground/30 hover:bg-muted-foreground/50 transition-colors" />
+    </ScrollAreaPrimitive.ScrollAreaScrollbar>
+  );
+});
+ScrollBar.displayName = "ScrollBar";
diff --git a/frontend/src/components/ui/select.tsx b/frontend/src/components/ui/select.tsx
new file mode 100644
index 0000000..19e25e6
--- /dev/null
+++ b/frontend/src/components/ui/select.tsx
@@ -0,0 +1,89 @@
+import * as React from "react";
+import * as SelectPrimitive from "@radix-ui/react-select";
+import { Check, ChevronDown } from "lucide-react";
+import { cn } from "@/lib/utils";
+
+export const Select = SelectPrimitive.Root; // Radix root, re-exported unchanged
+export const SelectGroup = SelectPrimitive.Group; // Radix group, re-exported unchanged
+export const SelectValue = SelectPrimitive.Value; // Radix value, re-exported unchanged
+
+/** Select trigger button: renders its children followed by a chevron-down icon. */
+export const SelectTrigger = React.forwardRef<
+  React.ElementRef<typeof SelectPrimitive.Trigger>,
+  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Trigger>
+>(({ className, children, ...rest }, forwardedRef) => {
+  const triggerClasses = cn(
+    "flex h-9 w-full items-center justify-between gap-2 rounded-lg border border-input bg-surface px-3 text-sm shadow-sm transition-colors",
+    "ring-focus disabled:cursor-not-allowed disabled:opacity-50",
+    "[&>span]:truncate",
+    className
+  );
+  return (
+    <SelectPrimitive.Trigger ref={forwardedRef} className={triggerClasses} {...rest}>
+      {children}
+      <SelectPrimitive.Icon asChild>
+        <ChevronDown className="h-4 w-4 text-muted-foreground" />
+      </SelectPrimitive.Icon>
+    </SelectPrimitive.Trigger>
+  );
+});
+SelectTrigger.displayName = SelectPrimitive.Trigger.displayName;
+
+/**
+ * Select option list rendered inside a Radix Portal; `position` defaults to "popper".
+ */
+export const SelectContent = React.forwardRef<
+  React.ElementRef<typeof SelectPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Content>
+>(({ className, children, position = "popper", ...rest }, forwardedRef) => {
+  const listClasses = cn(
+    "relative z-50 max-h-72 min-w-[8rem] overflow-hidden rounded-xl border border-border/70 bg-popover text-popover-foreground shadow-elevated",
+    "data-[state=open]:animate-in data-[state=open]:fade-in-0 data-[state=open]:zoom-in-95",
+    position === "popper" && "translate-y-1",
+    className
+  );
+  return (
+    <SelectPrimitive.Portal>
+      <SelectPrimitive.Content ref={forwardedRef} position={position} className={listClasses} {...rest}>
+        <SelectPrimitive.Viewport className="p-1 max-h-72">{children}</SelectPrimitive.Viewport>
+      </SelectPrimitive.Content>
+    </SelectPrimitive.Portal>
+  );
+});
+// Reuse the primitive's display name.
+SelectContent.displayName = SelectPrimitive.Content.displayName;
+
+/** Select option row: a check indicator slot on the left, then the item text. */
+export const SelectItem = React.forwardRef<
+  React.ElementRef<typeof SelectPrimitive.Item>,
+  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Item>
+>(({ className, children, ...rest }, forwardedRef) => {
+  const rowClasses = cn(
+    "relative flex w-full cursor-pointer select-none items-center rounded-md py-1.5 pl-8 pr-2 text-sm outline-none transition-colors",
+    "data-[highlighted]:bg-muted data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
+    className
+  );
+  return (
+    <SelectPrimitive.Item ref={forwardedRef} className={rowClasses} {...rest}>
+      <span className="absolute left-2 flex h-3.5 w-3.5 items-center justify-center">
+        <SelectPrimitive.ItemIndicator>
+          <Check className="h-3.5 w-3.5" />
+        </SelectPrimitive.ItemIndicator>
+      </span>
+      <SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
+    </SelectPrimitive.Item>
+  );
+});
+SelectItem.displayName = SelectPrimitive.Item.displayName;
+
+/** One-pixel divider line for the select list; forwards its ref to the Radix separator. */
+export const SelectSeparator = React.forwardRef<
+  React.ElementRef<typeof SelectPrimitive.Separator>,
+  React.ComponentPropsWithoutRef<typeof SelectPrimitive.Separator>
+>(({ className, ...rest }, forwardedRef) => {
+  // The caller's className is handed to cn() after the default divider classes.
+  const classes = cn("-mx-1 my-1 h-px bg-border/70", className);
+  return <SelectPrimitive.Separator ref={forwardedRef} className={classes} {...rest} />;
+});
+// Reuse the primitive's display name.
+SelectSeparator.displayName = SelectPrimitive.Separator.displayName;
diff --git a/frontend/src/components/ui/separator.tsx b/frontend/src/components/ui/separator.tsx
new file mode 100644
index 0000000..94d868d
--- /dev/null
+++ b/frontend/src/components/ui/separator.tsx
@@ -0,0 +1,21 @@
+import * as React from "react";
+import * as SeparatorPrimitive from "@radix-ui/react-separator";
+import { cn } from "@/lib/utils";
+
+/**
+ * Thin divider on Radix Separator; `orientation` defaults to "horizontal" and `decorative` to true.
+ */
+export const Separator = React.forwardRef<
+  React.ElementRef<typeof SeparatorPrimitive.Root>,
+  React.ComponentPropsWithoutRef<typeof SeparatorPrimitive.Root>
+>(({ className, orientation = "horizontal", decorative = true, ...rest }, forwardedRef) => {
+  // Horizontal lines span the full width, vertical ones the full height.
+  const isHorizontal = orientation === "horizontal";
+  const lineClasses = cn(
+    "shrink-0 bg-border/70",
+    isHorizontal ? "h-px w-full" : "h-full w-px",
+    className
+  );
+  return <SeparatorPrimitive.Root ref={forwardedRef} decorative={decorative} orientation={orientation} className={lineClasses} {...rest} />;
+});
+Separator.displayName = "Separator";
diff --git a/frontend/src/components/ui/skeleton.tsx b/frontend/src/components/ui/skeleton.tsx
new file mode 100644
index 0000000..f2cd4dc
--- /dev/null
+++ b/frontend/src/components/ui/skeleton.tsx
@@ -0,0 +1,5 @@
+import { cn } from "@/lib/utils";
+
+export function Skeleton({ className, ...divProps }: React.HTMLAttributes<HTMLDivElement>) {
+  return <div className={cn("skeleton-shimmer rounded-lg", className)} {...divProps} />; // placeholder <div> with the shimmer class
+}
diff --git a/frontend/src/components/ui/switch.tsx b/frontend/src/components/ui/switch.tsx
new file mode 100644
index 0000000..248bdca
--- /dev/null
+++ b/frontend/src/components/ui/switch.tsx
@@ -0,0 +1,27 @@
+import * as React from "react";
+import * as SwitchPrimitives from "@radix-ui/react-switch";
+import { cn } from "@/lib/utils";
+
+/** Toggle switch on Radix Switch: a pill-shaped track with a sliding thumb. */
+export const Switch = React.forwardRef<
+  React.ElementRef<typeof SwitchPrimitives.Root>,
+  React.ComponentPropsWithoutRef<typeof SwitchPrimitives.Root>
+>(({ className, ...rest }, forwardedRef) => {
+  const trackClasses = cn(
+    "peer inline-flex h-5 w-9 shrink-0 cursor-pointer items-center rounded-full border border-transparent transition-colors",
+    "data-[state=checked]:bg-primary data-[state=unchecked]:bg-muted",
+    "ring-focus disabled:cursor-not-allowed disabled:opacity-50",
+    className
+  );
+  // The thumb is not customisable from outside; it shifts via the data-state classes.
+  const thumbClasses = cn(
+    "pointer-events-none block h-4 w-4 rounded-full bg-white shadow-soft transition-transform",
+    "data-[state=checked]:translate-x-4 data-[state=unchecked]:translate-x-0.5"
+  );
+  return (
+    <SwitchPrimitives.Root ref={forwardedRef} className={trackClasses} {...rest}>
+      <SwitchPrimitives.Thumb className={thumbClasses} />
+    </SwitchPrimitives.Root>
+  );
+});
+Switch.displayName = "Switch";
diff --git a/frontend/src/components/ui/tabs.tsx b/frontend/src/components/ui/tabs.tsx
new file mode 100644
index 0000000..8ba131a
--- /dev/null
+++ b/frontend/src/components/ui/tabs.tsx
@@ -0,0 +1,45 @@
+import * as React from "react";
+import * as TabsPrimitive from "@radix-ui/react-tabs";
+import { cn } from "@/lib/utils";
+
+export const Tabs = TabsPrimitive.Root; // Radix root, re-exported unchanged
+
+/**
+ * Pill-shaped container for tab triggers; forwards its ref to the Radix list.
+ */
+export const TabsList = React.forwardRef<
+  React.ElementRef<typeof TabsPrimitive.List>,
+  React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
+>(({ className, ...rest }, forwardedRef) => {
+  const listClasses = cn(
+    "inline-flex h-9 items-center justify-center gap-1 rounded-xl border border-border/70 bg-muted/40 p-1 text-muted-foreground",
+    className
+  );
+  return <TabsPrimitive.List ref={forwardedRef} className={listClasses} {...rest} />;
+});
+TabsList.displayName = TabsPrimitive.List.displayName;
+
+/**
+ * Single tab button; the data-[state=active] classes style the selected tab.
+ */
+export const TabsTrigger = React.forwardRef<
+  React.ElementRef<typeof TabsPrimitive.Trigger>,
+  React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
+>(({ className, ...rest }, forwardedRef) => {
+  const triggerClasses = cn(
+    "inline-flex items-center justify-center gap-1.5 whitespace-nowrap rounded-lg px-3 py-1 text-xs font-medium transition-all",
+    "ring-focus disabled:pointer-events-none disabled:opacity-50",
+    "data-[state=active]:bg-card data-[state=active]:text-foreground data-[state=active]:shadow-soft",
+    className
+  );
+  return <TabsPrimitive.Trigger ref={forwardedRef} className={triggerClasses} {...rest} />;
+});
+TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;
+
+// Tab panel with a top margin and the shared focus ring; forwards its ref to the Radix content.
+export const TabsContent = React.forwardRef<
+  React.ElementRef<typeof TabsPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
+>(({ className, ...rest }, forwardedRef) =>
+  <TabsPrimitive.Content ref={forwardedRef} className={cn("mt-3 ring-focus", className)} {...rest} />);
+TabsContent.displayName = TabsPrimitive.Content.displayName;
diff --git a/frontend/src/components/ui/tooltip.tsx b/frontend/src/components/ui/tooltip.tsx
new file mode 100644
index 0000000..18954eb
--- /dev/null
+++ b/frontend/src/components/ui/tooltip.tsx
@@ -0,0 +1,24 @@
+import * as React from "react";
+import * as TooltipPrimitive from "@radix-ui/react-tooltip";
+import { cn } from "@/lib/utils";
+
+export const TooltipProvider = TooltipPrimitive.Provider; // Radix provider, re-exported unchanged
+export const Tooltip = TooltipPrimitive.Root; // Radix root, re-exported unchanged
+export const TooltipTrigger = TooltipPrimitive.Trigger; // Radix trigger, re-exported unchanged
+
+/**
+ * Tooltip bubble; `sideOffset` defaults to 6. Rendered in place (no Portal wrapper here).
+ */
+export const TooltipContent = React.forwardRef<
+  React.ElementRef<typeof TooltipPrimitive.Content>,
+  React.ComponentPropsWithoutRef<typeof TooltipPrimitive.Content>
+>(({ className, sideOffset = 6, ...rest }, forwardedRef) => {
+  const bubbleClasses = cn(
+    "z-50 overflow-hidden rounded-md border border-border/70 bg-popover px-2.5 py-1.5 text-xs text-popover-foreground shadow-elevated",
+    "animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95",
+    className
+  );
+  return <TooltipPrimitive.Content ref={forwardedRef} sideOffset={sideOffset} className={bubbleClasses} {...rest} />;
+});
+// Reuse the primitive's display name.
+TooltipContent.displayName = TooltipPrimitive.Content.displayName;
diff --git a/frontend/src/hooks/useDebounce.ts b/frontend/src/hooks/useDebounce.ts
new file mode 100644
index 0000000..65873b8
--- /dev/null
+++ b/frontend/src/hooks/useDebounce.ts
@@ -0,0 +1,10 @@
+import { useEffect, useState } from "react";
+
+export function useDebounce<T>(value: T, delay = 250): T {
+  const [settled, setSettled] = useState(value); // starts as the initial value, then trails `value` by `delay` ms
+  useEffect(() => {
+    const timerId = setTimeout(() => setSettled(value), delay);
+    return () => clearTimeout(timerId); // drop the pending update when value/delay change or on unmount
+  }, [value, delay]);
+  return settled;
+}
diff --git a/frontend/src/hooks/useDocuments.ts b/frontend/src/hooks/useDocuments.ts
new file mode 100644
index 0000000..1edeb5a
--- /dev/null
+++ b/frontend/src/hooks/useDocuments.ts
@@ -0,0 +1,27 @@
+import { useQuery, keepPreviousData } from "@tanstack/react-query";
+import { getDashboardStats, getDocument, listDocuments, type DocumentListParams } from "@/services/documents";
+
+export function useDocuments(params: DocumentListParams) {
+  return useQuery({
+    staleTime: 20_000, // results count as fresh for 20 s
+    placeholderData: keepPreviousData, // keep the previous result as placeholder while a new key loads
+    queryKey: ["documents", params], // params are part of the key
+    queryFn: () => listDocuments(params),
+  });
+}
+
+export function useDocument(id: string | undefined) {
+  return useQuery({
+    enabled: !!id, // no request while id is undefined or empty
+    queryKey: ["document", id],
+    queryFn: () => getDocument(id!), // the non-null assertion relies on the `enabled` guard
+  });
+}
+
+export function useDashboardStats() {
+  return useQuery({
+    refetchInterval: 30_000, // poll every 30 s
+    queryKey: ["dashboard", "stats"],
+    queryFn: getDashboardStats,
+  });
+}
diff --git a/frontend/src/hooks/useHealth.ts b/frontend/src/hooks/useHealth.ts
new file mode 100644
index 0000000..df8bdea
--- /dev/null
+++ b/frontend/src/hooks/useHealth.ts
@@ -0,0 +1,19 @@
+import { useQuery } from "@tanstack/react-query";
+import { getHealth, getQueueState } from "@/services/health";
+
+export function useHealth() {
+  return useQuery({
+    refetchInterval: 15_000, // poll every 15 s
+    staleTime: 10_000, // results count as fresh for 10 s
+    queryKey: ["health"],
+    queryFn: getHealth,
+  });
+}
+
+export function useQueue() {
+  return useQuery({
+    refetchInterval: 10_000, // poll every 10 s
+    queryKey: ["queue"],
+    queryFn: getQueueState,
+  });
+}
diff --git a/frontend/src/hooks/useIngestion.ts b/frontend/src/hooks/useIngestion.ts
new file mode 100644
index 0000000..1a62000
--- /dev/null
+++ b/frontend/src/hooks/useIngestion.ts
@@ -0,0 +1,23 @@
+import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
+import { ingestFolder, listRuns } from "@/services/ingestion";
+import type { IngestFolderRequest } from "@/services/types";
+
+export function useIngestionRuns() {
+  return useQuery({
+    refetchInterval: 15_000, // poll every 15 s
+    queryKey: ["ingestion-runs"],
+    queryFn: listRuns,
+  });
+}
+
+/** Mutation that starts a folder ingestion and, on success, invalidates the queries it affects. */
+export function useStartIngestion() {
+  const queryClient = useQueryClient();
+  return useMutation({
+    mutationFn: (req: IngestFolderRequest) => ingestFolder(req),
+    onSuccess: () => {
+      // Invalidate every query whose key starts with one of these roots.
+      for (const root of ["ingestion-runs", "documents", "dashboard"]) queryClient.invalidateQueries({ queryKey: [root] });
+    },
+  });
+}
diff --git a/frontend/src/hooks/useQuality.ts b/frontend/src/hooks/useQuality.ts
new file mode 100644
index 0000000..edf206b
--- /dev/null
+++ b/frontend/src/hooks/useQuality.ts
@@ -0,0 +1,10 @@
+import { useQuery } from "@tanstack/react-query";
+import { getQualityQueue, type QualityQueueKind } from "@/services/quality";
+
+export function useQualityQueue(kind: QualityQueueKind) {
+  return useQuery({
+    staleTime: 30_000, // results count as fresh for 30 s
+    queryKey: ["quality", kind], // one cache entry per queue kind
+    queryFn: () => getQualityQueue(kind),
+  });
+}
diff --git a/frontend/src/hooks/useSearch.ts b/frontend/src/hooks/useSearch.ts
new file mode 100644
index 0000000..c3e3b1a
--- /dev/null
+++ b/frontend/src/hooks/useSearch.ts
@@ -0,0 +1,32 @@
+import { useQuery } from "@tanstack/react-query";
+import { search, suggest } from "@/services/search";
+import type { SearchFilters, SearchMode } from "@/services/types";
+
+/**
+ * Search query hook. Every option is part of the cache key; results count as fresh for 60 s.
+ */
+export function useSearchResults(opts: {
+  query: string;
+  mode: SearchMode;
+  filters: SearchFilters;
+  limit: number;
+  enabled?: boolean;
+}) {
+  const { query, mode, filters, limit, enabled } = opts;
+  // Runs only for a non-blank query, and never when the caller passes enabled: false.
+  const shouldRun = enabled !== false && query.trim().length > 0;
+  return useQuery({
+    queryKey: ["search", query, mode, filters, limit],
+    queryFn: () => search({ query, limit, filters, search_mode: mode }),
+    enabled: shouldRun,
+    staleTime: 60_000,
+  });
+}
+
+export function useSuggestions(query: string) {
+  return useQuery({
+    staleTime: 30_000, // results count as fresh for 30 s
+    queryKey: ["search-suggest", query],
+    queryFn: () => suggest(query), // NOTE(review): no `enabled` guard, so this also runs for a blank query — confirm intended
+  });
+}
diff --git a/frontend/src/hooks/useTheme.ts b/frontend/src/hooks/useTheme.ts
new file mode 100644
index 0000000..7c65e43
--- /dev/null
+++ b/frontend/src/hooks/useTheme.ts
@@ -0,0 +1,15 @@
+import { useEffect } from "react";
+import { applyTheme, useUiStore } from "@/stores/uiStore";
+
+/** Applies the stored theme and, in "system" mode, re-applies it whenever the OS colour scheme changes. */
+export function useThemeBootstrap() {
+  const theme = useUiStore((state) => state.theme);
+  useEffect(() => {
+    applyTheme(theme);
+    if (theme !== "system") return;
+    const darkQuery = window.matchMedia("(prefers-color-scheme: dark)");
+    const reapply = () => applyTheme("system");
+    darkQuery.addEventListener("change", reapply);
+    return () => darkQuery.removeEventListener("change", reapply);
+  }, [theme]);
+}
diff --git a/frontend/src/layouts/AppShell.tsx b/frontend/src/layouts/AppShell.tsx
new file mode 100644
index 0000000..fd612ec
--- /dev/null
+++ b/frontend/src/layouts/AppShell.tsx
@@ -0,0 +1,30 @@
+import { Outlet } from "react-router-dom";
+import { Sidebar } from "@/layouts/Sidebar";
+import { Topbar } from "@/layouts/Topbar";
+import { CommandPalette } from "@/layouts/CommandPalette";
+import { useThemeBootstrap } from "@/hooks/useTheme";
+
+/** Application frame: sidebar, top bar, routed page content and the global command palette. */
+export function AppShell() {
+  useThemeBootstrap();
+  const backdrop =
+    "radial-gradient(60% 50% at 18% 14%, hsl(var(--primary) / 0.16), transparent 70%), radial-gradient(40% 30% at 90% 0%, hsl(var(--primary) / 0.10), transparent 60%)";
+  return (
+    <div className="relative flex min-h-screen bg-background">
+      {/* Soft ambient backdrop */}
+      <div
+        aria-hidden
+        className="pointer-events-none fixed inset-0 -z-10 opacity-[0.45] dark:opacity-30"
+        style={{ background: backdrop }}
+      />
+      <Sidebar />
+      <div className="flex min-w-0 flex-1 flex-col">
+        <Topbar />
+        <main className="relative flex min-w-0 flex-1 flex-col gap-6 px-4 py-6 lg:px-8 lg:py-8 2xl:px-12">
+          <Outlet />
+        </main>
+      </div>
+      <CommandPalette />
+    </div>
+  );
+}
diff --git a/frontend/src/layouts/Breadcrumbs.tsx b/frontend/src/layouts/Breadcrumbs.tsx
new file mode 100644
index 0000000..809ab7c
--- /dev/null
+++ b/frontend/src/layouts/Breadcrumbs.tsx
@@ -0,0 +1,43 @@
+import { ChevronRight, Home } from "lucide-react";
+import { Link, useLocation } from "react-router-dom";
+import { NAV } from "@/layouts/navConfig";
+import { cn } from "@/lib/utils";
+
+/** Breadcrumb trail derived from the current pathname; a label comes from NAV when a route matches exactly. */
+export function Breadcrumbs({ className }: { className?: string }) {
+  const { pathname } = useLocation();
+  const segments = pathname.split("/").filter(Boolean);
+  // Each crumb links to the cumulative path up to and including its segment.
+  const trail = segments.map((seg, i) => {
+    const url = "/" + segments.slice(0, i + 1).join("/");
+    const match = NAV.find((n) => n.to === url);
+    return { url, label: match?.label ?? prettify(seg) };
+  });
+
+  // aria-label names the landmark and aria-current marks the active page for assistive technology.
+  return (
+    <nav aria-label="Breadcrumb" className={cn("flex items-center gap-1.5 text-xs text-muted-foreground", className)}>
+      <Link
+        to="/"
+        className="inline-flex items-center gap-1 rounded-md px-1.5 py-1 hover:bg-muted hover:text-foreground"
+      >
+        <Home className="h-3.5 w-3.5" />
+        <span className="font-medium">LegacyHUB</span>
+      </Link>
+      {trail.map((t, i) => (
+        <span key={t.url} className="flex items-center gap-1.5">
+          <ChevronRight className="h-3.5 w-3.5 text-muted-foreground/60" />
+          {i === trail.length - 1 ? (
+            <span aria-current="page" className="font-medium text-foreground">{t.label}</span>
+          ) : (
+            <Link to={t.url} className="hover:text-foreground">{t.label}</Link>
+          )}
+        </span>
+      ))}
+    </nav>
+  );
+}
+
+function prettify(s: string): string {
+  return s.split("-").join(" ").replace(/\b\w/g, (first) => first.toUpperCase()); // "low-confidence" -> "Low Confidence"
+}
diff --git a/frontend/src/layouts/CommandPalette.tsx b/frontend/src/layouts/CommandPalette.tsx
new file mode 100644
index 0000000..91f0523
--- /dev/null
+++ b/frontend/src/layouts/CommandPalette.tsx
@@ -0,0 +1,87 @@
+import { useEffect } from "react";
+import { useNavigate } from "react-router-dom";
+import { Dialog, DialogContent } from "@/components/ui/dialog";
+import {
+  Command,
+  CommandInput,
+  CommandList,
+  CommandGroup,
+  CommandItem,
+  CommandEmpty,
+  CommandShortcut,
+} from "@/components/ui/command";
+import { useUiStore } from "@/stores/uiStore";
+import { NAV } from "@/layouts/navConfig";
+
+/**
+ * Global command palette dialog.
+ *
+ * Open state lives in the UI store; Cmd/Ctrl+K toggles it from anywhere in the window.
+ * Selecting an entry navigates to its route and then closes the palette.
+ */
+export function CommandPalette() {
+  const open = useUiStore((state) => state.commandOpen);
+  const close = useUiStore((state) => state.closeCommand);
+  const toggle = useUiStore((state) => state.toggleCommand);
+  const navigate = useNavigate();
+
+  // Shared by every entry: go to the route, then dismiss the dialog.
+  const goTo = (to: string) => {
+    navigate(to);
+    close();
+  };
+
+  // Register the window-level Cmd/Ctrl+K shortcut while the palette is mounted.
+  useEffect(() => {
+    const onKeyDown = (event: KeyboardEvent) => {
+      const hasModifier = event.metaKey || event.ctrlKey;
+      if (!hasModifier || event.key.toLowerCase() !== "k") return;
+      // Suppress the default handling of the shortcut before toggling.
+      event.preventDefault();
+      toggle();
+    };
+    window.addEventListener("keydown", onKeyDown);
+    return () => window.removeEventListener("keydown", onKeyDown);
+  }, [toggle]);
+
+  return (
+    <Dialog
+      open={open}
+      onOpenChange={(nextOpen) => {
+        // Only close requests are acted on here.
+        if (!nextOpen) close();
+      }}
+    >
+      <DialogContent className="max-w-xl gap-0 overflow-hidden p-0">
+        <Command label="Global command palette">
+          <CommandInput placeholder="Search pages, documents, recent queries…" />
+          <CommandList>
+            <CommandEmpty>No matching results.</CommandEmpty>
+            <CommandGroup heading="Navigation">
+              {/* One entry per NAV item. */}
+              {NAV.map((item) => (
+                <CommandItem key={item.to} value={item.label} onSelect={() => goTo(item.to)}>
+                  <item.icon className="h-4 w-4 text-muted-foreground" />
+                  {item.label}
+                  {item.shortcut && <CommandShortcut>{item.shortcut}</CommandShortcut>}
+                </CommandItem>
+              ))}
+            </CommandGroup>
+            <CommandGroup heading="Actions">
+              {/* Static entries that jump to fixed routes. */}
+              <CommandItem onSelect={() => goTo("/ingestion")}>
+                Start new ingestion run
+              </CommandItem>
+              <CommandItem onSelect={() => goTo("/quality")}>
+                Review low-confidence queue
+              </CommandItem>
+              <CommandItem onSelect={() => goTo("/search")}>
+                Open AI search workspace
+              </CommandItem>
+            </CommandGroup>
+          </CommandList>
+        </Command>
+      </DialogContent>
+    </Dialog>
+  );
+}
diff --git a/frontend/src/layouts/Sidebar.tsx b/frontend/src/layouts/Sidebar.tsx
new file mode 100644
index 0000000..c6350a9
--- /dev/null
+++ b/frontend/src/layouts/Sidebar.tsx
@@ -0,0 +1,171 @@
+import { NavLink } from "react-router-dom";
+import { ChevronsLeft, ChevronsRight, Sparkles } from "lucide-react";
+import { motion } from "framer-motion";
+
+import { Logo } from "@/components/common/Logo";
+import { Button } from "@/components/ui/button";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
+import { Badge } from "@/components/ui/badge";
+import { useUiStore } from "@/stores/uiStore";
+import { GROUPS, NAV, type NavItem } from "@/layouts/navConfig";
+import { cn } from "@/lib/utils";
+
+export function Sidebar() { // Primary navigation rail; collapsed state and its toggle are read from the shared UI store.
+  const collapsed = useUiStore((s) => s.sidebarCollapsed); // drives the animated width and hides text-only parts below
+  const toggle = useUiStore((s) => s.toggleSidebar);
+
+  return (
+    <motion.aside
+      initial={false}
+      animate={{ width: collapsed ? 72 : 248 }}
+      transition={{ type: "spring", stiffness: 280, damping: 32 }}
+      className={cn(
+        "relative z-30 flex h-screen shrink-0 flex-col border-r border-border/70 bg-surface",
+        "shadow-[1px_0_0_rgba(15,23,42,0.02)]"
+      )}
+    >
+      <div className="flex h-14 items-center justify-between px-3">
+        <Logo compact={collapsed} />
+        <Button
+          variant="ghost"
+          size="icon-sm"
+          aria-label="Toggle sidebar"
+          onClick={toggle}
+          className="hidden lg:inline-flex"
+        >
+          {collapsed ? <ChevronsRight className="h-4 w-4" /> : <ChevronsLeft className="h-4 w-4" />}
+        </Button>
+      </div>
+
+      <div className="px-3 pb-2">
+        <PromoCard collapsed={collapsed} />
+      </div>
+
+      <nav className="flex-1 overflow-y-auto px-2 py-2 scrollbar-thin">
+        {(Object.keys(GROUPS) as NavItem["group"][]).map((group) => (
+          <SidebarGroup
+            key={group}
+            title={GROUPS[group]}
+            items={NAV.filter((n) => n.group === group)}
+            collapsed={collapsed}
+          />
+        ))}
+      </nav>
+
+      <div className="border-t border-border/70 p-3">
+        <div
+          className={cn(
+            "rounded-xl border border-border/60 bg-muted/30 px-3 py-2 text-[11px] leading-relaxed text-muted-foreground",
+            collapsed && "hidden"
+          )}
+        >
+          <div className="flex items-center gap-1.5 font-medium text-foreground">
+            <span className="h-1.5 w-1.5 rounded-full bg-success" />
+            All services healthy
+          </div>
+          <span>Last sync · 2m ago</span>
+        </div>
+      </div>
+    </motion.aside>
+  ); // NOTE(review): the footer status text above is static markup, not wired to any health data in this component
+}
+
+function SidebarGroup({ // One titled section of the sidebar: a heading followed by its list of links.
+  title,
+  items,
+  collapsed,
+}: {
+  title: string; // heading text shown above the links
+  items: NavItem[]; // links to render, in the given order
+  collapsed: boolean; // when true the heading becomes screen-reader-only ("sr-only") instead of being removed
+}) {
+  return (
+    <div className="mb-2">
+      <div
+        className={cn(
+          "px-2 pb-1 pt-3 text-[10px] font-medium uppercase tracking-[0.16em] text-muted-foreground/80",
+          collapsed && "sr-only"
+        )}
+      >
+        {title}
+      </div>
+      <ul className="flex flex-col gap-0.5">
+        {items.map((item) => (
+          <SidebarLink key={item.to} item={item} collapsed={collapsed} />
+        ))}
+      </ul>
+    </div>
+  );
+}
+
+function SidebarLink({ item, collapsed }: { item: NavItem; collapsed: boolean }) { // One nav row; wrapped in a tooltip when collapsed.
+  const Icon = item.icon;
+  const link = (
+    <NavLink
+      to={item.to}
+      end={item.to === "/"} // exact match for the root route so "/" is not active on every page
+      className={({ isActive }) =>
+        cn(
+          "group relative flex items-center gap-3 rounded-xl px-2.5 py-2 text-sm font-medium transition-colors",
+          "ring-focus",
+          isActive
+            ? "bg-primary/10 text-primary-700 dark:text-primary-100"
+            : "text-muted-foreground hover:bg-muted hover:text-foreground"
+        )
+      }
+    >
+      {({ isActive }) => (
+        <>
+          {isActive && (
+            <motion.span
+              layoutId="sidebar-active"
+              transition={{ type: "spring", stiffness: 380, damping: 32 }}
+              className="absolute left-0 top-1/2 -translate-y-1/2 h-6 w-[3px] rounded-r-full bg-primary"
+            />
+          )}
+          <Icon className="h-4 w-4 shrink-0" />
+          <span className={collapsed ? "sr-only" : "truncate"}>{item.label}</span>
+          {!collapsed && item.badge && (
+            <Badge variant="warning" className="ml-auto">
+              {item.badge}
+            </Badge>
+          )}
+          {!collapsed && item.shortcut && (
+            <span className="ml-auto font-mono text-[10px] uppercase tracking-widest text-muted-foreground/70">
+              {item.shortcut}
+            </span>
+          )}
+        </>
+      )}
+    </NavLink>
+  ); // the label stays in the DOM (visually hidden) when collapsed so the icon-only link keeps an accessible name
+
+  if (!collapsed) return <li>{link}</li>;
+
+  return (
+    <li>
+      <Tooltip>
+        <TooltipTrigger asChild>{link}</TooltipTrigger>
+        <TooltipContent side="right">{item.label}</TooltipContent>
+      </Tooltip>
+    </li>
+  );
+}
+
+function PromoCard({ collapsed }: { collapsed: boolean }) { // Static product blurb shown at the top of the expanded sidebar.
+  if (collapsed) return null; // nothing is rendered while the sidebar is collapsed
+  return (
+    <div className="relative overflow-hidden rounded-xl border border-border/70 bg-gradient-to-br from-primary/8 via-transparent to-transparent p-3">
+      <div className="absolute -right-6 -top-6 h-20 w-20 rounded-full bg-primary/15 blur-2xl" aria-hidden />
+      <div className="relative flex items-start gap-2">
+        <Sparkles className="mt-0.5 h-4 w-4 text-primary" />
+        <div className="space-y-0.5">
+          <div className="text-[11px] font-medium text-foreground">AI Knowledge Console</div>
+          <p className="text-[11px] leading-relaxed text-muted-foreground">
+            Hybrid retrieval over your archive. Ask in plain language.
+          </p>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/layouts/Topbar.tsx b/frontend/src/layouts/Topbar.tsx
new file mode 100644
index 0000000..699ce0e
--- /dev/null
+++ b/frontend/src/layouts/Topbar.tsx
@@ -0,0 +1,145 @@
+import { Bell, Command as CommandIcon, HelpCircle, Search } from "lucide-react";
+import { useNavigate } from "react-router-dom";
+
+import { Breadcrumbs } from "@/layouts/Breadcrumbs";
+import { Button } from "@/components/ui/button";
+import { ThemeToggle } from "@/components/common/ThemeToggle";
+import {
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuLabel,
+  DropdownMenuSeparator,
+  DropdownMenuTrigger,
+} from "@/components/ui/dropdown-menu";
+import { useUiStore } from "@/stores/uiStore";
+import { cn } from "@/lib/utils";
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from "@/components/ui/popover";
+import { Badge } from "@/components/ui/badge";
+import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
+
+export function Topbar() { // Sticky page header: breadcrumbs, command-palette trigger, and global action buttons.
+  const openCommand = useUiStore((s) => s.openCommand);
+  const navigate = useNavigate();
+
+  return (
+    <header className="sticky top-0 z-20 flex h-14 items-center gap-3 border-b border-border/70 bg-background/80 px-4 backdrop-blur lg:px-6">
+      <Breadcrumbs className="hidden md:flex" />
+
+      <div className="ml-auto flex flex-1 items-center justify-end gap-2 md:flex-none md:gap-3">
+        <button
+          onClick={openCommand}
+          className={cn(
+            "group hidden h-9 w-full max-w-[420px] items-center gap-3 rounded-xl border border-border/70 bg-surface px-3 text-left text-sm text-muted-foreground shadow-sm transition-colors hover:border-primary/40 hover:bg-card md:flex"
+          )}
+        >
+          <Search className="h-4 w-4 text-muted-foreground" />
+          <span className="flex-1 truncate">Search documents, chunks, queries…</span>
+          <kbd className="hidden items-center gap-0.5 rounded-md border border-border/80 bg-muted/40 px-1.5 py-0.5 font-mono text-[10px] font-medium text-muted-foreground md:inline-flex">
+            <CommandIcon className="h-3 w-3" /> K
+          </kbd>
+        </button>
+
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <Button variant="ghost" size="icon-sm" aria-label="Search" onClick={() => navigate("/search")}>
+              <Search className="h-4 w-4" />
+            </Button>
+          </TooltipTrigger>
+          <TooltipContent>Search</TooltipContent>
+        </Tooltip>
+
+        <ThemeToggle />
+
+        <NotificationCenter />
+
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <Button variant="ghost" size="icon-sm" aria-label="Help">
+              <HelpCircle className="h-4 w-4" />
+            </Button>
+          </TooltipTrigger>
+          <TooltipContent>Docs & shortcuts</TooltipContent>
+        </Tooltip>
+
+        <UserMenu />
+      </div>
+    </header>
+  ); // icon-only buttons carry an explicit aria-label: tooltip content only exists while the tooltip is open
+}
+
+function NotificationCenter() { // Bell button opening a popover; the activity entries below are hard-coded markup.
+  return (
+    <Popover>
+      <PopoverTrigger asChild>
+        <Button variant="ghost" size="icon-sm" className="relative" aria-label="Notifications">
+          <Bell className="h-4 w-4" />
+          <span className="absolute right-1.5 top-1.5 h-1.5 w-1.5 rounded-full bg-primary ring-2 ring-background" />
+        </Button>
+      </PopoverTrigger>
+      <PopoverContent align="end" className="w-80">
+        <div className="mb-2 flex items-center justify-between">
+          <div className="text-sm font-semibold">Activity</div>
+          <Badge variant="muted" className="font-mono">3 new</Badge>
+        </div>
+        <ul className="divide-y divide-border/60 text-sm">
+          <li className="py-2.5">
+            <div className="flex items-center justify-between">
+              <span className="font-medium">Ingestion run #2284 completed</span>
+              <span className="text-[10px] text-muted-foreground">2m</span>
+            </div>
+            <div className="text-xs text-muted-foreground">482 docs · 9 failures</div>
+          </li>
+          <li className="py-2.5">
+            <div className="flex items-center justify-between">
+              <span className="font-medium">Reranker model warmed up</span>
+              <span className="text-[10px] text-muted-foreground">14m</span>
+            </div>
+            <div className="text-xs text-muted-foreground">BGE-reranker-v2-m3 loaded on CPU</div>
+          </li>
+          <li className="py-2.5">
+            <div className="flex items-center justify-between">
+              <span className="font-medium">Low OCR confidence queue grew</span>
+              <span className="text-[10px] text-muted-foreground">1h</span>
+            </div>
+            <div className="text-xs text-muted-foreground">17 new documents flagged for review</div>
+          </li>
+        </ul>
+      </PopoverContent>
+    </Popover>
+  ); // NOTE(review): the "3 new" badge and the unread dot are static too — presumably placeholders until a feed exists
+}
+
+function UserMenu() { // Account dropdown; the identity shown and every menu entry are static, and no item has a handler here.
+  return (
+    <DropdownMenu>
+      <DropdownMenuTrigger asChild>
+        <button className="flex items-center gap-2 rounded-full border border-border/70 bg-card pl-1 pr-3 py-1 text-left transition-colors hover:bg-muted ring-focus">
+          <div className="grid h-7 w-7 place-items-center rounded-full bg-primary text-xs font-semibold text-primary-foreground">
+            VM
+          </div>
+          <div className="hidden text-xs leading-tight md:block">
+            <div className="font-medium text-foreground">Vadim Malanov</div>
+            <div className="text-[10px] text-muted-foreground">Architect · TeamHUB</div>
+          </div>
+        </button>
+      </DropdownMenuTrigger>
+      <DropdownMenuContent align="end" className="w-56">
+        <DropdownMenuLabel>Account</DropdownMenuLabel>
+        <DropdownMenuItem>Profile</DropdownMenuItem>
+        <DropdownMenuItem>Workspaces</DropdownMenuItem>
+        <DropdownMenuItem>API tokens</DropdownMenuItem>
+        <DropdownMenuSeparator />
+        <DropdownMenuLabel>TeamHUB Suite</DropdownMenuLabel>
+        <DropdownMenuItem>Switch to QMS Hub</DropdownMenuItem>
+        <DropdownMenuItem>Switch to Project Hub</DropdownMenuItem>
+        <DropdownMenuSeparator />
+        <DropdownMenuItem className="text-destructive">Sign out</DropdownMenuItem>
+      </DropdownMenuContent>
+    </DropdownMenu>
+  );
+}
diff --git a/frontend/src/layouts/navConfig.ts b/frontend/src/layouts/navConfig.ts
new file mode 100644
index 0000000..effe7eb
--- /dev/null
+++ b/frontend/src/layouts/navConfig.ts
@@ -0,0 +1,39 @@
+import {
+  LayoutDashboard,
+  FileText,
+  Workflow,
+  Search,
+  ScanLine,
+  Table2,
+  ShieldCheck,
+  Activity,
+  Settings,
+  type LucideIcon,
+} from "lucide-react";
+
+export interface NavItem { // One navigation destination.
+  to: string; // route path
+  label: string; // human-readable name
+  icon: LucideIcon; // lucide-react icon component
+  group: "primary" | "operations" | "system"; // section the item belongs to; also the key type of GROUPS
+  shortcut?: string; // optional hint text such as "G D" — NOTE(review): confirm where the actual key binding lives
+  badge?: string; // optional short badge text
+}
+
+export const NAV: NavItem[] = [ // All navigation destinations, listed group by group.
+  { to: "/", label: "Dashboard", icon: LayoutDashboard, group: "primary", shortcut: "G D" },
+  { to: "/documents", label: "Documents", icon: FileText, group: "primary", shortcut: "G O" },
+  { to: "/search", label: "Search", icon: Search, group: "primary", shortcut: "G S" },
+  { to: "/viewer", label: "Document Viewer", icon: ScanLine, group: "primary" },
+  { to: "/ingestion", label: "Ingestion Jobs", icon: Workflow, group: "operations" },
+  { to: "/tables-figures", label: "Tables & Figures", icon: Table2, group: "operations" },
+  { to: "/quality", label: "Quality Control", icon: ShieldCheck, group: "operations", badge: "review" },
+  { to: "/health", label: "System Health", icon: Activity, group: "system" },
+  { to: "/settings", label: "Settings", icon: Settings, group: "system" },
+];
+
+export const GROUPS: Record<NavItem["group"], string> = { // Display title for each NavItem group.
+  primary: "Workspace",
+  operations: "Operations",
+  system: "System",
+};
diff --git a/frontend/src/lib/utils.ts b/frontend/src/lib/utils.ts
new file mode 100644
index 0000000..6251f49
--- /dev/null
+++ b/frontend/src/lib/utils.ts
@@ -0,0 +1,59 @@
+import { clsx, type ClassValue } from "clsx";
+import { twMerge } from "tailwind-merge";
+
+export function cn(...inputs: ClassValue[]) { // Builds one class string: clsx joins the inputs, twMerge post-processes the result.
+  return twMerge(clsx(inputs));
+}
+
+export function formatBytes(bytes: number, decimals = 1): string {
+  if (!Number.isFinite(bytes) || bytes <= 0) return "0 B"; // NaN, ±Infinity, zero and negatives all render as "0 B"
+  const units = ["B", "KB", "MB", "GB", "TB"];
+  // Clamp the unit index at both ends: fractional input (< 1) has a negative log, huge input would overrun `units`.
+  const i = Math.max(0, Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1));
+  return `${(bytes / Math.pow(1024, i)).toFixed(decimals).replace(/\.0$/, "")} ${units[i]}`; // a trailing ".0" is dropped
+}
+
+export function formatNumber(value: number): string {
+  // en-US digit grouping for finite numbers; NaN and ±Infinity render as an em-dash placeholder.
+  return Number.isFinite(value) ? new Intl.NumberFormat("en-US").format(value) : "—";
+}
+
+export function formatPercent(value: number, digits = 0): string { // `value` is a ratio: 0.5 renders as "50%".
+  return Number.isFinite(value) ? `${(value * 100).toFixed(digits)}%` : "—"; // same placeholder as formatNumber/formatDuration
+}
+
+export function formatDuration(ms: number): string {
+  // Renders a millisecond count as ms, s, min or h depending on magnitude; negative or non-finite input gives "—".
+  if (ms < 0 || !Number.isFinite(ms)) return "—";
+  const seconds = ms / 1000;
+  const minutes = seconds / 60;
+  if (ms < 1000) return `${Math.round(ms)} ms`;
+  if (seconds < 60) return `${seconds.toFixed(seconds >= 10 ? 0 : 1)} s`; // one decimal only below ten seconds
+  if (minutes < 60) return `${minutes.toFixed(1)} min`;
+  return `${(minutes / 60).toFixed(1)} h`;
+}
+
+export function relativeTime(iso: string): string {
+  // Phrases a timestamp relative to the current time; input that does not parse as a date is returned unchanged.
+  const timestamp = new Date(iso).getTime();
+  if (Number.isNaN(timestamp)) return iso;
+  const deltaSeconds = Math.round((timestamp - Date.now()) / 1000); // negative for past instants
+  const magnitude = Math.abs(deltaSeconds);
+  const unit = magnitude < 60 ? "second" : magnitude < 3600 ? "minute" : magnitude < 86400 ? "hour" : "day";
+  const divisor = { second: 1, minute: 60, hour: 3600, day: 86400 }[unit];
+  const rtf = new Intl.RelativeTimeFormat("en", { numeric: "auto" });
+  return rtf.format(Math.round(deltaSeconds / divisor), unit);
+}
+
+export function truncate(text: string, max: number): string { // Caps `text` at `max` characters, ending in "…" when cut.
+  if (!text || max <= 0) return ""; // with max <= 0, slice(0, max - 1) would count from the end and keep most of the text
+  return text.length > max ? text.slice(0, max - 1) + "…" : text;
+}
+
+export function classFromConfidence(value: number | null | undefined): string {
+  // Maps a confidence score to a text colour class; tiers are checked from the highest floor down.
+  if (value === null || value === undefined) return "text-muted-foreground";
+  const tiers: Array<[number, string]> = [[0.85, "text-success"], [0.65, "text-primary"], [0.45, "text-warning"]];
+  const match = tiers.find(([floor]) => value >= floor);
+  return match ? match[1] : "text-destructive";
+}
diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx
new file mode 100644
index 0000000..ad15499
--- /dev/null
+++ b/frontend/src/main.tsx
@@ -0,0 +1,10 @@
+import React from "react";
+import ReactDOM from "react-dom/client";
+import { App } from "@/app/App";
+import "@/styles/globals.css";
+
+ReactDOM.createRoot(document.getElementById("root")!).render( // "!" asserts #root exists — NOTE(review): presumably defined in index.html
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>
+);
diff --git a/frontend/src/pages/DashboardPage.tsx b/frontend/src/pages/DashboardPage.tsx
new file mode 100644
index 0000000..ee1f54f
--- /dev/null
+++ b/frontend/src/pages/DashboardPage.tsx
@@ -0,0 +1,113 @@
+import { FileText, Layers, ShieldAlert, Sparkles, Cpu, Database } from "lucide-react";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Button } from "@/components/ui/button";
+import { KpiCard } from "@/widgets/KpiCard";
+import { IngestionStatsChart } from "@/widgets/IngestionStatsChart";
+import { OCRQualityWidget } from "@/widgets/OCRQualityWidget";
+import { QueueWidget } from "@/widgets/QueueWidget";
+import { StorageWidget } from "@/widgets/StorageWidget";
+import { ServiceHealthCard } from "@/widgets/ServiceHealthCard";
+import { RecentRunsWidget } from "@/widgets/RecentRunsWidget";
+import { useDashboardStats } from "@/hooks/useDocuments";
+import { formatBytes, formatNumber, formatPercent } from "@/lib/utils";
+
+export function DashboardPage() { // Landing dashboard: KPI cards on top, then chart/widget rows fed by one stats query.
+  const { data } = useDashboardStats(); // may be undefined; every read below falls back to 0 or []
+
+  return (
+    <>
+      <PageHeader
+        title="Knowledge operations dashboard"
+        description="Live view of ingestion throughput, OCR quality, and the search backbone powering the TeamHUB suite."
+        actions={
+          <>
+            <Button variant="outline" size="sm">Export snapshot</Button>
+            <Button size="sm" className="gap-1.5">
+              <Sparkles className="h-3.5 w-3.5" />
+              Open AI Search
+            </Button>
+          </>
+        }
+      />
+
+      <section className="grid grid-cols-1 gap-4 md:grid-cols-2 xl:grid-cols-4">
+        <KpiCard
+          label="Indexed documents"
+          value={formatNumber(data?.total_documents ?? 0)}
+          helper={`${formatNumber(data?.total_pages ?? 0)} pages · ${formatNumber(data?.total_chunks ?? 0)} chunks`}
+          delta={4.2}
+          icon={<FileText className="h-4 w-4" />}
+          tone="primary"
+          trend={data?.daily_ingest.slice(-12).map((d) => d.ingested) ?? []}
+        />
+        <KpiCard
+          label="OCR confidence"
+          value={formatPercent(data?.avg_ocr_confidence ?? 0, 1)}
+          helper="weighted by page count"
+          delta={1.3}
+          icon={<Layers className="h-4 w-4" />}
+          tone="success"
+          trend={data?.ocr_distribution.map((d) => d.count) ?? []}
+        />
+        <KpiCard
+          label="Needs manual review"
+          value={formatNumber(data?.needs_review ?? 0)}
+          helper="handwriting, garbled, or low confidence"
+          delta={-2.6}
+          icon={<ShieldAlert className="h-4 w-4" />}
+          tone="warning"
+        />
+        <KpiCard
+          label="Failed documents"
+          value={formatNumber(data?.failed_documents ?? 0)}
+          helper="retryable via reindex"
+          delta={-1.4}
+          icon={<Cpu className="h-4 w-4" />}
+          tone="destructive"
+        />
+      </section>
+
+      <section className="grid grid-cols-1 gap-4 xl:grid-cols-3">
+        <div className="xl:col-span-2">
+          <IngestionStatsChart data={data?.daily_ingest ?? []} />
+        </div>
+        <QueueWidget />
+      </section>
+
+      <section className="grid grid-cols-1 gap-4 xl:grid-cols-3">
+        <div className="xl:col-span-2">
+          <StorageWidget
+            totalBytes={data?.total_storage_bytes ?? 0}
+            growth={data?.storage_growth ?? []}
+          />
+        </div>
+        <OCRQualityWidget
+          distribution={data?.ocr_distribution ?? []}
+          avg={data?.avg_ocr_confidence ?? 0}
+        />
+      </section>
+
+      <section className="grid grid-cols-1 gap-4 xl:grid-cols-3">
+        <div className="xl:col-span-2">
+          <RecentRunsWidget />
+        </div>
+        <div className="space-y-4">
+          <ServiceHealthCard />
+          <div className="panel p-5">
+            <div className="flex items-center gap-2">
+              <Database className="h-4 w-4 text-primary" />
+              <div className="text-sm font-semibold">Storage</div>
+            </div>
+            <div className="mt-2 text-2xl font-semibold tracking-tight">
+              {formatBytes(data?.total_storage_bytes ?? 0)}
+            </div>
+            <div className="text-xs text-muted-foreground">
+              MinIO originals + derived artifacts (Markdown, Docling JSON, page images)
+            </div>
+          </div>
+        </div>
+      </section>
+    </>
+  ); // NOTE(review): the two header buttons have no onClick and the KPI `delta` values are literals — presumably placeholders
+}
diff --git a/frontend/src/pages/DocumentViewerPage.tsx b/frontend/src/pages/DocumentViewerPage.tsx
new file mode 100644
index 0000000..d5d4e0e
--- /dev/null
+++ b/frontend/src/pages/DocumentViewerPage.tsx
@@ -0,0 +1,179 @@
+import { useState } from "react";
+import { useParams } from "react-router-dom";
+import { FileText, Image as ImageIcon, Layers, Table as TableIcon } from "lucide-react";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { ScrollArea } from "@/components/ui/scroll-area";
+import { EmptyState } from "@/components/common/EmptyState";
+import { ConfidenceMeter } from "@/components/common/ConfidenceMeter";
+import { BlockTypeLabel } from "@/components/common/BlockTypeIcon";
+import { QualityFlags } from "@/components/common/QualityFlag";
+import { PdfPreviewPane } from "@/widgets/PdfPreviewPane";
+import { DocumentTimeline } from "@/widgets/DocumentTimeline";
+import { useDocument, useDocuments } from "@/hooks/useDocuments";
+import { cn, formatBytes } from "@/lib/utils";
+
+export function DocumentViewerPage() { // Document detail view: page preview beside extracted chunks, tables, figures, metadata.
+  const { id } = useParams<{ id?: string }>();
+  const { data: list, isLoading: isListLoading } = useDocuments({ page_size: 12, status: "INDEXING_COMPLETED" });
+  const fallbackId = list?.items[0]?.id; // with no :id in the route, show the first fully indexed document
+  const effectiveId = id ?? fallbackId;
+  const { data: doc, isLoading } = useDocument(effectiveId);
+  const [activePage, setActivePage] = useState<number>(1);
+
+  if (!effectiveId && !isListLoading) { // only report "nothing to show" once the fallback list has settled
+    return <EmptyState title="No document selected" description="Pick a document from the list to inspect." />;
+  }
+  if (!effectiveId || isLoading || !doc) { // covers both "fallback list still loading" and "document still loading"
+    return (
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-[1.1fr_1fr]">
+        <div className="skeleton-shimmer h-[70vh] rounded-2xl" />
+        <div className="skeleton-shimmer h-[70vh] rounded-2xl" />
+      </div>
+    );
+  }
+
+  return (
+    <>
+      <PageHeader
+        title={doc.original_file_name}
+        description={`${doc.pages} pages · ${doc.chunks} chunks · ${formatBytes(doc.file_size_bytes)} · ${doc.language_hint ?? "—"}`}
+        actions={
+          <>
+            <Button size="sm" variant="outline">Download original</Button>
+            <Button size="sm">Re-index</Button>
+          </>
+        }
+      />
+
+      <div className="flex flex-wrap items-center gap-2 text-xs">
+        <Badge variant="muted" className="font-mono">SHA256 · {doc.sha256.slice(0, 12)}…</Badge>
+        <Badge variant="outline" className="font-mono">{doc.source_path}</Badge>
+        <ConfidenceMeter value={doc.ocr_confidence} />
+        <QualityFlags flags={doc.flags} compact />
+      </div>
+
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-[1.1fr_1fr]">
+        <PdfPreviewPane
+          fileName={doc.original_file_name}
+          pages={doc.pages_data}
+          onPageChange={setActivePage}
+        />
+
+        <Card className="flex h-full flex-col">
+          <CardHeader>
+            <CardTitle>Extracted structure</CardTitle>
+            <CardDescription>Docling output synchronized with the page above.</CardDescription>
+          </CardHeader>
+          <CardContent className="flex flex-1 flex-col gap-3">
+            <Tabs defaultValue="chunks" className="flex flex-1 flex-col">
+              <TabsList>
+                <TabsTrigger value="chunks">
+                  <Layers className="h-3.5 w-3.5" /> Chunks
+                </TabsTrigger>
+                <TabsTrigger value="tables">
+                  <TableIcon className="h-3.5 w-3.5" /> Tables
+                </TabsTrigger>
+                <TabsTrigger value="figures">
+                  <ImageIcon className="h-3.5 w-3.5" /> Figures
+                </TabsTrigger>
+                <TabsTrigger value="metadata">
+                  <FileText className="h-3.5 w-3.5" /> Metadata
+                </TabsTrigger>
+              </TabsList>
+
+              <TabsContent value="chunks" className="flex-1 overflow-hidden">
+                <ScrollArea className="h-[460px] pr-2">
+                  <div className="space-y-2">
+                    {doc.chunks_data.map((c) => (
+                      <button
+                        key={c.id}
+                        onClick={() => setActivePage(c.page_number)}
+                        className={cn(
+                          "block w-full rounded-xl border border-border/70 bg-card px-3 py-2.5 text-left transition-colors hover:border-primary/40",
+                          c.page_number === activePage && "border-primary/60 bg-accent/30"
+                        )}
+                      >
+                        <div className="flex items-center gap-2 text-xs">
+                          <BlockTypeLabel type={c.block_type} />
+                          <span className="font-mono text-muted-foreground">p.{c.page_number}</span>
+                          <Badge variant="outline" className="ml-auto font-mono">
+                            #{c.chunk_index}
+                          </Badge>
+                        </div>
+                        <p className="mt-1.5 line-clamp-3 text-[13px] leading-relaxed text-foreground/90">
+                          {c.text}
+                        </p>
+                      </button>
+                    ))}
+                  </div>
+                </ScrollArea>
+              </TabsContent>
+
+              <TabsContent value="tables">
+                <div className="space-y-3">
+                  {doc.tables.map((t) => (
+                    <Card key={t.id}>
+                      <CardHeader>
+                        <CardTitle className="text-sm">Table {t.table_index + 1}</CardTitle>
+                        <CardDescription>{t.summary}</CardDescription>
+                      </CardHeader>
+                      <CardContent>
+                        <pre className="overflow-x-auto rounded-md border border-border/70 bg-muted/30 p-3 font-mono text-[12px] leading-relaxed text-foreground">
+                          {t.markdown}
+                        </pre>
+                      </CardContent>
+                    </Card>
+                  ))}
+                </div>
+              </TabsContent>
+
+              <TabsContent value="figures">
+                <div className="grid grid-cols-2 gap-3">
+                  {doc.figures.map((f) => (
+                    <Card key={f.id}>
+                      <CardContent className="space-y-2 p-3">
+                        <div className="bg-grid-faint relative aspect-video rounded-lg border border-border/60" />
+                        <div className="flex items-center justify-between text-xs">
+                          <Badge variant="outline" className="font-mono">p.{f.page_number}</Badge>
+                          <Badge variant="muted">figure #{f.figure_index + 1}</Badge>
+                        </div>
+                        <p className="text-xs text-muted-foreground">{f.caption ?? f.description}</p>
+                      </CardContent>
+                    </Card>
+                  ))}
+                </div>
+              </TabsContent>
+
+              <TabsContent value="metadata">
+                <div className="grid grid-cols-2 gap-3 text-sm">
+                  {[
+                    ["Document ID", doc.id],
+                    ["SHA256", doc.sha256],
+                    ["Source path", doc.source_path],
+                    ["Language hint", doc.language_hint ?? "—"],
+                    ["Pages", doc.pages],
+                    ["Chunks", doc.chunks],
+                    ["Status", doc.status],
+                    ["Size", formatBytes(doc.file_size_bytes)],
+                  ].map(([k, v]) => (
+                    <div key={String(k)} className="rounded-xl border border-border/70 bg-card px-3 py-2">
+                      <div className="text-[10px] uppercase tracking-wide text-muted-foreground">{k}</div>
+                      <div className="mt-0.5 truncate font-mono text-xs">{String(v)}</div>
+                    </div>
+                  ))}
+                </div>
+              </TabsContent>
+            </Tabs>
+          </CardContent>
+        </Card>
+      </div>
+
+      <DocumentTimeline events={doc.timeline} />
+    </>
+  ); // NOTE(review): `activePage` only highlights chunks here; it is not passed back into PdfPreviewPane — confirm intended
+}
diff --git a/frontend/src/pages/DocumentsPage.tsx b/frontend/src/pages/DocumentsPage.tsx
new file mode 100644
index 0000000..54e3aa1
--- /dev/null
+++ b/frontend/src/pages/DocumentsPage.tsx
@@ -0,0 +1,197 @@
+import { useMemo, useRef, useState } from "react";
+import { useNavigate } from "react-router-dom";
+import { useVirtualizer } from "@tanstack/react-virtual";
+import { FileText, Filter, Inbox, Search, SlidersHorizontal } from "lucide-react";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent } from "@/components/ui/card";
+import { Input } from "@/components/ui/input";
+import { Button } from "@/components/ui/button";
+import { Badge } from "@/components/ui/badge";
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
+import { ConfidenceMeter } from "@/components/common/ConfidenceMeter";
+import { StatusChip, statusToTone } from "@/components/common/StatusChip";
+import { QualityFlags } from "@/components/common/QualityFlag";
+import { EmptyState } from "@/components/common/EmptyState";
+import { useDocuments } from "@/hooks/useDocuments";
+import { useDebounce } from "@/hooks/useDebounce";
+import { formatBytes, formatNumber, relativeTime } from "@/lib/utils";
+
+/**
+ * Documents listing page: a search box, status and needs-review filters, and
+ * a virtualized table of the documents returned by `useDocuments`.
+ * Clicking a row navigates to `/viewer/<document id>`.
+ */
+export function DocumentsPage() {
+  const navigate = useNavigate();
+  const [query, setQuery] = useState("");
+  const [status, setStatus] = useState<string>("any");
+  const [needsReview, setNeedsReview] = useState(false);
+  const debouncedQuery = useDebounce(query, 220);
+
+  const { data, isLoading } = useDocuments({
+    q: debouncedQuery,
+    status: status === "any" ? undefined : status,
+    needs_review: needsReview,
+    page_size: 200,
+  });
+
+  // Memoized so the `[]` fallback keeps one identity while `data` is undefined;
+  // a fresh array each render would invalidate the `totals` memo every time.
+  const items = useMemo(() => data?.items ?? [], [data?.items]);
+  const parentRef = useRef<HTMLDivElement>(null);
+
+  const rowVirtualizer = useVirtualizer({
+    count: items.length,
+    getScrollElement: () => parentRef.current,
+    estimateSize: () => 64,
+    overscan: 12,
+  });
+
+  // `docs` uses the response total; pages/chunks are summed over the loaded items only.
+  const totals = useMemo(
+    () => ({
+      docs: data?.total ?? 0,
+      pages: items.reduce((a, d) => a + d.pages, 0),
+      chunks: items.reduce((a, d) => a + d.chunks, 0),
+    }),
+    [data?.total, items]
+  );
+
+  return (
+    <>
+      <PageHeader
+        title="Documents"
+        description="Browse, filter, and inspect every PDF the platform has touched."
+        actions={
+          <>
+            <Button variant="outline" size="sm">Bulk actions</Button>
+            <Button size="sm" className="gap-1.5">
+              <Inbox className="h-3.5 w-3.5" />
+              Trigger ingestion
+            </Button>
+          </>
+        }
+      />
+
+      <Card className="overflow-hidden">
+        <div className="flex flex-col gap-3 border-b border-border/70 px-4 py-3 lg:flex-row lg:items-center">
+          <div className="relative flex-1">
+            <Search className="pointer-events-none absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
+            <Input
+              value={query}
+              onChange={(e) => setQuery(e.target.value)}
+              placeholder="Search by file name, source path, hash…"
+              className="pl-9"
+            />
+          </div>
+          <div className="flex items-center gap-2">
+            <Select value={status} onValueChange={setStatus}>
+              <SelectTrigger className="w-44">
+                <Filter className="mr-1.5 h-3.5 w-3.5 text-muted-foreground" />
+                <SelectValue placeholder="Status" />
+              </SelectTrigger>
+              <SelectContent>
+                <SelectItem value="any">Any status</SelectItem>
+                <SelectItem value="INDEXING_COMPLETED">Indexed</SelectItem>
+                <SelectItem value="EXTRACTION_COMPLETED">Extracted</SelectItem>
+                <SelectItem value="OCR_FAILED">OCR failed</SelectItem>
+                <SelectItem value="EXTRACTION_FAILED">Extraction failed</SelectItem>
+                <SelectItem value="FAILED">Failed</SelectItem>
+              </SelectContent>
+            </Select>
+            <Button
+              size="sm"
+              variant={needsReview ? "subtle" : "outline"}
+              onClick={() => setNeedsReview((v) => !v)}
+              className="gap-1.5"
+            >
+              <SlidersHorizontal className="h-3.5 w-3.5" />
+              Needs review
+            </Button>
+          </div>
+        </div>
+
+        <div className="flex items-center justify-between gap-3 border-b border-border/70 bg-muted/30 px-4 py-2 text-xs text-muted-foreground">
+          <div className="flex flex-wrap gap-x-3 gap-y-1">
+            <span>
+              <span className="font-mono text-foreground">{formatNumber(totals.docs)}</span> documents
+            </span>
+            <span>
+              <span className="font-mono text-foreground">{formatNumber(totals.pages)}</span> pages
+            </span>
+            <span>
+              <span className="font-mono text-foreground">{formatNumber(totals.chunks)}</span> chunks
+            </span>
+          </div>
+          <Badge variant="muted" className="font-mono">virtualized</Badge>
+        </div>
+
+        <div ref={parentRef} className="max-h-[640px] overflow-y-auto scrollbar-thin">
+          <table className="w-full text-sm">
+            <thead className="sticky top-0 z-10 bg-card text-[10px] uppercase tracking-wide text-muted-foreground">
+              <tr className="border-b border-border/70">
+                <th className="px-4 py-2 text-left font-medium">Document</th>
+                <th className="px-4 py-2 text-left font-medium">Status</th>
+                <th className="px-4 py-2 text-left font-medium">OCR</th>
+                <th className="px-4 py-2 text-left font-medium">Flags</th>
+                <th className="px-4 py-2 text-right font-medium">Pages</th>
+                <th className="px-4 py-2 text-right font-medium">Chunks</th>
+                <th className="px-4 py-2 text-right font-medium">Size</th>
+                <th className="px-4 py-2 text-right font-medium">Updated</th>
+              </tr>
+            </thead>
+            <tbody>
+              <tr style={{ height: rowVirtualizer.getTotalSize() }}>
+                <td colSpan={8} className="p-0">
+                  <div className="relative" style={{ height: rowVirtualizer.getTotalSize() }}>
+                    {rowVirtualizer.getVirtualItems().map((v) => {
+                      const d = items[v.index];
+                      return (
+                        <button
+                          key={d.id}
+                          onClick={() => navigate(`/viewer/${d.id}`)}
+                          className="group absolute left-0 right-0 grid w-full grid-cols-[1.5fr_0.9fr_0.9fr_1fr_0.5fr_0.5fr_0.6fr_0.7fr] items-center gap-x-3 px-4 py-2.5 text-left transition-colors hover:bg-muted/40 border-b border-border/60"
+                          style={{ transform: `translateY(${v.start}px)`, height: `${v.size}px` }}
+                        >
+                          <div className="flex min-w-0 items-center gap-2.5">
+                            <span className="grid h-9 w-9 place-items-center rounded-lg border border-border/70 bg-card text-primary">
+                              <FileText className="h-4 w-4" />
+                            </span>
+                            <div className="min-w-0 leading-tight">
+                              <div className="truncate text-sm font-medium text-foreground">{d.original_file_name}</div>
+                              <div className="truncate font-mono text-[11px] text-muted-foreground">{d.source_path}</div>
+                            </div>
+                          </div>
+                          <StatusChip tone={statusToTone(d.status)} label={d.status} />
+                          <ConfidenceMeter value={d.ocr_confidence} />
+                          <QualityFlags flags={d.flags} compact />
+                          <div className="text-right font-mono text-xs tabular-nums">{d.pages}</div>
+                          <div className="text-right font-mono text-xs tabular-nums">{d.chunks}</div>
+                          <div className="text-right font-mono text-xs tabular-nums text-muted-foreground">{formatBytes(d.file_size_bytes)}</div>
+                          <div className="text-right text-xs text-muted-foreground">
+                            {relativeTime(d.updated_at)}
+                          </div>
+                        </button>
+                      );
+                    })}
+                  </div>
+                </td>
+              </tr>
+            </tbody>
+          </table>
+        </div>
+
+        {!isLoading && items.length === 0 && (
+          <CardContent className="py-10">
+            <EmptyState
+              icon={<FileText className="h-5 w-5" />}
+              title="No documents match those filters"
+              description="Adjust your search, drop the status filter, or trigger a new ingestion run from the Operations panel."
+            />
+          </CardContent>
+        )}
+      </Card>
+    </>
+  );
+}
diff --git a/frontend/src/pages/IngestionJobsPage.tsx b/frontend/src/pages/IngestionJobsPage.tsx
new file mode 100644
index 0000000..93d5f28
--- /dev/null
+++ b/frontend/src/pages/IngestionJobsPage.tsx
@@ -0,0 +1,167 @@
+import { useState } from "react";
+import { Loader2, PlayCircle } from "lucide-react";
+import { toast } from "sonner";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Input } from "@/components/ui/input";
+import { Button } from "@/components/ui/button";
+import { Switch } from "@/components/ui/switch";
+import { StatusChip, statusToTone } from "@/components/common/StatusChip";
+import { Progress } from "@/components/ui/progress";
+import { formatNumber, relativeTime } from "@/lib/utils";
+import { useIngestionRuns, useStartIngestion } from "@/hooks/useIngestion";
+
+/**
+ * Ingestion jobs page: a form that starts a new ingestion run via
+ * `useStartIngestion`, next to a table of the runs from `useIngestionRuns`.
+ */
+export function IngestionJobsPage() {
+  const { data: runs, isLoading } = useIngestionRuns();
+  const start = useStartIngestion();
+  const [path, setPath] = useState("/data/input");
+  const [recursive, setRecursive] = useState(true);
+  const [force, setForce] = useState(false);
+
+  function submit() {
+    start.mutate(
+      { path: path.trim(), recursive, force },
+      {
+        onSuccess: (res) => {
+          toast.success(`Run ${res.run_id.slice(0, 8)} queued`, {
+            description: `${res.queued} queued · ${res.skipped_duplicates} duplicates · ${res.invalid_files} invalid`,
+          });
+        },
+        onError: (err: unknown) =>
+          toast.error("Ingestion failed", {
+            description: err instanceof Error ? err.message : "Unknown error",
+          }),
+      }
+    );
+  }
+
+  return (
+    <>
+      <PageHeader
+        title="Ingestion jobs"
+        description="Schedule new ingestion runs and review the history of every batch operation."
+      />
+
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-3">
+        <Card>
+          <CardHeader>
+            <CardTitle>New run</CardTitle>
+            <CardDescription>Discover PDFs, OCR, extract and index.</CardDescription>
+          </CardHeader>
+          <CardContent className="space-y-3">
+            <label className="block space-y-1.5 text-xs font-medium uppercase tracking-wide text-muted-foreground">
+              <span>Source folder</span>
+              <Input value={path} onChange={(e) => setPath(e.target.value)} placeholder="/data/input" />
+            </label>
+            <ToggleRow label="Recursive" hint="Walk into all subdirectories" checked={recursive} onChange={setRecursive} />
+            <ToggleRow
+              label="Force re-process"
+              hint="Re-run pipeline for already-known SHA256 hashes"
+              checked={force}
+              onChange={setForce}
+            />
+            <Button onClick={submit} disabled={start.isPending || path.trim() === ""} className="w-full">
+              {start.isPending ? (
+                <>
+                  <Loader2 className="h-4 w-4 animate-spin" /> Queuing…
+                </>
+              ) : (
+                <>
+                  <PlayCircle className="h-4 w-4" /> Start ingestion
+                </>
+              )}
+            </Button>
+          </CardContent>
+        </Card>
+
+        <div className="xl:col-span-2">
+          <Card>
+            <CardHeader>
+              <CardTitle>Run history</CardTitle>
+              <CardDescription>Most recent jobs across all sources</CardDescription>
+            </CardHeader>
+            <CardContent className="overflow-x-auto">
+              <table className="min-w-full text-sm">
+                <thead className="text-[10px] uppercase tracking-wide text-muted-foreground">
+                  <tr>
+                    <th className="px-3 py-2 text-left font-medium">Status</th>
+                    <th className="px-3 py-2 text-left font-medium">Source</th>
+                    <th className="px-3 py-2 text-left font-medium">Progress</th>
+                    <th className="px-3 py-2 text-right font-medium">Failed</th>
+                    <th className="px-3 py-2 text-right font-medium">Started</th>
+                    <th className="px-3 py-2 text-right font-medium">Finished</th>
+                  </tr>
+                </thead>
+                <tbody className="divide-y divide-border/60">
+                  {isLoading &&
+                    Array.from({ length: 4 }).map((_, i) => (
+                      <tr key={i}>
+                        <td colSpan={6} className="px-3 py-2">
+                          <div className="skeleton-shimmer h-7 w-full rounded" />
+                        </td>
+                      </tr>
+                    ))}
+                  {runs?.map((r) => {
+                    // Clamp to 100 so a run that reports more processed than total files
+                    // never hands the progress bar an out-of-range value.
+                    const pct = r.total_files > 0 ? Math.min(100, Math.round((r.processed_files / r.total_files) * 100)) : 0;
+                    return (
+                      <tr key={r.id} className="transition-colors hover:bg-muted/30">
+                        <td className="px-3 py-2.5">
+                          <StatusChip tone={statusToTone(r.status)} label={r.status} />
+                        </td>
+                        <td className="px-3 py-2.5 font-mono text-xs text-muted-foreground">{r.source_folder}</td>
+                        <td className="px-3 py-2.5">
+                          <div className="flex items-center gap-2">
+                            <Progress value={pct} className="h-1.5 w-40" />
+                            <span className="font-mono text-xs tabular-nums text-muted-foreground">
+                              {formatNumber(r.processed_files)}/{formatNumber(r.total_files)}
+                            </span>
+                          </div>
+                        </td>
+                        <td className="px-3 py-2.5 text-right font-mono text-xs tabular-nums">
+                          <span className={r.failed_files > 0 ? "text-destructive" : "text-muted-foreground"}>
+                            {formatNumber(r.failed_files)}
+                          </span>
+                        </td>
+                        <td className="px-3 py-2.5 text-right text-xs text-muted-foreground">{relativeTime(r.started_at)}</td>
+                        <td className="px-3 py-2.5 text-right text-xs text-muted-foreground">{r.finished_at ? relativeTime(r.finished_at) : "—"}</td>
+                      </tr>
+                    );
+                  })}
+                </tbody>
+              </table>
+            </CardContent>
+          </Card>
+        </div>
+      </div>
+    </>
+  );
+}
+
+/** Labelled switch row: label and hint text on one side, the toggle on the other. */
+function ToggleRow(props: {
+  label: string;
+  hint: string;
+  checked: boolean;
+  onChange: (v: boolean) => void;
+}) {
+  const { label, hint, checked, onChange } = props;
+  const description = (
+    <div className="leading-tight">
+      <div className="text-sm font-medium">{label}</div>
+      <div className="text-xs text-muted-foreground">{hint}</div>
+    </div>
+  );
+  return (
+    <label className="flex cursor-pointer items-start justify-between gap-3 rounded-xl border border-border/70 bg-muted/20 p-3">
+      {description}
+      <Switch checked={checked} onCheckedChange={onChange} />
+    </label>
+  );
+}
diff --git a/frontend/src/pages/QualityControlPage.tsx b/frontend/src/pages/QualityControlPage.tsx
new file mode 100644
index 0000000..528a49c
--- /dev/null
+++ b/frontend/src/pages/QualityControlPage.tsx
@@ -0,0 +1,128 @@
+import { useState } from "react";
+import { CheckCircle2, FileWarning, PenLine, ShieldCheck } from "lucide-react";
+import { motion } from "framer-motion";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Button } from "@/components/ui/button";
+import { Badge } from "@/components/ui/badge";
+import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { ConfidenceMeter } from "@/components/common/ConfidenceMeter";
+import { QualityFlags } from "@/components/common/QualityFlag";
+import { useQualityQueue } from "@/hooks/useQuality";
+import { relativeTime } from "@/lib/utils";
+import type { QualityQueueKind } from "@/services/quality";
+
+const TABS: { kind: QualityQueueKind; label: string; icon: typeof PenLine; tone: string }[] = [ // one entry per review-queue tab; `tone` is the class appended to the tab icon
+  { kind: "low_confidence", label: "Low confidence", icon: FileWarning, tone: "text-warning" },
+  { kind: "handwriting", label: "Handwriting", icon: PenLine, tone: "text-destructive" },
+  { kind: "failed", label: "Failed extraction", icon: ShieldCheck, tone: "text-destructive" },
+];
+
+/**
+ * Quality control page: tabs select one of the review queues in `TABS`, the
+ * card below lists the documents `useQualityQueue` returns for that queue,
+ * and the audit log is rendered underneath.
+ */
+export function QualityControlPage() {
+  const [kind, setKind] = useState<QualityQueueKind>("low_confidence");
+  const { data, isLoading } = useQualityQueue(kind);
+  const flaggedCount = data?.length ?? 0;
+
+  return (
+    <>
+      <PageHeader
+        title="Quality control"
+        description="Review queues for handwriting detection, low-confidence OCR, and failed extractions."
+        actions={<Button variant="outline" size="sm">Export audit log</Button>}
+      />
+
+      <Tabs value={kind} onValueChange={(next) => setKind(next as QualityQueueKind)}>
+        <TabsList>
+          {TABS.map(({ kind: tabKind, label, icon: Icon, tone }) => (
+            <TabsTrigger key={tabKind} value={tabKind}>
+              <Icon className={`h-3.5 w-3.5 ${tone}`} />
+              {label}
+            </TabsTrigger>
+          ))}
+        </TabsList>
+      </Tabs>
+
+      <Card>
+        <CardHeader>
+          <CardTitle>Review queue</CardTitle>
+          <CardDescription>{flaggedCount} documents flagged · sorted by detection time</CardDescription>
+        </CardHeader>
+        <CardContent className="space-y-2">
+          {isLoading &&
+            Array.from({ length: 5 }, (_, i) => (
+              <div key={i} className="skeleton-shimmer h-16 rounded-xl" />
+            ))}
+          {data?.map((entry, position) => (
+            <motion.div
+              key={entry.document.id}
+              initial={{ opacity: 0, y: 8 }}
+              animate={{ opacity: 1, y: 0 }}
+              transition={{ delay: position * 0.02 }}
+              className="flex flex-wrap items-center gap-3 rounded-xl border border-border/70 bg-card px-3 py-3 transition-colors hover:border-primary/30"
+            >
+              <div className="grid h-10 w-10 place-items-center rounded-xl bg-primary/8 text-primary">
+                <FileWarning className="h-4 w-4" />
+              </div>
+              <div className="min-w-0 flex-1 leading-tight">
+                <div className="truncate text-sm font-medium">{entry.document.original_file_name}</div>
+                <div className="truncate font-mono text-[11px] text-muted-foreground">{entry.document.source_path}</div>
+              </div>
+              <Badge variant="warning" className="font-mono">
+                {entry.pages_flagged} pages flagged
+              </Badge>
+              <ConfidenceMeter value={entry.document.ocr_confidence} />
+              <QualityFlags flags={entry.document.flags} compact />
+              <span className="text-xs text-muted-foreground">{relativeTime(entry.detected_at)}</span>
+              <div className="flex w-full items-center justify-end gap-2 sm:w-auto">
+                <Button size="sm" variant="outline">Open viewer</Button>
+                <Button size="sm" className="gap-1.5">
+                  <CheckCircle2 className="h-3.5 w-3.5" />
+                  Mark reviewed
+                </Button>
+              </div>
+            </motion.div>
+          ))}
+        </CardContent>
+      </Card>
+
+      <AuditLog />
+    </>
+  );
+}
+
+function AuditLog() {
+  const entries = [ // hard-coded timeline entries; nothing in this component is fetched
+    { stage: "Manual review approved", message: "Vadim cleared Регламент_ТО_2014_1102.pdf", time: "5m" },
+    { stage: "Reindex triggered", message: "ГОСТ_21.501-93_1003.pdf · reranker enabled", time: "32m" },
+    { stage: "Handwriting flagged", message: "Журнал_ремонтов_1009.pdf · pages 4, 6, 11", time: "2h" },
+    { stage: "Low confidence", message: "Архивный_отчет_1156.pdf · 17 chunks below threshold", time: "5h" },
+  ];
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Audit log</CardTitle>
+        <CardDescription>Recent reviewer actions and automated flags</CardDescription>
+      </CardHeader>
+      <CardContent>
+        <ol className="relative ml-2 border-l border-border/70 pl-5">
+          {entries.map(({ stage, message, time }, position) => (
+            <li key={position} className="relative pb-4 last:pb-0">
+              <span className="absolute -left-[7px] top-1 h-2.5 w-2.5 rounded-full bg-primary ring-2 ring-card" />
+              <div className="flex items-center justify-between gap-2">
+                <div className="text-sm font-medium">{stage}</div>
+                <span className="text-[11px] text-muted-foreground">{time} ago</span>
+              </div>
+              <div className="text-xs text-muted-foreground">{message}</div>
+            </li>
+          ))}
+        </ol>
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/frontend/src/pages/SearchPage.tsx b/frontend/src/pages/SearchPage.tsx
new file mode 100644
index 0000000..328cbe9
--- /dev/null
+++ b/frontend/src/pages/SearchPage.tsx
@@ -0,0 +1,336 @@
+import { useEffect, useMemo, useState } from "react";
+import { AnimatePresence, motion } from "framer-motion";
+import { ArrowRight, Filter, Loader2, Search as SearchIcon, Sparkles, X } from "lucide-react";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Input } from "@/components/ui/input";
+import { Button } from "@/components/ui/button";
+import { Badge } from "@/components/ui/badge";
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from "@/components/ui/popover";
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
+import { ScrollArea } from "@/components/ui/scroll-area";
+import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { Skeleton } from "@/components/ui/skeleton";
+import { EmptyState } from "@/components/common/EmptyState";
+import { SearchResultCard } from "@/widgets/SearchResultCard";
+import { ChunkPreview } from "@/widgets/ChunkPreview";
+import { useSearchStore } from "@/stores/searchStore";
+import { useSearchResults, useSuggestions } from "@/hooks/useSearch";
+import { useDebounce } from "@/hooks/useDebounce";
+import type { SearchMode } from "@/services/types";
+import { cn, formatNumber } from "@/lib/utils";
+
+/**
+ * Knowledge search page: the draft input is debounced into the shared search
+ * store, results for the stored query/mode/filters are listed on the left, and
+ * the selected hit is previewed on the right on `xl` screens.
+ */
+export function SearchPage() {
+  const { query, mode, filters, setQuery, setMode, pushHistory, history } = useSearchStore();
+  const [draft, setDraft] = useState(query);
+  const [activeId, setActiveId] = useState<string | null>(null);
+  const debounced = useDebounce(draft, 320);
+
+  // Mirror the debounced draft into the store so results follow typing.
+  useEffect(() => {
+    setQuery(debounced);
+  }, [debounced, setQuery]);
+
+  const { data: suggestions } = useSuggestions(draft);
+  const { data, isFetching } = useSearchResults({
+    query,
+    mode,
+    filters,
+    limit: 20,
+    enabled: query.trim().length > 0,
+  });
+
+  const results = useMemo(() => data?.results ?? [], [data?.results]);
+  // Fall back to the first hit when `activeId` is not among the current results.
+  const active = useMemo(
+    () => results.find((r) => r.chunk_id === activeId) ?? results[0] ?? null,
+    [results, activeId]
+  );
+
+  useEffect(() => {
+    if (results.length > 0 && !activeId) setActiveId(results[0].chunk_id);
+  }, [results, activeId]);
+
+  function submit(value?: string) {
+    const q = (value ?? draft).trim();
+    if (!q) return;
+    setQuery(q);
+    pushHistory(q);
+  }
+
+  return (
+    <>
+      <PageHeader
+        title="AI knowledge search"
+        description="Hybrid lexical + semantic retrieval with BGE reranking over the entire archive."
+        actions={
+          <Badge variant="muted" className="font-mono"><Sparkles className="h-3 w-3 text-primary" /> hybrid · BGE-M3</Badge>
+        }
+      />
+
+      <Card className="overflow-visible">
+        <CardContent className="space-y-4 p-5">
+          <div className="flex flex-col gap-3 lg:flex-row lg:items-center">
+            <div className="relative flex-1">
+              <SearchIcon className="pointer-events-none absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-muted-foreground" />
+              <Input
+                value={draft}
+                onChange={(e) => setDraft(e.target.value)}
+                placeholder='Ask in plain language, e.g. "ГОСТ 21.501-93 рабочие чертежи"'
+                className="h-12 rounded-xl pl-9 pr-32 text-base"
+                onKeyDown={(e) => { if (e.key === "Enter") submit(); }}
+              />
+              <div className="absolute right-2 top-1/2 -translate-y-1/2">
+                <Button size="sm" className="gap-1.5" onClick={() => submit()}>
+                  {isFetching ? <Loader2 className="h-3.5 w-3.5 animate-spin" /> : <ArrowRight className="h-3.5 w-3.5" />}
+                  Search
+                </Button>
+              </div>
+            </div>
+
+            <Tabs value={mode} onValueChange={(v) => setMode(v as SearchMode)}>
+              <TabsList>
+                <TabsTrigger value="hybrid">Hybrid</TabsTrigger>
+                <TabsTrigger value="lexical">Lexical</TabsTrigger>
+                <TabsTrigger value="semantic">Semantic</TabsTrigger>
+              </TabsList>
+            </Tabs>
+
+            <FiltersPopover />
+          </div>
+
+          {/* History is offered even when no suggestions have been fetched; dedupe before capping at 6. */}
+          {draft.length === 0 && (
+            <Suggestions items={Array.from(new Set([...history, ...(suggestions ?? [])])).slice(0, 6)} onPick={(v) => { setDraft(v); submit(v); }} />
+          )}
+          {suggestions && draft.length > 0 && suggestions.length > 0 && (
+            <Suggestions items={suggestions} onPick={(v) => { setDraft(v); submit(v); }} />
+          )}
+
+          <ActiveFilters />
+        </CardContent>
+      </Card>
+
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-[1.2fr_1fr]">
+        <ScrollArea className="max-h-[78vh]">
+          <div className="space-y-3 pr-1">
+            {query.trim() === "" ? (
+              <EmptyState
+                icon={<SearchIcon className="h-5 w-5" />}
+                title="Ask a question to begin"
+                description="Try ГОСТ codes, regulation IDs, project names, or natural language — the hybrid retriever handles all of them."
+              />
+            ) : isFetching && results.length === 0 ? (
+              Array.from({ length: 4 }, (_, i) => <ResultSkeleton key={i} />)
+            ) : results.length === 0 ? (
+              <EmptyState title="No results" description="Try broadening the query or removing filters." />
+            ) : (
+              <>
+                <ResultsHeader
+                  totalCandidates={data?.total_candidates ?? 0}
+                  reranked={Boolean(data?.reranked)}
+                  shown={results.length}
+                />
+                <AnimatePresence mode="popLayout">
+                  {results.map((hit) => (
+                    <SearchResultCard
+                      key={hit.chunk_id}
+                      hit={hit}
+                      query={query}
+                      active={hit.chunk_id === active?.chunk_id}
+                      onSelect={() => setActiveId(hit.chunk_id)}
+                      reranked={Boolean(data?.reranked)}
+                    />
+                  ))}
+                </AnimatePresence>
+              </>
+            )}
+          </div>
+        </ScrollArea>
+
+        <div className="hidden xl:block">
+          <ChunkPreview hit={active} query={query} />
+        </div>
+      </div>
+    </>
+  );
+}
+
+/** One-line summary above the hits: shown vs. candidate counts and whether reranking was applied. */
+function ResultsHeader(props: {
+  totalCandidates: number;
+  reranked: boolean;
+  shown: number;
+}) {
+  const { totalCandidates, reranked, shown } = props;
+  const badgeVariant = reranked ? "default" : "muted";
+  const orderingLabel = reranked ? "BGE reranker active" : "raw RRF order";
+  return (
+    <div className="flex flex-wrap items-center gap-2 px-1 text-xs text-muted-foreground">
+      <span>
+        Showing <span className="font-mono text-foreground">{shown}</span> of{" "}
+        <span className="font-mono text-foreground">{formatNumber(totalCandidates)}</span> candidates
+      </span>
+      <span>·</span>
+      <Badge variant={badgeVariant} className="font-mono">
+        {orderingLabel}
+      </Badge>
+    </div>
+  );
+}
+
+/** Row of clickable query chips; duplicates are dropped and an empty list renders nothing. */
+function Suggestions(props: {
+  items: string[];
+  onPick: (q: string) => void;
+}) {
+  const { items, onPick } = props;
+  if (items.length === 0) return null;
+  const unique = Array.from(new Set(items));
+  return (
+    <div className="flex flex-wrap items-center gap-2">
+      <span className="text-[11px] uppercase tracking-wide text-muted-foreground">Try</span>
+      {unique.map((suggestion) => (
+        <button
+          key={suggestion}
+          onClick={() => onPick(suggestion)}
+          className={cn(
+            "group rounded-full border border-border/70 bg-muted/40 px-3 py-1 text-xs text-foreground/90 transition-colors",
+            "hover:border-primary/50 hover:bg-accent/40 hover:text-primary-700 dark:hover:text-primary-100"
+          )}
+        >
+          {suggestion}
+        </button>
+      ))}
+    </div>
+  );
+}
+
+/**
+ * Popover with the search refinements kept in the search store: block type,
+ * minimum OCR confidence and source path. "Clear all" calls the store's `reset`.
+ */
+function FiltersPopover() {
+  const filters = useSearchStore((s) => s.filters);
+  const setFilters = useSearchStore((s) => s.setFilters);
+  const reset = useSearchStore((s) => s.reset);
+  const activeCount = Object.values(filters).filter((value) => !(value === null || value === undefined || value === "")).length;
+  const confidencePct = Math.round((filters.min_ocr_confidence ?? 0) * 100);
+
+  return (
+    <Popover>
+      <PopoverTrigger asChild>
+        <Button variant="outline" size="default" className="gap-1.5">
+          <Filter className="h-3.5 w-3.5" />
+          Filters
+          {activeCount > 0 && <Badge className="ml-1">{activeCount}</Badge>}
+        </Button>
+      </PopoverTrigger>
+      <PopoverContent align="end" className="w-80 space-y-3">
+        <div className="flex items-center justify-between">
+          <div className="text-sm font-semibold">Refine results</div>
+          <button onClick={reset} className="text-[11px] text-muted-foreground hover:text-foreground">
+            Clear all
+          </button>
+        </div>
+        <div className="space-y-2">
+          <Label>Block type</Label>
+          <Select
+            value={filters.block_type ?? "any"}
+            onValueChange={(choice) => setFilters({ block_type: choice === "any" ? null : choice })}
+          >
+            <SelectTrigger><SelectValue placeholder="Any" /></SelectTrigger>
+            <SelectContent>
+              <SelectItem value="any">Any</SelectItem>
+              <SelectItem value="paragraph">Paragraph</SelectItem>
+              <SelectItem value="heading">Heading</SelectItem>
+              <SelectItem value="table">Table</SelectItem>
+              <SelectItem value="figure_caption">Figure caption</SelectItem>
+              <SelectItem value="list">List</SelectItem>
+              <SelectItem value="handwriting">Handwriting</SelectItem>
+            </SelectContent>
+          </Select>
+        </div>
+        <div className="space-y-2">
+          <Label>Min OCR confidence</Label>
+          <div className="flex items-center gap-2">
+            <input
+              type="range"
+              min={0}
+              max={100}
+              step={5}
+              value={confidencePct}
+              onChange={(event) => setFilters({ min_ocr_confidence: Number(event.target.value) / 100 || null })}
+              className="h-2 w-full appearance-none rounded-full bg-muted [&::-webkit-slider-thumb]:h-4 [&::-webkit-slider-thumb]:w-4 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:rounded-full [&::-webkit-slider-thumb]:bg-primary"
+            />
+            <span className="w-12 font-mono text-xs tabular-nums">
+              {filters.min_ocr_confidence ? `${confidencePct}%` : "any"}
+            </span>
+          </div>
+        </div>
+        <div className="space-y-2">
+          <Label>Source path</Label>
+          <Input
+            placeholder="/archive/scanned"
+            value={filters.source_path ?? ""}
+            onChange={(event) => setFilters({ source_path: event.target.value || null })}
+          />
+        </div>
+      </PopoverContent>
+    </Popover>
+  );
+}
+
+function Label(props: { children: React.ReactNode }) { // small uppercase caption used above each filter control
+  return <div className="text-[11px] font-medium uppercase tracking-wide text-muted-foreground">{props.children}</div>;
+}
+
+/** Removable chips, one per search filter that currently has a value; renders nothing when none is set. */
+function ActiveFilters() {
+  const filters = useSearchStore((s) => s.filters);
+  const setFilters = useSearchStore((s) => s.setFilters);
+  const entries = Object.entries(filters) as [keyof typeof filters, unknown][];
+  const active = entries.filter(([, value]) => !(value === null || value === undefined || value === ""));
+  if (active.length === 0) return null;
+  return (
+    <div className="flex flex-wrap gap-2">
+      {active.map(([name, value]) => (
+        <Badge key={name} variant="muted" className="gap-1 pr-1.5">
+          {name}: <span className="font-mono">{String(value)}</span>
+          <button
+            onClick={() => setFilters({ [name]: null } as any)}
+            className="rounded-full p-0.5 hover:bg-muted-foreground/20"
+            aria-label={`Remove ${name} filter`}
+          >
+            <X className="h-3 w-3" />
+          </button>
+        </Badge>
+      ))}
+    </div>
+  );
+}
+
+// Placeholder card built from `Skeleton` blocks: two short blocks in a row,
+// followed by four stacked bars of varying width.
+function ResultSkeleton() {
+  const bars = ["h-4 w-1/2", "h-3 w-full", "h-3 w-5/6", "h-3 w-2/3"];
+  return (
+    <div className="panel space-y-2.5 p-4">
+      <div className="flex items-center gap-2">
+        <Skeleton className="h-5 w-12" />
+        <Skeleton className="h-5 w-24" />
+      </div>
+      {bars.map((bar) => <Skeleton key={bar} className={bar} />)}
+    </div>
+  );
+}
diff --git a/frontend/src/pages/SettingsPage.tsx b/frontend/src/pages/SettingsPage.tsx
new file mode 100644
index 0000000..3f9c4fe
--- /dev/null
+++ b/frontend/src/pages/SettingsPage.tsx
@@ -0,0 +1,159 @@
+import { useState } from "react";
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Input } from "@/components/ui/input";
+import { Button } from "@/components/ui/button";
+import { Switch } from "@/components/ui/switch";
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
+import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { Badge } from "@/components/ui/badge";
+import { useUiStore } from "@/stores/uiStore";
+
+export function SettingsPage() { // Tabbed settings screen: Profile, Appearance, Search, Integrations.
+  const theme = useUiStore((s) => s.theme); // theme is read from and written to the shared UI store
+  const setTheme = useUiStore((s) => s.setTheme);
+  const [model, setModel] = useState("BAAI/bge-m3"); // NOTE(review): model/reranker/device are component-local state and
+  const [reranker, setReranker] = useState(true); // the "Save changes" button below has no onClick, so nothing here
+  const [device, setDevice] = useState<"cpu" | "cuda" | "mps">("cpu"); // is persisted or sent anywhere by this component.
+
+  return (
+    <>
+      <PageHeader title="Settings" description="Personal preferences and platform-wide configuration." />
+
+      <Tabs defaultValue="profile">
+        <TabsList>
+          <TabsTrigger value="profile">Profile</TabsTrigger>
+          <TabsTrigger value="appearance">Appearance</TabsTrigger>
+          <TabsTrigger value="search">Search</TabsTrigger>
+          <TabsTrigger value="integrations">Integrations</TabsTrigger>
+        </TabsList>
+
+        <TabsContent value="profile">
+          <Card>
+            <CardHeader>
+              <CardTitle>Account</CardTitle>
+              <CardDescription>Your TeamHUB SSO identity</CardDescription>
+            </CardHeader>
+            <CardContent className="grid grid-cols-1 gap-3 sm:grid-cols-2">
+              <Field label="Name" value="Vadim Malanov" />
+              <Field label="Role" value="Architect" />
+              <Field label="Email" value="vadim.malanov@gmail.com" />
+              <Field label="Workspace" value="TeamHUB · LegacyHUB" />
+            </CardContent>
+          </Card>
+        </TabsContent>
+
+        <TabsContent value="appearance">
+          <Card>
+            <CardHeader>
+              <CardTitle>Appearance</CardTitle>
+              <CardDescription>Theme and density</CardDescription>
+            </CardHeader>
+            <CardContent className="space-y-3">
+              <Row label="Theme">
+                <Select value={theme} onValueChange={(v) => setTheme(v as typeof theme)}>
+                  <SelectTrigger className="w-40">
+                    <SelectValue />
+                  </SelectTrigger>
+                  <SelectContent>
+                    <SelectItem value="light">Light</SelectItem>
+                    <SelectItem value="dark">Dark</SelectItem>
+                    <SelectItem value="system">System</SelectItem>
+                  </SelectContent>
+                </Select>
+              </Row>
+              <Row label="Compact density">
+                <Switch />
+              </Row>
+              <Row label="Animated transitions">
+                <Switch defaultChecked />
+              </Row>
+            </CardContent>
+          </Card>
+        </TabsContent>
+
+        <TabsContent value="search">
+          <Card>
+            <CardHeader>
+              <CardTitle>Search & retrieval</CardTitle>
+              <CardDescription>Embedding model, reranker, and hybrid weighting</CardDescription>
+            </CardHeader>
+            <CardContent className="space-y-3">
+              <Row label="Embedding model">
+                <Select value={model} onValueChange={setModel}>
+                  <SelectTrigger className="w-64">
+                    <SelectValue />
+                  </SelectTrigger>
+                  <SelectContent>
+                    <SelectItem value="BAAI/bge-m3">BAAI/bge-m3 (dense, 1024)</SelectItem>
+                    <SelectItem value="BAAI/bge-small-en">BAAI/bge-small-en</SelectItem>
+                  </SelectContent>
+                </Select>
+              </Row>
+              <Row label="Device">
+                <Select value={device} onValueChange={(v) => setDevice(v as typeof device)}>
+                  <SelectTrigger className="w-40">
+                    <SelectValue />
+                  </SelectTrigger>
+                  <SelectContent>
+                    <SelectItem value="cpu">CPU</SelectItem>
+                    <SelectItem value="cuda">CUDA</SelectItem>
+                    <SelectItem value="mps">Apple MPS</SelectItem>
+                  </SelectContent>
+                </Select>
+              </Row>
+              <Row label="BGE reranker">
+                <Switch checked={reranker} onCheckedChange={setReranker} />
+              </Row>
+              <Row label="RRF k constant">
+                <Input className="w-32" defaultValue={60} />
+              </Row>
+              <div className="flex justify-end">
+                <Button>Save changes</Button>
+              </div>
+            </CardContent>
+          </Card>
+        </TabsContent>
+
+        <TabsContent value="integrations">
+          <Card>
+            <CardHeader>
+              <CardTitle>TeamHUB Suite</CardTitle>
+              <CardDescription>Connected modules in the suite</CardDescription>
+            </CardHeader>
+            <CardContent className="space-y-2">
+              {[
+                { name: "QMS Hub", status: "Connected" },
+                { name: "Project Hub", status: "Connected" },
+                { name: "Asset Hub", status: "Pending" },
+              ].map((m) => (
+                <div key={m.name} className="flex items-center justify-between rounded-xl border border-border/70 bg-card px-3 py-2">
+                  <div className="text-sm font-medium">{m.name}</div>
+                  <Badge variant={m.status === "Connected" ? "success" : "muted"}>{m.status}</Badge>
+                </div>
+              ))}
+            </CardContent>
+          </Card>
+        </TabsContent>
+      </Tabs>
+    </>
+  );
+}
+
+function Row(props: { label: string; children: React.ReactNode }) { // label first, then the passed-in control
+  return (
+    <div className="flex items-center justify-between gap-3 rounded-xl border border-border/70 bg-card/40 px-3 py-2">
+      <div className="text-sm text-foreground">{props.label}</div>
+      {props.children}
+    </div>
+  );
+}
+
+function Field(props: { label: string; value: string }) { // read-only caption + value tile
+  return (
+    <div className="rounded-xl border border-border/70 bg-card px-3 py-2">
+      <div className="text-[10px] uppercase tracking-wide text-muted-foreground">{props.label}</div>
+      <div className="mt-0.5 text-sm">{props.value}</div>
+    </div>
+  );
+}
diff --git a/frontend/src/pages/SystemHealthPage.tsx b/frontend/src/pages/SystemHealthPage.tsx
new file mode 100644
index 0000000..4c0f876
--- /dev/null
+++ b/frontend/src/pages/SystemHealthPage.tsx
@@ -0,0 +1,100 @@
+import { Line, LineChart, ResponsiveContainer, Tooltip, XAxis, YAxis } from "recharts";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Badge } from "@/components/ui/badge";
+import { StatusChip } from "@/components/common/StatusChip";
+import { ServiceHealthCard } from "@/widgets/ServiceHealthCard";
+import { QueueWidget } from "@/widgets/QueueWidget";
+import { StorageWidget } from "@/widgets/StorageWidget";
+import { useDashboardStats } from "@/hooks/useDocuments";
+import { useHealth } from "@/hooks/useHealth";
+
+export function SystemHealthPage() { // System health screen: service/queue widgets, throughput chart, storage growth.
+  const { data } = useDashboardStats(); // feeds the throughput chart and StorageWidget; empty defaults until loaded
+  const { data: health } = useHealth(); // used only for the version badge in the page header
+
+  return ( // NOTE(review): the "Embeddings & reranker" rows below are hard-coded literals, not fetched data
+    <>
+      <PageHeader
+        title="System health"
+        description="Backing services, queue metrics, throughput, and storage growth."
+        actions={
+          <Badge variant={health?.status === "ok" ? "success" : "warning"} className="font-mono">
+            v{health?.version ?? "—"}
+          </Badge>
+        }
+      />
+
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-3">
+        <ServiceHealthCard />
+        <QueueWidget />
+        <Card>
+          <CardHeader>
+            <CardTitle>Embeddings & reranker</CardTitle>
+            <CardDescription>Inference latency & queue depth</CardDescription>
+          </CardHeader>
+          <CardContent className="space-y-3">
+            <Row label="Embedding model" value="BAAI/bge-m3" badge="cpu" />
+            <Row label="Reranker" value="BAAI/bge-reranker-v2-m3" badge="cpu" />
+            <Row label="Embedding p95" value="142 ms" tone="ok" />
+            <Row label="Reranker p95" value="380 ms" tone="warning" />
+            <Row label="Inference workers" value="2" />
+          </CardContent>
+        </Card>
+      </div>
+
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-2">
+        <Card>
+          <CardHeader>
+            <CardTitle>Throughput (24h)</CardTitle>
+            <CardDescription>Documents & chunks processed per minute</CardDescription>
+          </CardHeader>
+          <CardContent className="h-[260px]">
+            <ResponsiveContainer width="100%" height="100%">
+              <LineChart data={data?.throughput ?? []}>
+                <XAxis dataKey="time" tickLine={false} axisLine={false} fontSize={11} stroke="hsl(var(--muted-foreground))" />
+                <YAxis tickLine={false} axisLine={false} fontSize={11} stroke="hsl(var(--muted-foreground))" />
+                <Tooltip
+                  contentStyle={{
+                    background: "hsl(var(--popover))",
+                    border: "1px solid hsl(var(--border))",
+                    borderRadius: 12,
+                    fontSize: 12,
+                  }}
+                />
+                <Line type="monotone" dataKey="docs_per_min" stroke="hsl(var(--primary))" strokeWidth={2} dot={false} />
+                <Line type="monotone" dataKey="chunks_per_min" stroke="hsl(var(--muted-foreground))" strokeWidth={1.5} dot={false} strokeDasharray="3 4" />
+              </LineChart>
+            </ResponsiveContainer>
+          </CardContent>
+        </Card>
+
+        <StorageWidget totalBytes={data?.total_storage_bytes ?? 0} growth={data?.storage_growth ?? []} />
+      </div>
+    </>
+  );
+}
+
+// One metric line: muted label, then the value followed by an optional badge and
+// an optional status chip. `badge` and `tone` render nothing when omitted.
+function Row(props: {
+  label: string;
+  value: string;
+  badge?: string;
+  tone?: "ok" | "warning";
+}) {
+  const { label, value, badge, tone } = props;
+  const badgeEl = badge && <Badge variant="muted" className="font-mono">{badge}</Badge>;
+  const chipEl = tone && <StatusChip tone={tone} label={tone} />;
+  return (
+    <div className="flex items-center justify-between gap-3 rounded-xl border border-border/70 bg-card/40 px-3 py-2">
+      <div className="text-xs font-medium text-muted-foreground">{label}</div>
+      <div className="flex items-center gap-2 font-mono text-sm">
+        {value}
+        {badgeEl}
+        {chipEl}
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/pages/TablesFiguresPage.tsx b/frontend/src/pages/TablesFiguresPage.tsx
new file mode 100644
index 0000000..85036d2
--- /dev/null
+++ b/frontend/src/pages/TablesFiguresPage.tsx
@@ -0,0 +1,108 @@
+import { useState } from "react";
+import { Image as ImageIcon, Table as TableIcon } from "lucide-react";
+
+import { PageHeader } from "@/components/common/PageHeader";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs";
+import { Badge } from "@/components/ui/badge";
+import { useDocuments, useDocument } from "@/hooks/useDocuments";
+
+export function TablesFiguresPage() { // Browser for the tables and figures of one selected, fully indexed document.
+  const { data: list } = useDocuments({ page_size: 8, status: "INDEXING_COMPLETED" }); // first 8 indexed documents
+  const [docId, setDocId] = useState<string | undefined>(undefined); // explicit selection, if the user made one
+  const effective = docId ?? list?.items[0]?.id; // otherwise fall back to the first listed document
+  const { data: doc } = useDocument(effective);
+
+  return (
+    <>
+      <PageHeader
+        title="Tables & Figures"
+        description="Browse every structured artifact Docling extracted from your archive."
+      />
+
+      <div className="grid grid-cols-1 gap-4 xl:grid-cols-[260px_1fr]">
+        <Card className="h-fit">
+          <CardHeader>
+            <CardTitle>Documents</CardTitle>
+            <CardDescription>Select a document to inspect</CardDescription>
+          </CardHeader>
+          <CardContent className="space-y-1.5">
+            {list?.items.map((d) => (
+              <button type="button"
+                key={d.id}
+                onClick={() => setDocId(d.id)}
+                className={
+                  "flex w-full items-center gap-2 rounded-lg border px-2.5 py-2 text-left text-xs transition-colors " +
+                  (effective === d.id
+                    ? "border-primary/60 bg-accent/40 text-foreground"
+                    : "border-border/60 hover:bg-muted/40")
+                }
+              >
+                <div className="grid h-7 w-7 place-items-center rounded-md bg-primary/10 text-primary">
+                  <TableIcon className="h-3.5 w-3.5" />
+                </div>
+                <div className="min-w-0">
+                  <div className="truncate font-medium">{d.original_file_name}</div>
+                  <div className="font-mono text-[10px] text-muted-foreground">
+                    {d.pages} pages
+                  </div>
+                </div>
+              </button>
+            ))}
+          </CardContent>
+        </Card>
+
+        <Tabs defaultValue="tables" className="space-y-4">
+          <TabsList>
+            <TabsTrigger value="tables">
+              <TableIcon className="h-3.5 w-3.5" /> Tables
+              {doc && <Badge variant="muted" className="ml-1">{doc.tables.length}</Badge>}
+            </TabsTrigger>
+            <TabsTrigger value="figures">
+              <ImageIcon className="h-3.5 w-3.5" /> Figures
+              {doc && <Badge variant="muted" className="ml-1">{doc.figures.length}</Badge>}
+            </TabsTrigger>
+          </TabsList>
+
+          <TabsContent value="tables">
+            <div className="space-y-3">
+              {doc?.tables.map((t) => (
+                <Card key={t.id}>
+                  <CardHeader>
+                    <CardTitle className="text-sm">Table {t.table_index + 1}</CardTitle>
+                    <CardDescription>
+                      <Badge variant="outline" className="mr-2 font-mono">p.{t.page_number}</Badge>
+                      {t.summary}
+                    </CardDescription>
+                  </CardHeader>
+                  <CardContent>
+                    <pre className="overflow-x-auto rounded-lg border border-border/60 bg-muted/30 p-3 font-mono text-[12px] leading-relaxed">
+                      {t.markdown}
+                    </pre>
+                  </CardContent>
+                </Card>
+              ))}
+            </div>
+          </TabsContent>
+
+          <TabsContent value="figures">
+            <div className="grid grid-cols-1 gap-3 md:grid-cols-2 xl:grid-cols-3">
+              {doc?.figures.map((f) => (
+                <Card key={f.id}>
+                  <CardContent className="space-y-2 p-3">
+                    <div className="bg-grid-faint relative aspect-video rounded-lg border border-border/60" />
+                    <div className="flex items-center justify-between text-xs">
+                      <Badge variant="outline" className="font-mono">p.{f.page_number}</Badge>
+                      <Badge variant="muted">figure #{f.figure_index + 1}</Badge>
+                    </div>
+                    <p className="text-xs text-muted-foreground">{f.caption ?? f.description}</p>
+                  </CardContent>
+                </Card>
+              ))}
+            </div>
+          </TabsContent>
+        </Tabs>
+      </div>
+    </>
+  );
+}
diff --git a/frontend/src/services/apiClient.ts b/frontend/src/services/apiClient.ts
new file mode 100644
index 0000000..7b6039e
--- /dev/null
+++ b/frontend/src/services/apiClient.ts
@@ -0,0 +1,33 @@
+import axios, { type AxiosInstance, type AxiosError } from "axios";
+
+const BASE_URL = import.meta.env.VITE_API_BASE_URL ?? "/api/v1"; // falls back to the relative /api/v1 path when unset
+
+export const apiClient: AxiosInstance = axios.create({ // shared axios instance used by the service modules
+  baseURL: BASE_URL,
+  timeout: 60_000, // milliseconds
+  headers: { "Content-Type": "application/json" },
+});
+
+apiClient.interceptors.response.use(
+  (response) => response,
+  (error: AxiosError) => {
+    // Wrap every failure in ApiError. `detail` is unvalidated JSON, so it is used as the message only when it is a string.
+    const status = error.response?.status;
+    const detail = (error.response?.data as { detail?: unknown } | null | undefined)?.detail;
+    return Promise.reject(new ApiError(typeof detail === "string" ? detail : error.message, status, error));
+  }
+);
+
+export class ApiError extends Error { // rejection value produced by the response interceptor above
+  status: number | undefined; // HTTP status; undefined when the failed request has no response
+  cause: unknown; // the original error handed to the constructor (an AxiosError in the interceptor)
+  constructor(message: string, status: number | undefined, cause: unknown) {
+    super(message);
+    this.name = "ApiError";
+    this.status = status;
+    this.cause = cause;
+  }
+}
+
+// Mock mode is on when VITE_USE_MOCK is unset or equals "true" (case-insensitive); any other value turns it off.
+export const USE_MOCK = String(import.meta.env.VITE_USE_MOCK ?? "true").toLowerCase() === "true";
diff --git a/frontend/src/services/documents.ts b/frontend/src/services/documents.ts
new file mode 100644
index 0000000..7f5c6d4
--- /dev/null
+++ b/frontend/src/services/documents.ts
@@ -0,0 +1,71 @@
+import { apiClient, USE_MOCK } from "@/services/apiClient";
+import * as mock from "@/services/mock/mockData";
+import type { DocumentDetail, DocumentSummary } from "@/services/types";
+
+export interface DocumentListParams { // query options accepted by listDocuments
+  q?: string; // mock mode: case-insensitive substring of the file name or source path
+  status?: string;
+  block_type?: string; // NOTE(review): not applied by the mock branch of listDocuments
+  min_confidence?: number; // mock mode: documents without an OCR confidence count as 0
+  needs_review?: boolean;
+  page?: number; // mock mode: 1-based, defaults to 1
+  page_size?: number; // mock mode: defaults to 25
+}
+
+export interface DocumentList { // one page of listDocuments results
+  items: DocumentSummary[];
+  total: number; // mock mode: number of matches before pagination
+  page: number;
+  page_size: number;
+}
+
+// Lists documents. Mock mode filters the in-memory fixtures by q, status,
+// min_confidence and needs_review, then paginates locally (page defaults to 1,
+// page_size to 25); otherwise the params are forwarded to GET /documents.
+export async function listDocuments(params: DocumentListParams = {}): Promise<DocumentList> {
+  if (!USE_MOCK) {
+    const response = await apiClient.get<DocumentList>("/documents", { params });
+    return response.data;
+  }
+  await delay();
+  const { status, min_confidence: minConf, needs_review: reviewOnly } = params;
+  const needle = params.q ? params.q.toLowerCase() : null;
+  // NOTE: params.block_type is not applied in mock mode.
+  const matches = mock.documents.filter((doc) => {
+    if (needle !== null) {
+      const inName = doc.original_file_name.toLowerCase().includes(needle);
+      if (!inName && !doc.source_path.toLowerCase().includes(needle)) return false;
+    }
+    if (status && doc.status !== status) return false;
+    if (minConf != null && !((doc.ocr_confidence ?? 0) >= minConf)) return false;
+    if (reviewOnly && !doc.flags.needs_manual_review) return false;
+    return true;
+  });
+  const page = params.page ?? 1;
+  const pageSize = params.page_size ?? 25;
+  const start = (page - 1) * pageSize;
+  const items = matches.slice(start, page * pageSize);
+  return { items, total: matches.length, page, page_size: pageSize };
+}
+
+// Fetches one document with its detail data; the mock lookup yields undefined for an unknown id.
+export async function getDocument(id: string): Promise<DocumentDetail | undefined> {
+  if (!USE_MOCK) {
+    return (await apiClient.get<DocumentDetail>(`/documents/${id}`)).data;
+  }
+  await delay();
+  return mock.findDocument(id);
+}
+
+// NOTE(review): no return annotation and an untyped GET, so the result type is inferred as `any`.
+export async function getDashboardStats() {
+  if (!USE_MOCK) {
+    return (await apiClient.get("/dashboard/stats")).data;
+  }
+  await delay();
+  return mock.dashboard;
+}
+
+function delay(ms = 250) { // resolves after `ms` milliseconds; simulated latency for the mock branches
+  return new Promise<void>((done) => { setTimeout(done, ms); });
+}
diff --git a/frontend/src/services/health.ts b/frontend/src/services/health.ts
new file mode 100644
index 0000000..42c5fd2
--- /dev/null
+++ b/frontend/src/services/health.ts
@@ -0,0 +1,25 @@
+import { apiClient, USE_MOCK } from "@/services/apiClient";
+import * as mock from "@/services/mock/mockData";
+import type { HealthResponse, QueueState } from "@/services/types";
+
+// Service health: fixture data after the mock delay in mock mode, otherwise GET /health.
+export async function getHealth(): Promise<HealthResponse> {
+  if (!USE_MOCK) {
+    return (await apiClient.get<HealthResponse>("/health")).data;
+  }
+  await delay();
+  return mock.health;
+}
+
+// Queue metrics. There is no backend endpoint yet, so the fixture is returned in
+// both modes; only mock mode waits for the simulated latency first.
+export async function getQueueState(): Promise<QueueState> {
+  if (USE_MOCK) {
+    await delay();
+  }
+  return mock.queue;
+}
+
+function delay(ms = 220): Promise<void> { // resolves after `ms` milliseconds; simulated latency for mock mode
+  return new Promise((done) => { setTimeout(done, ms); });
+}
diff --git a/frontend/src/services/ingestion.ts b/frontend/src/services/ingestion.ts
new file mode 100644
index 0000000..6824101
--- /dev/null
+++ b/frontend/src/services/ingestion.ts
@@ -0,0 +1,27 @@
+import { apiClient, USE_MOCK } from "@/services/apiClient";
+import * as mock from "@/services/mock/mockData";
+import type { IngestFolderRequest, IngestFolderResponse, IngestionRun } from "@/services/types";
+
+// Starts ingestion of a folder via POST /ingest/folder. Mock mode ignores `req`,
+// pauses 600 ms, and returns fixed counts with a freshly generated run id.
+export async function ingestFolder(req: IngestFolderRequest): Promise<IngestFolderResponse> {
+  if (!USE_MOCK) {
+    const response = await apiClient.post<IngestFolderResponse>("/ingest/folder", req);
+    return response.data;
+  }
+  await new Promise((resolve) => setTimeout(resolve, 600));
+  const counts = { discovered: 78, queued: 72, skipped_duplicates: 4, invalid_files: 2 };
+  return {
+    run_id: crypto.randomUUID(),
+    ...counts,
+  };
+}
+
+// Lists ingestion runs: fixture data after a 220 ms pause in mock mode, otherwise GET /ingest/runs.
+export async function listRuns(): Promise<IngestionRun[]> {
+  if (!USE_MOCK) {
+    return (await apiClient.get<IngestionRun[]>("/ingest/runs")).data;
+  }
+  await new Promise((resolve) => setTimeout(resolve, 220));
+  return mock.ingestionRuns;
+}
diff --git a/frontend/src/services/mock/mockData.ts b/frontend/src/services/mock/mockData.ts
new file mode 100644
index 0000000..c95b5db
--- /dev/null
+++ b/frontend/src/services/mock/mockData.ts
@@ -0,0 +1,298 @@
+import type {
+  ChunkSummary,
+  DashboardStats,
+  DocumentDetail,
+  DocumentStatus,
+  DocumentSummary,
+  FigureData,
+  HealthResponse,
+  IngestionRun,
+  PageSummary,
+  QueueState,
+  SearchHit,
+  SearchResponse,
+  TableData,
+  TimelineEvent,
+} from "@/services/types";
+
+const RNG = mulberry32(20260510);
+
+function mulberry32(seed: number) { // returns a generator closure; every call advances the captured `seed`
+  return function () {
+    let t = (seed += 0x6d2b79f5);
+    t = Math.imul(t ^ (t >>> 15), t | 1);
+    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296; // unsigned 32-bit value scaled into [0, 1)
+  };
+}
+
+// UUID-v4-shaped id from the seeded RNG: "x" becomes any hex digit, "y" one of 8, 9, a, b.
+function uuid(): string {
+  return "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".replace(/[xy]/g, (slot) => {
+    const nibble = Math.floor(RNG() * 16);
+    return (slot === "y" ? (nibble & 0x3) | 0x8 : nibble).toString(16);
+  });
+}
+
+function pick<T>(items: readonly T[]): T { // one element chosen with a single draw from the module RNG
+  return items[Math.floor(RNG() * items.length)]; // evaluates to undefined at runtime if `items` is empty
+}
+
+function isoDaysAgo(days: number, jitterMin = 0): string {
+  // ISO timestamp `days` days before now, minus a random 0..jitterMin-1 whole minutes (always one RNG draw).
+  // Millisecond arithmetic honours fractional `days`; Date#setUTCDate would truncate them.
+  const jitterMs = Math.floor(RNG() * jitterMin) * 60_000;
+  return new Date(Date.now() - days * 86_400_000 - jitterMs).toISOString();
+}
+
+const FILE_PREFIXES = [
+  "ГОСТ_21.501-93",
+  "ТУ_5781-001-2010",
+  "Чертеж_фундамента",
+  "Спецификация_узлов",
+  "Регламент_ТО_2014",
+  "Журнал_ремонтов",
+  "ПОС_корпус_3",
+  "Расчет_прочности",
+  "Схема_электропитания",
+  "Архивный_отчет",
+];
+
+const STATUSES: DocumentStatus[] = [ // pool for pick(); a status listed several times is proportionally more likely
+  "INDEXING_COMPLETED",
+  "INDEXING_COMPLETED",
+  "INDEXING_COMPLETED",
+  "CHUNKING_COMPLETED",
+  "EXTRACTION_COMPLETED",
+  "OCR_COMPLETED",
+  "OCR_STARTED",
+  "OCR_FAILED",
+  "EXTRACTION_FAILED",
+  "FAILED",
+];
+
+function makeDocument(i: number): DocumentSummary { // builds fixture #i; values depend on the order of RNG draws below
+  const status = i % 17 === 0 ? "FAILED" : i % 11 === 0 ? "OCR_FAILED" : pick(STATUSES); // forced failures, else random
+  const ok = status === "INDEXING_COMPLETED";
+  const conf = ok ? 0.7 + RNG() * 0.28 : 0.3 + RNG() * 0.4; // 0.70–0.98 when fully indexed, otherwise 0.30–0.70
+  const name = `${pick(FILE_PREFIXES)}_${String(1000 + i).slice(-4)}.pdf`;
+  return {
+    id: uuid(),
+    original_file_name: name,
+    source_path: `/archive/${pick(["raw", "scanned", "vendor"])}/${name}`,
+    sha256: Array.from({ length: 64 }, () => Math.floor(RNG() * 16).toString(16)).join(""), // 64 random hex digits, not a real digest
+    status,
+    file_size_bytes: Math.floor(120_000 + RNG() * 25_000_000),
+    pages: Math.floor(3 + RNG() * 180),
+    chunks: Math.floor(8 + RNG() * 220),
+    ocr_confidence: conf,
+    language_hint: RNG() > 0.4 ? "ru" : "en",
+    created_at: isoDaysAgo(Math.floor(RNG() * 30), 1440),
+    updated_at: isoDaysAgo(Math.floor(RNG() * 5), 720), // NOTE(review): drawn independently, so it can precede created_at
+    flags: {
+      low_ocr_confidence: conf < 0.6,
+      possible_garbled_text: RNG() > 0.9,
+      table_detected: RNG() > 0.55,
+      figure_detected: RNG() > 0.7,
+      handwriting_detected: RNG() > 0.92,
+      needs_manual_review: !ok || conf < 0.6,
+    },
+  };
+}
+
+export const documents: DocumentSummary[] = Array.from({ length: 280 }, (_, i) => makeDocument(i));
+
+export function findDocument(id: string): DocumentDetail | undefined { // summary -> detail; each call draws fresh RNG values and ids
+  const doc = documents.find((d) => d.id === id);
+  if (!doc) return undefined; // unknown id
+  const pages: PageSummary[] = Array.from({ length: Math.min(doc.pages, 24) }, (_, p) => ({ // at most 24 pages are generated
+    page_number: p + 1,
+    text: `Страница ${p + 1}. Образец извлечённого текста для ${doc.original_file_name}. ГОСТ 21.501-93 определяет правила выполнения архитектурно-строительных рабочих чертежей.`,
+    ocr_confidence: Math.min(1, (doc.ocr_confidence ?? 0.7) + (RNG() - 0.5) * 0.2), // document value ± 0.1, capped at 1
+    has_tables: RNG() > 0.6,
+    has_figures: RNG() > 0.75,
+    has_handwriting: RNG() > 0.92,
+  }));
+  const chunks: ChunkSummary[] = Array.from({ length: Math.min(doc.chunks, 40) }, (_, c) => ({ // at most 40 chunks
+    id: uuid(),
+    document_id: doc.id,
+    page_number: 1 + Math.floor(c / 3), // three chunks per page; NOTE(review): not clamped to doc.pages
+    block_type: pick([
+      "paragraph",
+      "paragraph",
+      "heading",
+      "list",
+      "table",
+      "figure_caption",
+      "title",
+    ]),
+    block_id: `block-${c}`,
+    chunk_index: c,
+    text: `Фрагмент ${c}. ${pick([
+      "Описание узлов и сопряжений несущих конструкций.",
+      "Требования к точности изготовления железобетонных изделий.",
+      "Перечень используемых материалов с указанием марок и ГОСТ.",
+      "Указания по производству работ при пониженных температурах.",
+      "Контрольные размеры приведены в таблице на следующей странице.",
+    ])}`,
+    token_count: 80 + Math.floor(RNG() * 600),
+    quality_flags: { ...doc.flags },
+    metadata: { section_heading: "Глава 2. Основные положения" },
+  }));
+  const tables: TableData[] = Array.from({ length: 4 }, (_, t) => ({ // always 4 tables, on pages 3, 5, 7, 9
+    id: uuid(),
+    page_number: 3 + t * 2,
+    table_index: t,
+    markdown: `| Параметр | Значение | Ед. |\n| --- | --- | --- |\n| Высота | 3.4 | м |\n| Толщина | 200 | мм |\n| Класс | B25 | — |`,
+    summary: `Table ${t} on page ${3 + t * 2}: 3 rows × 3 cols. Columns: Параметр, Значение, Ед.`,
+  }));
+  const figures: FigureData[] = Array.from({ length: 3 }, (_, f) => ({ // always 3 figures, on pages 5, 8, 11
+    id: uuid(),
+    page_number: 5 + f * 3,
+    figure_index: f,
+    caption: `Рисунок ${f + 1}. Схема расположения узлов`,
+    description: `Figure ${f + 1} detected on page ${5 + f * 3}.`,
+  }));
+  const stages = [
+    "DISCOVERED",
+    "STORED_ORIGINAL",
+    "OCR_STARTED",
+    "OCR_COMPLETED",
+    "EXTRACTION_STARTED",
+    "EXTRACTION_COMPLETED",
+    "CHUNKING_COMPLETED",
+    "INDEXING_COMPLETED",
+  ];
+  const timeline: TimelineEvent[] = stages.map((s, i) => ({ // one INFO event per stage, regardless of doc.status
+    id: uuid(),
+    stage: s,
+    level: "INFO",
+    message: `${s.replaceAll("_", " ")} completed`,
+    data: {},
+    created_at: isoDaysAgo(0, 60 * (stages.length - i)), // random jitter window shrinks for later stages
+  }));
+  return { ...doc, pages_data: pages, chunks_data: chunks, tables, figures, timeline };
+}
+
+// Mock search: returns the first `limit` of up to 80 fully indexed fixture documents with
+// rank-based scores. The query is not matched; it is only echoed into the snippet and metadata.
+export function searchMock(query: string, mode: string, limit: number): SearchResponse {
+  const q = query.toLowerCase();
+  const candidates = documents
+    .filter((d) => d.status === "INDEXING_COMPLETED")
+    .slice(0, 80);
+  const results: SearchHit[] = candidates.slice(0, limit).map((d, i) => {
+    const score = Math.max(0.05, 1 - i * 0.04 - RNG() * 0.05);
+    const block_type = pick(["paragraph", "heading", "table", "figure_caption", "list"]);
+    // Drawn once so the hit and its citation always point at the same page.
+    const page_number = 1 + Math.floor(RNG() * d.pages);
+    return {
+      rank: i + 1,
+      score,
+      document_id: d.id,
+      chunk_id: uuid(),
+      original_file_name: d.original_file_name,
+      source_path: d.source_path,
+      page_number,
+      block_type,
+      text: highlightedSnippet(query),
+      citation: { pdf: d.original_file_name, page: page_number, block_id: `block-${i}` },
+      quality_flags: d.flags,
+      metadata: { section_heading: "Глава 2", mode, q },
+    };
+  });
+  return {
+    query,
+    mode: mode as SearchResponse["mode"],
+    total_candidates: candidates.length,
+    reranked: mode === "hybrid",
+    results,
+  };
+}
+
+function highlightedSnippet(query: string): string { // canned snippet mentioning the trimmed query twice ("ГОСТ" if blank)
+  const term = query.trim() || "ГОСТ";
+  return [
+    `Согласно ${term}, требования к выполнению рабочих чертежей определяются разделом 4. `,
+    `Все размеры приведены в миллиметрах, если иное не указано. Ссылка на смежные документы — `,
+    `${term} приложение Б. Контрольные значения должны соответствовать таблице 5.1.`,
+  ].join("");
+}
+
+export const health: HealthResponse = {
+  status: "ok",
+  version: "0.1.0",
+  components: [
+    { name: "postgres", status: "ok", detail: { latency_ms: 4 } },
+    { name: "minio", status: "ok", detail: { buckets: ["legacyhub-originals", "legacyhub-derived"] } },
+    { name: "opensearch", status: "ok", detail: { cluster_status: "yellow", nodes: 1 } },
+    { name: "qdrant", status: "ok", detail: { collections: ["legacy_chunks"] } },
+    { name: "redis", status: "ok", detail: {} },
+  ],
+};
+
+export const queue: QueueState = {
+  pending: 1234,
+  in_progress: 16,
+  completed_last_hour: 482,
+  failed_last_hour: 9,
+  average_latency_ms: 12_400,
+};
+
+export const ingestionRuns: IngestionRun[] = Array.from({ length: 12 }, (_, i) => { // 12 runs; run i started i days ago
+  const total = 200 + Math.floor(RNG() * 1500);
+  const failed = Math.floor(total * (RNG() * 0.05)); // up to 5% of the files
+  return {
+    id: uuid(),
+    started_at: isoDaysAgo(i),
+    finished_at: i === 0 ? null : isoDaysAgo(i - 0.1), // run 0 has no finish time; others request 0.1 day less than the start offset
+    status: i === 0 ? "RUNNING" : failed > total * 0.03 ? "PARTIAL" : "COMPLETED", // PARTIAL above 3% failures
+    source_folder: pick(["/data/input/2024", "/data/input/2023", "/data/input/archive"]),
+    total_files: total,
+    processed_files: total - failed,
+    failed_files: failed,
+  };
+});
+
+export const dashboard: DashboardStats = (() => { // computed once at module load, partly from `documents`, partly fixed/random
+  const byStatus = documents.reduce((acc, d) => {
+    acc[d.status] = (acc[d.status] ?? 0) + 1;
+    return acc;
+  }, {} as Record<DocumentStatus, number>); // only statuses that actually occur get a key
+  const totalChunks = documents.reduce((a, d) => a + d.chunks, 0);
+  const totalPages = documents.reduce((a, d) => a + d.pages, 0);
+  return {
+    total_documents: documents.length,
+    total_pages: totalPages,
+    total_chunks: totalChunks,
+    total_storage_bytes: documents.reduce((a, d) => a + d.file_size_bytes, 0),
+    failed_documents: (byStatus.FAILED ?? 0) + (byStatus.OCR_FAILED ?? 0) + (byStatus.EXTRACTION_FAILED ?? 0),
+    needs_review: documents.filter((d) => d.flags.needs_manual_review).length,
+    avg_ocr_confidence:
+      documents.reduce((a, d) => a + (d.ocr_confidence ?? 0), 0) / documents.length,
+    processed_last_24h: 482, // fixed literal, not derived from `documents`
+    by_status: byStatus,
+    daily_ingest: Array.from({ length: 14 }, (_, i) => ({ // last 14 days, oldest first
+      date: new Date(Date.now() - (13 - i) * 86_400_000).toISOString().slice(0, 10),
+      ingested: 120 + Math.floor(RNG() * 280),
+      failed: Math.floor(RNG() * 18),
+    })),
+    ocr_distribution: [ // fixed literals, not derived from `documents`
+      { bucket: "0.0-0.4", count: 18 },
+      { bucket: "0.4-0.6", count: 42 },
+      { bucket: "0.6-0.75", count: 76 },
+      { bucket: "0.75-0.85", count: 84 },
+      { bucket: "0.85-0.95", count: 41 },
+      { bucket: "0.95-1.0", count: 19 },
+    ],
+    storage_growth: Array.from({ length: 14 }, (_, i) => ({ // last 14 days, oldest first
+      date: new Date(Date.now() - (13 - i) * 86_400_000).toISOString().slice(0, 10),
+      bytes: (3 + i * 0.4 + RNG()) * 1024 * 1024 * 1024, // (3 + 0.4·i + random[0,1)) GiB
+    })),
+    throughput: Array.from({ length: 24 }, (_, i) => ({ // one point per hour label, "00:00" to "23:00"
+      time: `${String(i).padStart(2, "0")}:00`,
+      docs_per_min: 4 + Math.floor(RNG() * 18),
+      chunks_per_min: 80 + Math.floor(RNG() * 420),
+    })),
+  };
+})();
diff --git a/frontend/src/services/quality.ts b/frontend/src/services/quality.ts
new file mode 100644
index 0000000..697e257
--- /dev/null
+++ b/frontend/src/services/quality.ts
@@ -0,0 +1,42 @@
+import { USE_MOCK } from "@/services/apiClient";
+import * as mock from "@/services/mock/mockData";
+import type { DocumentSummary } from "@/services/types";
+
+export type QualityQueueKind = "low_confidence" | "handwriting" | "failed"; // selects which filter getQualityQueue applies
+
+export interface QualityQueueItem { // one row of a quality-review queue
+  document: DocumentSummary;
+  reason: string; // human-readable explanation, e.g. "OCR confidence below 60%"
+  pages_flagged: number; // the mock derives this as max(1, floor(pages * 0.18))
+  detected_at: string; // the mock copies the document's updated_at
+}
+
+/** Resolves the review queue for `kind`; only the mock branch yields data so far. */
+export async function getQualityQueue(kind: QualityQueueKind): Promise<QualityQueueItem[]> {
+  if (!USE_MOCK) return []; // Placeholder for real endpoint
+  await new Promise((resolve) => setTimeout(resolve, 200)); // simulated latency
+  const queued: QualityQueueItem[] = [];
+  for (const doc of mock.documents) {
+    if (queued.length === 40) break; // the queue is capped at the first 40 matches
+    let matches: boolean | undefined;
+    let reason: string;
+    if (kind === "low_confidence") {
+      matches = (doc.ocr_confidence ?? 1) < 0.6; // a missing score never counts as low
+      reason = "OCR confidence below 60%";
+    } else if (kind === "handwriting") {
+      matches = doc.flags.handwriting_detected;
+      reason = "Handwritten fragments detected";
+    } else {
+      matches = doc.status === "FAILED" || doc.status === "OCR_FAILED" || doc.status === "EXTRACTION_FAILED";
+      reason = `Pipeline failure (${doc.status})`;
+    }
+    if (!matches) continue;
+    const pages_flagged = Math.max(1, Math.floor(doc.pages * 0.18));
+    queued.push({ document: doc, reason, pages_flagged, detected_at: doc.updated_at });
+  }
+  return queued;
+}
+
+export async function markReviewed(_documentId: string): Promise<void> { // stub: the id is unused and nothing is sent anywhere
+  await new Promise((r) => setTimeout(r, 200)); // resolves after a 200 ms timer
+}
diff --git a/frontend/src/services/search.ts b/frontend/src/services/search.ts
new file mode 100644
index 0000000..57c1939
--- /dev/null
+++ b/frontend/src/services/search.ts
@@ -0,0 +1,29 @@
+import { apiClient, USE_MOCK } from "@/services/apiClient";
+import * as mock from "@/services/mock/mockData";
+import type { SearchRequest, SearchResponse } from "@/services/types";
+
+/** Executes a search: the mock dataset when USE_MOCK is set, otherwise POST /search. */
+export async function search(req: SearchRequest): Promise<SearchResponse> {
+  if (!USE_MOCK) {
+    return (await apiClient.post<SearchResponse>("/search", req)).data;
+  }
+  await new Promise((done) => setTimeout(done, 320)); // simulated latency
+  return mock.searchMock(req.query, req.search_mode, req.limit);
+}
+
+export async function suggest(query: string): Promise<string[]> {
+  // No backend endpoint yet — suggestions come from the static list below (up to 5 when empty, 6 when filtered).
+  await new Promise((r) => setTimeout(r, 80));
+  const q = query.trim().toLowerCase(); // whitespace-only input counts as empty; padding must not block matches
+  const base = [
+    "ГОСТ 21.501-93 рабочие чертежи",
+    "класс бетона B25",
+    "журнал ремонтов узлов",
+    "правила производства земляных работ",
+    "схема электропитания корпус 3",
+    "регламент ТО 2014",
+    "контроль качества сварных соединений",
+  ];
+  if (!q) return base.slice(0, 5);
+  return base.filter((s) => s.toLowerCase().includes(q)).slice(0, 6);
+}
diff --git a/frontend/src/services/types.ts b/frontend/src/services/types.ts
new file mode 100644
index 0000000..9feab51
--- /dev/null
+++ b/frontend/src/services/types.ts
@@ -0,0 +1,209 @@
+// Typed API contracts (mirror backend Pydantic schemas)
+
+export type SearchMode = "lexical" | "semantic" | "hybrid"; // the search store starts in "hybrid"
+
+export interface SearchFilters { // every field is nullable; the search store's default sets all four to null
+  document_id: string | null;
+  source_path: string | null;
+  block_type: string | null;
+  min_ocr_confidence: number | null; // presumably on the same scale as ocr_confidence — TODO confirm
+}
+
+export interface SearchRequest { // body that search() POSTs to "/search"
+  query: string;
+  limit: number;
+  filters: SearchFilters; // not forwarded to the mock search, which receives only query, search_mode and limit
+  search_mode: SearchMode;
+}
+
+export interface Citation { // source pointer attached to each SearchHit
+  pdf: string;
+  page: number;
+  block_id?: string | null; // ChunkPreview renders "—" when this is null or undefined
+  table_id?: string | null;
+  figure_id?: string | null;
+}
+
+export interface QualityFlags { // all flags are optional, so an absent flag reads as undefined (falsy)
+  low_ocr_confidence?: boolean;
+  very_short_text?: boolean;
+  possible_garbled_text?: boolean;
+  table_detected?: boolean;
+  figure_detected?: boolean;
+  handwriting_detected?: boolean; // selects documents for the "handwriting" quality queue
+  needs_manual_review?: boolean; // counted into DashboardStats.needs_review by the mock dashboard
+}
+
+export interface SearchHit { // one entry of SearchResponse.results
+  rank: number;
+  score: number; // ChunkPreview prints this with four decimals
+  document_id: string;
+  chunk_id: string; // used as the React key in ChunkPreview, which displays only its first 8 characters
+  original_file_name: string;
+  source_path: string;
+  page_number: number;
+  block_type: string;
+  text: string; // rendered through <Highlight> together with the active query
+  citation: Citation;
+  quality_flags: QualityFlags;
+  metadata: Record<string, unknown>;
+}
+
+export interface SearchResponse {
+  query: string;
+  mode: SearchMode;
+  total_candidates: number;
+  reranked: boolean;
+  results: SearchHit[];
+}
+
+// Health
+export interface ComponentHealth {
+  name: string;
+  status: "ok" | "error" | "degraded";
+  detail: Record<string, unknown>;
+}
+
+export interface HealthResponse {
+  status: "ok" | "error" | "degraded";
+  version: string;
+  components: ComponentHealth[];
+}
+
+// Documents
+export type DocumentStatus = // the mock dashboard and the "failed" quality queue treat OCR_FAILED, EXTRACTION_FAILED and FAILED as failures
+  | "DISCOVERED"
+  | "STORED_ORIGINAL"
+  | "OCR_STARTED"
+  | "OCR_COMPLETED"
+  | "OCR_FAILED"
+  | "EXTRACTION_STARTED"
+  | "EXTRACTION_COMPLETED"
+  | "EXTRACTION_FAILED"
+  | "CHUNKING_COMPLETED"
+  | "INDEXING_COMPLETED"
+  | "FAILED";
+
+export interface DocumentSummary { // list-level view of a document; DocumentDetail extends it
+  id: string;
+  original_file_name: string;
+  source_path: string;
+  sha256: string;
+  status: DocumentStatus;
+  file_size_bytes: number;
+  pages: number; // a count (the mock dashboard sums it into total_pages)
+  chunks: number; // a count (the mock dashboard sums it into total_chunks)
+  ocr_confidence: number | null; // the "low_confidence" quality queue treats null as 1, so unscored documents never match
+  language_hint: string | null;
+  created_at: string;
+  updated_at: string;
+  flags: QualityFlags;
+}
+
+export interface PageSummary { // per-page data consumed by PdfPreviewPane
+  page_number: number; // PdfPreviewPane looks the active page up by this number
+  text: string;
+  ocr_confidence: number | null;
+  has_tables: boolean;
+  has_figures: boolean;
+  has_handwriting: boolean;
+}
+
+export interface ChunkSummary {
+  id: string;
+  document_id: string;
+  page_number: number;
+  block_type: string;
+  block_id: string | null;
+  chunk_index: number;
+  text: string;
+  token_count: number | null;
+  quality_flags: QualityFlags;
+  metadata: Record<string, unknown>;
+}
+
+export interface DocumentDetail extends DocumentSummary {
+  pages_data: PageSummary[];
+  chunks_data: ChunkSummary[];
+  tables: TableData[];
+  figures: FigureData[];
+  timeline: TimelineEvent[];
+}
+
+export interface TableData {
+  id: string;
+  page_number: number;
+  table_index: number;
+  markdown: string;
+  summary: string | null;
+}
+
+export interface FigureData {
+  id: string;
+  page_number: number;
+  figure_index: number;
+  caption: string | null;
+  description: string | null;
+}
+
+export interface TimelineEvent { // one pipeline log entry for a document
+  id: string;
+  stage: string; // DocumentTimeline matches this against its stage names (e.g. "OCR_STARTED")
+  level: "INFO" | "WARN" | "ERROR";
+  message: string; // shown under the stage label in DocumentTimeline
+  data: Record<string, unknown>;
+  created_at: string; // passed to relativeTime() by DocumentTimeline
+}
+
+// Ingestion
+export type RunStatus = "RUNNING" | "COMPLETED" | "FAILED" | "PARTIAL";
+
+export interface IngestionRun {
+  id: string;
+  started_at: string;
+  finished_at: string | null;
+  status: RunStatus;
+  source_folder: string;
+  total_files: number;
+  processed_files: number;
+  failed_files: number;
+}
+
+export interface IngestFolderRequest {
+  path: string;
+  recursive: boolean;
+  force: boolean;
+}
+
+export interface IngestFolderResponse {
+  run_id: string;
+  discovered: number;
+  queued: number;
+  skipped_duplicates: number;
+  invalid_files: number;
+}
+
+// Dashboard / system
+export interface DashboardStats { // shape of the `dashboard` mock fixture
+  total_documents: number;
+  total_pages: number;
+  total_chunks: number;
+  total_storage_bytes: number;
+  failed_documents: number; // the mock sums the FAILED, OCR_FAILED and EXTRACTION_FAILED counts
+  needs_review: number; // the mock counts documents flagged needs_manual_review
+  avg_ocr_confidence: number; // presumably a 0–1 fraction (OCRQualityWidget multiplies its `avg` by 100) — TODO confirm
+  processed_last_24h: number;
+  by_status: Record<DocumentStatus, number>; // NOTE(review): the mock only sets keys for statuses that occur, so a read can be undefined at runtime
+  daily_ingest: { date: string; ingested: number; failed: number }[]; // mock dates are "YYYY-MM-DD"
+  ocr_distribution: { bucket: string; count: number }[]; // mock buckets are range labels such as "0.4-0.6"
+  storage_growth: { date: string; bytes: number }[];
+  throughput: { time: string; docs_per_min: number; chunks_per_min: number }[]; // mock times are "HH:00"
+}
+
+export interface QueueState {
+  pending: number;
+  in_progress: number;
+  completed_last_hour: number;
+  failed_last_hour: number;
+  average_latency_ms: number;
+}
diff --git a/frontend/src/stores/searchStore.ts b/frontend/src/stores/searchStore.ts
new file mode 100644
index 0000000..4778b9f
--- /dev/null
+++ b/frontend/src/stores/searchStore.ts
@@ -0,0 +1,36 @@
+import { create } from "zustand";
+import type { SearchFilters, SearchMode } from "@/services/types";
+
+interface SearchState { // state and actions of the search store
+  query: string;
+  mode: SearchMode;
+  filters: SearchFilters;
+  history: string[]; // most recent first, at most 12 entries
+  setQuery: (q: string) => void;
+  setMode: (mode: SearchMode) => void;
+  setFilters: (filters: Partial<SearchFilters>) => void; // merges the given fields into the current filters
+  pushHistory: (q: string) => void; // moves q to the front, dropping any earlier duplicate
+  reset: () => void; // clears query and filters; mode and history are left untouched
+}
+
+const emptyFilters: SearchFilters = { // "no filter" baseline used for the initial state and by reset()
+  document_id: null,
+  source_path: null,
+  block_type: null,
+  min_ocr_confidence: null,
+};
+
+export const useSearchStore = create<SearchState>((set) => ({ // in-memory store: no persist middleware is applied
+  query: "",
+  mode: "hybrid",
+  filters: emptyFilters,
+  history: [],
+  setQuery: (q) => set({ query: q }),
+  setMode: (mode) => set({ mode }),
+  setFilters: (filters) => set((s) => ({ filters: { ...s.filters, ...filters } })), // partial merge
+  pushHistory: (raw) => {
+    const q = raw.trim(); // blank queries are not recorded, and padding must not defeat de-duplication
+    if (q) set((s) => ({ history: [q, ...s.history.filter((x) => x !== q)].slice(0, 12) })); // newest first, max 12
+  },
+  reset: () => set({ query: "", filters: emptyFilters }), // mode and history are kept
+}));
diff --git a/frontend/src/stores/uiStore.ts b/frontend/src/stores/uiStore.ts
new file mode 100644
index 0000000..12aea85
--- /dev/null
+++ b/frontend/src/stores/uiStore.ts
@@ -0,0 +1,47 @@
+import { create } from "zustand";
+import { persist } from "zustand/middleware";
+
+export type ThemeMode = "light" | "dark" | "system"; // applyTheme resolves "system" from the prefers-color-scheme media query
+
+interface UiState { // state and actions of the UI store
+  theme: ThemeMode;
+  sidebarCollapsed: boolean;
+  commandOpen: boolean; // not persisted: the store's partialize omits it
+  setTheme: (theme: ThemeMode) => void;
+  toggleSidebar: () => void;
+  setSidebar: (collapsed: boolean) => void;
+  openCommand: () => void;
+  closeCommand: () => void;
+  toggleCommand: () => void;
+}
+
+export const useUiStore = create<UiState>()( // UI preferences store wrapped in zustand's persist middleware
+  persist(
+    (set) => ({
+      theme: "light", // initial value
+      sidebarCollapsed: false,
+      commandOpen: false,
+      setTheme: (theme) => set({ theme }), // only stores the value; it does not call applyTheme itself
+      toggleSidebar: () => set((s) => ({ sidebarCollapsed: !s.sidebarCollapsed })),
+      setSidebar: (collapsed) => set({ sidebarCollapsed: collapsed }),
+      openCommand: () => set({ commandOpen: true }),
+      closeCommand: () => set({ commandOpen: false }),
+      toggleCommand: () => set((s) => ({ commandOpen: !s.commandOpen })),
+    }),
+    {
+      name: "legacyhub-ui", // storage key; no custom storage is configured, so the middleware default applies
+      partialize: (s) => ({ theme: s.theme, sidebarCollapsed: s.sidebarCollapsed }), // commandOpen is deliberately left out
+    }
+  )
+);
+
+/** Adds or removes the `dark` class on <html> to match `theme` ("system" follows the OS setting). */
+export function applyTheme(theme: ThemeMode) {
+  let dark = theme === "dark";
+  if (theme === "system") {
+    // Resolved once, at call time, from the colour-scheme media query.
+    dark = window.matchMedia("(prefers-color-scheme: dark)").matches;
+  }
+  // The second argument forces the class on or off instead of flipping it.
+  document.documentElement.classList.toggle("dark", dark);
+}
diff --git a/frontend/src/styles/globals.css b/frontend/src/styles/globals.css
new file mode 100644
index 0000000..ab378a0
--- /dev/null
+++ b/frontend/src/styles/globals.css
@@ -0,0 +1,172 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer base {
+  :root {
+    /* base */
+    --background: 210 40% 99%;
+    --foreground: 222 47% 11%;
+    --surface: 0 0% 100%;
+    --surface-raised: 0 0% 100%;
+    --surface-sunken: 210 40% 96%;
+    --card: 0 0% 100%;
+    --card-foreground: 222 47% 11%;
+    --popover: 0 0% 100%;
+    --popover-foreground: 222 47% 11%;
+
+    /* TeamHUB green accent */
+    --primary: 158 64% 32%;
+    --primary-foreground: 0 0% 100%;
+    --primary-50: 152 70% 97%;
+    --primary-100: 152 67% 92%;
+    --primary-200: 154 60% 84%;
+    --primary-500: 158 64% 42%;
+    --primary-600: 158 64% 36%;
+    --primary-700: 158 64% 28%;
+
+    --secondary: 210 40% 96%;
+    --secondary-foreground: 222 47% 11%;
+
+    --muted: 210 40% 96%;
+    --muted-foreground: 215 16% 47%;
+
+    --accent: 152 67% 95%;
+    --accent-foreground: 158 64% 24%;
+
+    --destructive: 0 72% 51%;
+    --destructive-foreground: 0 0% 100%;
+    --warning: 38 92% 50%;
+    --warning-foreground: 30 80% 20%;
+    --success: 158 64% 36%;
+    --success-foreground: 0 0% 100%;
+
+    --border: 215 20% 91%;
+    --input: 215 20% 91%;
+    --ring: 158 64% 42%;
+    --radius: 14px;
+
+    color-scheme: light;
+  }
+
+  .dark {
+    --background: 222 47% 5%;
+    --foreground: 210 40% 98%;
+    --surface: 222 47% 8%;
+    --surface-raised: 222 47% 11%;
+    --surface-sunken: 222 47% 4%;
+    --card: 222 47% 9%;
+    --card-foreground: 210 40% 98%;
+    --popover: 222 47% 9%;
+    --popover-foreground: 210 40% 98%;
+
+    --primary: 158 64% 48%;
+    --primary-foreground: 222 47% 5%;
+    --primary-50: 158 50% 12%;
+    --primary-100: 158 50% 16%;
+    --primary-200: 158 50% 22%;
+    --primary-500: 158 64% 50%;
+    --primary-600: 158 64% 44%;
+    --primary-700: 158 64% 36%;
+
+    --secondary: 217 32% 14%;
+    --secondary-foreground: 210 40% 98%;
+
+    --muted: 217 32% 14%;
+    --muted-foreground: 215 20% 65%;
+
+    --accent: 158 50% 14%;
+    --accent-foreground: 158 60% 80%;
+
+    --destructive: 0 62% 50%;
+    --destructive-foreground: 0 0% 100%;
+    --warning: 38 92% 55%;
+    --warning-foreground: 30 80% 12%;
+    --success: 158 64% 48%;
+    --success-foreground: 222 47% 5%;
+
+    --border: 217 32% 17%;
+    --input: 217 32% 17%;
+    --ring: 158 64% 48%;
+    color-scheme: dark;
+  }
+
+  * {
+    @apply border-border;
+  }
+  html, body, #root {
+    @apply h-full;
+  }
+  body {
+    @apply bg-background text-foreground antialiased;
+    font-feature-settings: "ss01", "cv11", "tnum";
+  }
+  ::selection {
+    @apply bg-primary/20 text-foreground; /* --primary-foreground on a 20% primary tint is unreadable in both themes */
+  }
+}
+
+@layer components {
+  .glass {
+    background: hsl(var(--surface) / 0.72);
+    backdrop-filter: saturate(140%) blur(14px);
+    -webkit-backdrop-filter: saturate(140%) blur(14px);
+    border: 1px solid hsl(var(--border) / 0.7);
+  }
+  .glass-strong {
+    background: hsl(var(--surface) / 0.86);
+    backdrop-filter: saturate(150%) blur(20px);
+    -webkit-backdrop-filter: saturate(150%) blur(20px);
+    border: 1px solid hsl(var(--border) / 0.85);
+  }
+  .panel {
+    @apply rounded-2xl bg-card text-card-foreground shadow-soft border border-border/70;
+  }
+  .panel-raised {
+    @apply rounded-2xl bg-card text-card-foreground shadow-elevated border border-border/70;
+  }
+  .ring-focus {
+    @apply outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 focus-visible:ring-offset-background;
+  }
+  .skeleton-shimmer {
+    background: linear-gradient(
+      90deg,
+      hsl(var(--muted)) 0%,
+      hsl(var(--muted) / 0.6) 50%,
+      hsl(var(--muted)) 100%
+    );
+    background-size: 200% 100%;
+    @apply animate-shimmer;
+  }
+  .scrollbar-thin {
+    scrollbar-width: thin;
+    scrollbar-color: hsl(var(--muted-foreground) / 0.4) transparent;
+  }
+  .scrollbar-thin::-webkit-scrollbar {
+    width: 8px;
+    height: 8px;
+  }
+  .scrollbar-thin::-webkit-scrollbar-thumb {
+    background: hsl(var(--muted-foreground) / 0.3);
+    border-radius: 8px;
+  }
+  .grid-canvas {
+    background-image: var(--tw-bg-grid, none);
+  }
+}
+
+@layer utilities {
+  .text-balance {
+    text-wrap: balance;
+  }
+  .bg-grid-faint {
+    background-image:
+      linear-gradient(to right, hsl(var(--border) / 0.45) 1px, transparent 1px),
+      linear-gradient(to bottom, hsl(var(--border) / 0.45) 1px, transparent 1px);
+    background-size: 32px 32px;
+  }
+  .mask-fade-b {
+    -webkit-mask-image: linear-gradient(to bottom, black 60%, transparent 100%);
+    mask-image: linear-gradient(to bottom, black 60%, transparent 100%);
+  }
+}
diff --git a/frontend/src/widgets/ChunkPreview.tsx b/frontend/src/widgets/ChunkPreview.tsx
new file mode 100644
index 0000000..de45591
--- /dev/null
+++ b/frontend/src/widgets/ChunkPreview.tsx
@@ -0,0 +1,108 @@
+import { motion } from "framer-motion";
+import { FileText } from "lucide-react";
+
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Button } from "@/components/ui/button";
+import { Badge } from "@/components/ui/badge";
+import { ConfidenceMeter } from "@/components/common/ConfidenceMeter";
+import { QualityFlags } from "@/components/common/QualityFlag";
+import { Highlight } from "@/components/common/Highlight";
+import { BlockTypeLabel } from "@/components/common/BlockTypeIcon";
+import type { SearchHit } from "@/services/types";
+
+interface Props { // inputs of ChunkPreview
+  hit: SearchHit | null; // null renders the "Select a result…" placeholder card
+  query: string; // forwarded to <Highlight> together with the hit text
+}
+
+export function ChunkPreview({ hit, query }: Props) { // sticky preview card for the currently selected search hit
+  if (!hit) { // nothing selected yet: show the hint card instead
+    return (
+      <Card className="sticky top-20 h-full">
+        <CardContent className="grid h-full place-items-center p-10 text-center text-sm text-muted-foreground">
+          Select a result to preview the chunk, page thumbnail, and full citation.
+        </CardContent>
+      </Card>
+    );
+  }
+  return ( // NOTE(review): the two footer buttons below have no onClick handlers yet
+    <motion.div
+      key={hit.chunk_id /* keyed per chunk — presumably so the entrance animation replays on each selection */}
+      initial={{ opacity: 0, y: 8 }}
+      animate={{ opacity: 1, y: 0 }}
+      transition={{ duration: 0.2 }}
+      className="sticky top-20"
+    >
+      <Card>
+        <CardHeader>
+          <div className="flex items-center gap-2 text-xs text-muted-foreground">
+            <BlockTypeLabel type={hit.block_type} />
+            <span>·</span>
+            <span className="font-mono">page {hit.page_number}</span>
+            <span>·</span>
+            <span className="font-mono">chunk {hit.chunk_id.slice(0, 8) /* short id prefix only */}</span>
+          </div>
+          <CardTitle className="flex items-baseline gap-2 truncate">
+            <FileText className="h-4 w-4 shrink-0 text-primary" />
+            <span className="truncate">{hit.original_file_name}</span>
+          </CardTitle>
+        </CardHeader>
+        <CardContent className="space-y-4">
+          <PageThumbnail page={hit.page_number} fileName={hit.original_file_name} />
+
+          <div className="rounded-xl border border-border/70 bg-muted/30 p-4 text-[13px] leading-relaxed text-foreground/90">
+            <Highlight text={hit.text} query={query} />
+          </div>
+
+          <div className="grid grid-cols-2 gap-2 text-xs">
+            <Field label="Source">{hit.source_path}</Field>
+            <Field label="Page">{hit.page_number}</Field>
+            <Field label="Block id">{hit.citation.block_id ?? "—" /* em dash when the citation has no block id */}</Field>
+            <Field label="Score">{hit.score.toFixed(4)}</Field>
+          </div>
+
+          <div className="space-y-2">
+            <div className="text-[10px] uppercase tracking-wide text-muted-foreground">Quality</div>
+            <QualityFlags flags={hit.quality_flags} />
+          </div>
+
+          <div className="flex items-center justify-between gap-2">
+            <ConfidenceMeter value={0.82 /* NOTE(review): hard-coded constant — not derived from `hit` */} />
+            <div className="flex gap-2">
+              <Button size="sm" variant="outline">Open viewer</Button>
+              <Button size="sm">Copy citation</Button>
+            </div>
+          </div>
+        </CardContent>
+      </Card>
+    </motion.div>
+  );
+}
+
+/** Labelled read-only cell: a small uppercase caption above a truncated monospace value. */
+function Field(props: { label: string; children: React.ReactNode }) {
+  const caption = <div className="text-[10px] uppercase tracking-wide text-muted-foreground">{props.label}</div>;
+  const value = <div className="mt-0.5 truncate font-mono text-xs text-foreground">{props.children}</div>;
+  return (
+    <div className="rounded-lg border border-border/60 bg-card px-3 py-2">{caption}{value}</div>
+  );
+}
+
+function PageThumbnail({ page, fileName }: { page: number; fileName: string }) { // static placeholder artwork: no PDF page is rendered here
+  return ( // fixed skeleton bars over the grid background, then a footer with the file name and page badge
+    <div className="relative overflow-hidden rounded-xl border border-border/70 bg-card">
+      <div className="bg-grid-faint relative aspect-[3/4] w-full">
+        <div className="absolute inset-x-6 top-8 h-2 rounded bg-muted-foreground/20" />
+        <div className="absolute inset-x-6 top-14 h-2 w-2/3 rounded bg-muted-foreground/20" />
+        <div className="absolute inset-x-6 top-24 h-1.5 rounded bg-primary/30" />
+        <div className="absolute inset-x-6 top-28 h-1.5 w-5/6 rounded bg-muted-foreground/20" />
+        <div className="absolute inset-x-6 top-32 h-1.5 w-4/6 rounded bg-muted-foreground/20" />
+        <div className="absolute inset-x-6 bottom-12 h-12 rounded bg-muted-foreground/12 ring-1 ring-inset ring-primary/30" />
+      </div>
+      <div className="flex items-center justify-between border-t border-border/70 bg-card px-3 py-2 text-[11px] text-muted-foreground">
+        <span className="truncate font-medium">{fileName}</span>
+        <Badge variant="outline" className="font-mono">p.{page}</Badge>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/widgets/DocumentTimeline.tsx b/frontend/src/widgets/DocumentTimeline.tsx
new file mode 100644
index 0000000..1f5408e
--- /dev/null
+++ b/frontend/src/widgets/DocumentTimeline.tsx
@@ -0,0 +1,68 @@
+import { Check, CircleDashed } from "lucide-react";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { cn } from "@/lib/utils";
+import { relativeTime } from "@/lib/utils";
+import type { TimelineEvent } from "@/services/types";
+
+const STAGES = [ // stages shown by the timeline, in display order; the *_FAILED / FAILED statuses are not listed
+  "DISCOVERED",
+  "STORED_ORIGINAL",
+  "OCR_STARTED",
+  "OCR_COMPLETED",
+  "EXTRACTION_STARTED",
+  "EXTRACTION_COMPLETED",
+  "CHUNKING_COMPLETED",
+  "INDEXING_COMPLETED",
+];
+
+/** Checklist of the fixed pipeline stages; a stage is ticked when `events` contains it. */
+export function DocumentTimeline({ events }: { events: TimelineEvent[] }) {
+  // Index the earliest-listed event per stage (first match wins).
+  const firstByStage = new Map<string, TimelineEvent>();
+  for (const e of events) if (!firstByStage.has(e.stage)) firstByStage.set(e.stage, e);
+  const total = STAGES.length;
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Pipeline timeline</CardTitle>
+        <CardDescription>End-to-end stages for this document</CardDescription>
+      </CardHeader>
+      <CardContent>
+        <ol className="relative ml-2 border-l border-border/70 pl-5">
+          {STAGES.map((stage, position) => {
+            const evt = firstByStage.get(stage);
+            const Icon = evt ? Check : CircleDashed;
+            const tone = evt
+              ? "border-primary bg-primary text-primary-foreground"
+              : "border-border bg-card text-muted-foreground";
+            return (
+              <li key={stage} className="relative pb-4 last:pb-0">
+                <span className={cn("absolute -left-[31px] grid h-6 w-6 place-items-center rounded-full border-2", tone)}>
+                  <Icon className="h-3 w-3" />
+                </span>
+                <div className="flex items-center justify-between gap-2">
+                  <div className="text-sm font-medium">{labelFor(stage)}</div>
+                  {evt ? (
+                    <span className="text-[11px] text-muted-foreground">
+                      {relativeTime(evt.created_at)}
+                    </span>
+                  ) : null}
+                </div>
+                <p className="text-xs text-muted-foreground">
+                  {evt?.message ?? `Stage ${position + 1} of ${total}`}
+                </p>
+              </li>
+            );
+          })}
+        </ol>
+      </CardContent>
+    </Card>
+  );
+}
+
+/** Turns a stage id such as "OCR_STARTED" into a title-cased label ("Ocr Started"). */
+function labelFor(stage: string): string {
+  const spaced = stage.toLowerCase().split("_").join(" ");
+  // Upper-case the first character of every word.
+  return spaced.replace(/\b\w/g, (first) => first.toUpperCase());
+}
diff --git a/frontend/src/widgets/IngestionStatsChart.tsx b/frontend/src/widgets/IngestionStatsChart.tsx
new file mode 100644
index 0000000..6136fcd
--- /dev/null
+++ b/frontend/src/widgets/IngestionStatsChart.tsx
@@ -0,0 +1,92 @@
+import {
+  Area,
+  AreaChart,
+  CartesianGrid,
+  Legend,
+  ResponsiveContainer,
+  Tooltip,
+  XAxis,
+  YAxis,
+} from "recharts";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+
+interface Props { // inputs of IngestionStatsChart
+  data: { date: string; ingested: number; failed: number }[]; // axis ticks show date.slice(5), so "YYYY-MM-DD" is presumably expected
+}
+
+// Area chart of daily ingested vs. failed document counts. The <Legend> lives inside <AreaChart>:
+// Recharts supplies its entries from the parent chart, so outside a chart it has no series to show.
+export function IngestionStatsChart({ data }: Props) {
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Ingestion volume</CardTitle>
+        <CardDescription>Documents processed per day · last 14 days</CardDescription>
+      </CardHeader>
+      <CardContent className="h-[260px]">
+        <ResponsiveContainer width="100%" height="100%">
+          <AreaChart data={data} margin={{ top: 5, right: 8, left: -10, bottom: 0 }}>
+            <defs>
+              <linearGradient id="ingested" x1="0" x2="0" y1="0" y2="1">
+                <stop offset="0%" stopColor="hsl(var(--primary))" stopOpacity={0.4} />
+                <stop offset="100%" stopColor="hsl(var(--primary))" stopOpacity={0} />
+              </linearGradient>
+              <linearGradient id="failed" x1="0" x2="0" y1="0" y2="1">
+                <stop offset="0%" stopColor="hsl(var(--destructive))" stopOpacity={0.35} />
+                <stop offset="100%" stopColor="hsl(var(--destructive))" stopOpacity={0} />
+              </linearGradient>
+            </defs>
+            <CartesianGrid stroke="hsl(var(--border))" strokeOpacity={0.5} vertical={false} />
+            <XAxis
+              dataKey="date"
+              tickFormatter={(v) => v.slice(5)}
+              stroke="hsl(var(--muted-foreground))"
+              fontSize={11}
+              tickLine={false}
+              axisLine={false}
+            />
+            <YAxis
+              stroke="hsl(var(--muted-foreground))"
+              fontSize={11}
+              tickLine={false}
+              axisLine={false}
+            />
+            <Tooltip
+              cursor={{ stroke: "hsl(var(--border))", strokeDasharray: "3 3" }}
+              contentStyle={{
+                background: "hsl(var(--popover))",
+                border: "1px solid hsl(var(--border))",
+                borderRadius: 12,
+                fontSize: 12,
+                boxShadow: "0 12px 40px -12px rgba(15,23,42,0.18)",
+              }}
+            />
+            {/* Top-right legend, built from the `name` of each <Area> below. */}
+            <Legend
+              verticalAlign="top"
+              align="right"
+              iconType="circle"
+              wrapperStyle={{ fontSize: 11, color: "hsl(var(--muted-foreground))" }}
+            />
+            <Area
+              type="monotone"
+              dataKey="ingested"
+              stroke="hsl(var(--primary))"
+              strokeWidth={2}
+              fill="url(#ingested)"
+              name="Ingested"
+            />
+            <Area
+              type="monotone"
+              dataKey="failed"
+              stroke="hsl(var(--destructive))"
+              strokeWidth={1.5}
+              fill="url(#failed)"
+              name="Failed"
+            />
+          </AreaChart>
+        </ResponsiveContainer>
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/frontend/src/widgets/KpiCard.tsx b/frontend/src/widgets/KpiCard.tsx
new file mode 100644
index 0000000..fa2088f
--- /dev/null
+++ b/frontend/src/widgets/KpiCard.tsx
@@ -0,0 +1,111 @@
+import type { ReactNode } from "react";
+import { ArrowDownRight, ArrowUpRight } from "lucide-react";
+import { motion } from "framer-motion";
+import { cn } from "@/lib/utils";
+
+interface KpiCardProps { // inputs of KpiCard; everything except label and value is optional
+  label: string;
+  value: ReactNode;
+  delta?: number; // the sign picks the up/down badge; the magnitude is printed with one decimal and a % sign
+  helper?: string;
+  icon?: ReactNode;
+  tone?: "default" | "success" | "warning" | "destructive" | "primary"; // selects the corner-glow colour; defaults to "default"
+  trend?: number[]; // sparkline series; drawn only when it has more than one point
+  className?: string;
+}
+
+const TONE: Record<NonNullable<KpiCardProps["tone"]>, string> = { // gradient start class for the blurred corner glow, per tone
+  default: "from-muted/30",
+  primary: "from-primary/10",
+  success: "from-success/10",
+  warning: "from-warning/10",
+  destructive: "from-destructive/10",
+};
+
+export function KpiCard({ // dashboard tile: label and value, plus optional icon, delta badge, helper text and sparkline
+  label,
+  value,
+  delta,
+  helper,
+  icon,
+  tone = "default",
+  trend,
+  className,
+}: KpiCardProps) {
+  const up = (delta ?? 0) >= 0; // a zero delta counts as "up"
+  return (
+    <motion.div
+      initial={{ opacity: 0, y: 8 }}
+      animate={{ opacity: 1, y: 0 }}
+      transition={{ duration: 0.25 }}
+      className={cn(
+        "relative overflow-hidden panel p-5",
+        "before:absolute before:inset-x-0 before:top-0 before:h-px before:bg-gradient-to-r before:from-transparent before:via-border before:to-transparent",
+        className
+      )}
+    >
+      <div className={cn("pointer-events-none absolute -right-12 -top-16 h-32 w-32 rounded-full bg-gradient-to-br to-transparent blur-3xl", TONE[tone])} />
+      <div className="relative flex items-start justify-between gap-3">
+        <div className="space-y-1">
+          <div className="text-[11px] font-medium uppercase tracking-[0.16em] text-muted-foreground">
+            {label}
+          </div>
+          <div className="text-2xl font-semibold tracking-tight text-foreground">{value}</div>
+        </div>
+        {icon && (
+          <div className="grid h-9 w-9 place-items-center rounded-xl border border-border/70 bg-card text-muted-foreground">
+            {icon}
+          </div>
+        )}
+      </div>
+      <div className="relative mt-3 flex items-center justify-between gap-3 text-xs">
+        {delta !== undefined /* badge only when a delta was supplied; 0 still renders */ && (
+          <span
+            className={cn(
+              "inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 font-medium",
+              up ? "bg-success/12 text-success" : "bg-destructive/12 text-destructive" // NOTE(review): "/12" needs a matching opacity step in the Tailwind config — confirm
+            )}
+          >
+            {up ? <ArrowUpRight className="h-3 w-3" /> : <ArrowDownRight className="h-3 w-3" />}
+            {Math.abs(delta).toFixed(1) /* the arrow carries the sign, so only the magnitude is printed */}%
+          </span>
+        )}
+        {helper && <span className="ml-auto text-muted-foreground">{helper}</span>}
+      </div>
+      {trend && trend.length > 1 /* at least two points are needed to draw a line */ && <Sparkline values={trend} />}
+    </motion.div>
+  );
+}
+
+/** Inline SVG trend line; the caller only renders it for two or more values. */
+function Sparkline({ values }: { values: number[] }) {
+  const HEIGHT = 32; // viewBox height that the y-coordinates are scaled into
+  const min = Math.min(...values);
+  const range = Math.max(...values) - min || 1; // flat series: avoid dividing by zero
+  const points = values
+    .map((v, i) => {
+      const x = (i / (values.length - 1)) * 100;
+      const y = HEIGHT - ((v - min) / range) * HEIGHT; // largest value at the top edge, smallest at the bottom
+      return `${x.toFixed(2)},${y.toFixed(2)}`;
+    })
+    .join(" ");
+  return (
+    <svg viewBox={`0 0 100 ${HEIGHT}`} className="mt-3 h-7 w-full" preserveAspectRatio="none">
+      <defs>
+        <linearGradient id="kpiSpark" x1="0" x2="0" y1="0" y2="1">
+          <stop offset="0%" stopColor="hsl(var(--primary))" stopOpacity="0.4" />
+          <stop offset="100%" stopColor="hsl(var(--primary))" stopOpacity="0" />
+        </linearGradient>
+      </defs>
+      <polyline
+        points={points}
+        fill="none"
+        stroke="hsl(var(--primary))"
+        strokeWidth="1.6"
+        strokeLinejoin="round" strokeLinecap="round"
+        vectorEffect="non-scaling-stroke"
+      />
+      <polygon points={`0,${HEIGHT} ${points} 100,${HEIGHT}`} fill="url(#kpiSpark)" opacity="0.6" />
+    </svg>
+  );
+}
diff --git a/frontend/src/widgets/OCRQualityWidget.tsx b/frontend/src/widgets/OCRQualityWidget.tsx
new file mode 100644
index 0000000..85e8a14
--- /dev/null
+++ b/frontend/src/widgets/OCRQualityWidget.tsx
@@ -0,0 +1,53 @@
+import { Bar, BarChart, CartesianGrid, Cell, ResponsiveContainer, Tooltip, XAxis, YAxis } from "recharts";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+
+interface Props { // inputs of OCRQualityWidget
+  distribution: { bucket: string; count: number }[]; // one bar per entry, labelled by `bucket`
+  avg: number; // displayed as (avg * 100).toFixed(1) + "%", so presumably a 0–1 fraction — TODO confirm
+}
+
+const COLORS = [ // bar fill by bucket position: destructive for the first bar through to success for the last
+  "hsl(var(--destructive))",
+  "hsl(var(--warning))",
+  "hsl(var(--warning))",
+  "hsl(var(--primary))",
+  "hsl(var(--success))",
+  "hsl(var(--success))",
+];
+
+export function OCRQualityWidget({ distribution, avg }: Props) { // bar chart of OCR-confidence buckets with the average in the header
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>OCR confidence distribution</CardTitle>
+        <CardDescription>
+          Average <span className="font-mono text-foreground">{(avg * 100).toFixed(1) /* prints "NaN" if avg is NaN */}%</span>{" "}
+          across all documents
+        </CardDescription>
+      </CardHeader>
+      <CardContent className="h-[220px]">
+        <ResponsiveContainer width="100%" height="100%">
+          <BarChart data={distribution} margin={{ top: 5, right: 8, left: -10, bottom: 0 }}>
+            <CartesianGrid stroke="hsl(var(--border))" strokeOpacity={0.5} vertical={false} />
+            <XAxis dataKey="bucket" tickLine={false} axisLine={false} fontSize={11} />
+            <YAxis tickLine={false} axisLine={false} fontSize={11} />
+            <Tooltip
+              cursor={{ fill: "hsl(var(--muted) / 0.5)" }}
+              contentStyle={{
+                background: "hsl(var(--popover))",
+                border: "1px solid hsl(var(--border))",
+                borderRadius: 12,
+                fontSize: 12,
+              }}
+            />
+            <Bar dataKey="count" radius={[8, 8, 0, 0]}>
+              {distribution.map((_, i) => (
+                <Cell key={i} fill={COLORS[i % COLORS.length] /* palette wraps around past six buckets */} />
+              ))}
+            </Bar>
+          </BarChart>
+        </ResponsiveContainer>
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/frontend/src/widgets/PdfPreviewPane.tsx b/frontend/src/widgets/PdfPreviewPane.tsx
new file mode 100644
index 0000000..a8bd203
--- /dev/null
+++ b/frontend/src/widgets/PdfPreviewPane.tsx
@@ -0,0 +1,108 @@
+import { ChevronLeft, ChevronRight } from "lucide-react";
+import { useState } from "react";
+
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Button } from "@/components/ui/button";
+import { Badge } from "@/components/ui/badge";
+import { cn } from "@/lib/utils";
+import type { PageSummary } from "@/services/types";
+
+interface Props {
+  fileName: string; // shown as the card title
+  pages: PageSummary[]; // thumbnails are rendered in this order; `page_number` identifies the active page
+  onPageChange?: (page: number) => void; // called with the selected page number after a prev/next/thumbnail click
+}
+
+/**
+ * Page browser: prev/next header, thumbnail rail, and a mock page body with the active page's text.
+ * Prev/next move by position in `pages`, so numbering with gaps or a non-1 start still works.
+ */
+export function PdfPreviewPane({ fileName, pages, onPageChange }: Props) {
+  const [active, setActive] = useState(pages[0]?.page_number ?? 1);
+  // Position of the active page in `pages`; falls back to the first entry when it is not found.
+  const index = Math.max(0, pages.findIndex((p) => p.page_number === active));
+  const current = pages[index];
+
+  function set(page: number) {
+    setActive(page);
+    onPageChange?.(page);
+  }
+
+  // Move `delta` entries along `pages`; stepping past either end does nothing.
+  function step(delta: number) {
+    const target = pages[index + delta];
+    if (target) set(target.page_number);
+  }
+
+  return (
+    <Card className="flex h-full flex-col">
+      <CardHeader className="flex flex-row items-center justify-between gap-2 pb-0">
+        <div>
+          <CardTitle className="truncate text-sm">{fileName}</CardTitle>
+          <div className="text-[11px] text-muted-foreground">
+            Page {active} of {pages.length}
+          </div>
+        </div>
+        <div className="flex items-center gap-1">
+          <Button size="icon-sm" variant="outline" onClick={() => step(-1)} disabled={index <= 0}>
+            <ChevronLeft className="h-4 w-4" />
+          </Button>
+          <Button size="icon-sm" variant="outline" onClick={() => step(1)} disabled={index >= pages.length - 1}>
+            <ChevronRight className="h-4 w-4" />
+          </Button>
+        </div>
+      </CardHeader>
+      <CardContent className="flex flex-1 flex-col gap-3 pt-3">
+        <div className="relative grid flex-1 grid-cols-[60px_1fr] gap-3">
+          <div className="space-y-1.5 overflow-y-auto pr-1 scrollbar-thin">
+            {pages.map((p) => (
+              <button
+                key={p.page_number}
+                onClick={() => set(p.page_number)}
+                className={cn(
+                  "block w-full overflow-hidden rounded-md border text-left transition-all",
+                  p.page_number === active ? "border-primary shadow-soft" : "border-border/60 hover:border-primary/40"
+                )}
+              >
+                <div className="bg-grid-faint relative aspect-[3/4] w-full">
+                  <div className="absolute inset-2 rounded-sm bg-muted-foreground/15" />
+                </div>
+                <div className="bg-card px-1.5 py-0.5 text-center text-[10px] font-mono text-muted-foreground">
+                  {p.page_number}
+                </div>
+              </button>
+            ))}
+          </div>
+          <div className="relative overflow-hidden rounded-xl border border-border/70 bg-card shadow-soft">
+            <div className="bg-grid-faint flex h-full min-h-[420px] flex-col">
+              <div className="flex items-center justify-between border-b border-border/70 bg-card/80 px-4 py-2 text-[11px] text-muted-foreground backdrop-blur">
+                <Badge variant="outline" className="font-mono">scanned page</Badge>
+                <span className="font-mono">p.{active}</span>
+              </div>
+              <div className="flex flex-1 flex-col gap-3 px-8 py-6 text-[13px] leading-relaxed text-foreground/90">
+                <div className="h-2 w-3/4 rounded bg-foreground/10" />
+                <div className="h-2 w-2/3 rounded bg-foreground/10" />
+                <div className="h-2 w-5/6 rounded bg-foreground/10" />
+                <div className="h-2 w-1/2 rounded bg-foreground/10" />
+                <div className="mt-4 rounded-lg border border-primary/40 bg-primary/5 px-3 py-3 text-xs text-foreground">
+                  <strong className="text-primary">Highlighted block</strong>
+                  <p className="mt-1 text-muted-foreground">
+                    {current?.text ?? "Sample text content for this page."}
+                  </p>
+                </div>
+                <div className="mt-2 grid grid-cols-3 gap-1 rounded-md border border-border/60 p-2">
+                  <div className="h-2 rounded bg-foreground/10" />
+                  <div className="h-2 rounded bg-foreground/15" />
+                  <div className="h-2 rounded bg-foreground/10" />
+                  <div className="h-2 rounded bg-foreground/15" />
+                  <div className="h-2 rounded bg-foreground/10" />
+                  <div className="h-2 rounded bg-foreground/15" />
+                </div>
+              </div>
+            </div>
+          </div>
+        </div>
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/frontend/src/widgets/QueueWidget.tsx b/frontend/src/widgets/QueueWidget.tsx
new file mode 100644
index 0000000..0bd2d6e
--- /dev/null
+++ b/frontend/src/widgets/QueueWidget.tsx
@@ -0,0 +1,75 @@
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { Progress } from "@/components/ui/progress";
+import { useQueue } from "@/hooks/useHealth";
+import { formatDuration, formatNumber } from "@/lib/utils";
+import { Activity, AlertCircle, Loader2 } from "lucide-react";
+
+export function QueueWidget() {
+  // Counters missing from the hook result (including "no data yet") are treated as zero.
+  const { data, isLoading } = useQueue();
+  const completedHour = data?.completed_last_hour ?? 0;
+  const failedHour = data?.failed_last_hour ?? 0;
+  const inProgress = data?.in_progress ?? 0;
+  const pending = data?.pending ?? 0;
+  const backlog = pending + inProgress;
+  let progress = 100; // shown as 100% when nothing is pending or in progress
+  if (backlog > 0) progress = Math.min(100, (completedHour / (completedHour + backlog)) * 100);
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Processing queue</CardTitle>
+        <CardDescription>Live Celery workload</CardDescription>
+      </CardHeader>
+      <CardContent className="space-y-4">
+        <div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
+          <Tile icon={<Activity className="h-4 w-4 text-primary" />} label="Pending" value={pending} loading={isLoading} />
+          <Tile icon={<Loader2 className="h-4 w-4 animate-spin text-primary" />} label="In progress" value={inProgress} loading={isLoading} />
+          <Tile label="Done · 1h" value={completedHour} loading={isLoading} tone="success" />
+          <Tile icon={<AlertCircle className="h-4 w-4 text-destructive" />} label="Failed · 1h" value={failedHour} loading={isLoading} tone="destructive" />
+        </div>
+        <div className="space-y-2">
+          <div className="flex items-center justify-between text-xs text-muted-foreground">
+            <span>Throughput vs. backlog</span>
+            <span>Avg latency · {formatDuration(data?.average_latency_ms ?? 0)}</span>
+          </div>
+          <Progress value={progress} indicatorClassName="bg-gradient-to-r from-primary to-primary-700" />
+        </div>
+      </CardContent>
+    </Card>
+  );
+}
+
+/**
+ * One counter tile of the queue card: optional icon plus label on top, formatted value below.
+ * While `loading` is truthy an em dash is shown instead of the value.
+ */
+function Tile({
+  icon,
+  label,
+  value,
+  loading,
+  tone,
+}: {
+  icon?: React.ReactNode;
+  label: string;
+  value: number;
+  loading?: boolean;
+  tone?: "success" | "destructive";
+}) {
+  // Only "success" and "destructive" recolour the value; otherwise the foreground colour is used.
+  let toneClass = "text-foreground";
+  if (tone === "success") toneClass = "text-success";
+  else if (tone === "destructive") toneClass = "text-destructive";
+
+  return (
+    <div className="rounded-xl border border-border/70 bg-card/40 px-3 py-3">
+      <div className="flex items-center gap-1.5 text-[11px] font-medium uppercase tracking-wide text-muted-foreground">
+        {icon}
+        {label}
+      </div>
+      <div className={"mt-1.5 font-mono text-xl tabular-nums " + toneClass}>
+        {loading ? "—" : formatNumber(value)}
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/widgets/RecentRunsWidget.tsx b/frontend/src/widgets/RecentRunsWidget.tsx
new file mode 100644
index 0000000..eedf699
--- /dev/null
+++ b/frontend/src/widgets/RecentRunsWidget.tsx
@@ -0,0 +1,75 @@
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { StatusChip, statusToTone } from "@/components/common/StatusChip";
+import { useIngestionRuns } from "@/hooks/useIngestion";
+import { formatNumber, relativeTime } from "@/lib/utils";
+import { Skeleton } from "@/components/ui/skeleton";
+
+export function RecentRunsWidget() {
+  const { data, isLoading } = useIngestionRuns();
+  // At most the first six runs returned by the hook are listed.
+  const runs = data?.slice(0, 6);
+  // Header cells as [label, alignment class], in column order.
+  const columns = [
+    ["Status", "text-left"],
+    ["Source", "text-left"],
+    ["Processed", "text-right"],
+    ["Failed", "text-right"],
+    ["Started", "text-right"],
+  ];
+  // Loading-row cells as [cell class, skeleton class], one pair per column.
+  const placeholders = [
+    ["px-3 py-2.5", "h-5 w-16"],
+    ["px-3 py-2.5", "h-4 w-48"],
+    ["px-3 py-2.5 text-right", "ml-auto h-4 w-12"],
+    ["px-3 py-2.5 text-right", "ml-auto h-4 w-8"],
+    ["px-3 py-2.5 text-right", "ml-auto h-4 w-16"],
+  ];
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Recent ingestion runs</CardTitle>
+        <CardDescription>Most recent batch operations</CardDescription>
+      </CardHeader>
+      <CardContent>
+        <div className="overflow-hidden rounded-xl border border-border/60">
+          <table className="w-full text-sm">
+            <thead className="bg-muted/40 text-[10px] uppercase tracking-wide text-muted-foreground">
+              <tr>
+                {columns.map(([label, align]) => (
+                  <th key={label} className={"px-3 py-2 " + align + " font-medium"}>{label}</th>
+                ))}
+              </tr>
+            </thead>
+            <tbody className="divide-y divide-border/60 bg-card">
+              {isLoading &&
+                Array.from({ length: 5 }, (_unused, row) => (
+                  <tr key={row}>
+                    {placeholders.map(([cellClass, barClass], col) => (
+                      <td key={col} className={cellClass}><Skeleton className={barClass} /></td>
+                    ))}
+                  </tr>
+                ))}
+              {runs?.map((run) => (
+                <tr key={run.id} className="transition-colors hover:bg-muted/30">
+                  <td className="px-3 py-2.5">
+                    <StatusChip tone={statusToTone(run.status)} label={run.status} />
+                  </td>
+                  <td className="px-3 py-2.5 font-mono text-xs text-muted-foreground">{run.source_folder}</td>
+                  <td className="px-3 py-2.5 text-right font-mono text-xs tabular-nums">
+                    {formatNumber(run.processed_files)}/{formatNumber(run.total_files)}
+                  </td>
+                  <td className="px-3 py-2.5 text-right font-mono text-xs tabular-nums">
+                    <span className={run.failed_files > 0 ? "text-destructive" : "text-muted-foreground"}>
+                      {formatNumber(run.failed_files)}
+                    </span>
+                  </td>
+                  <td className="px-3 py-2.5 text-right text-xs text-muted-foreground">{relativeTime(run.started_at)}</td>
+                </tr>
+              ))}
+            </tbody>
+          </table>
+        </div>
+      </CardContent>
+    </Card>
+  );
+}
diff --git a/frontend/src/widgets/SearchResultCard.tsx b/frontend/src/widgets/SearchResultCard.tsx
new file mode 100644
index 0000000..b017519
--- /dev/null
+++ b/frontend/src/widgets/SearchResultCard.tsx
@@ -0,0 +1,93 @@
+import { motion } from "framer-motion";
+import { FileText, Hash, MoveUpRight } from "lucide-react";
+
+import { Card, CardContent } from "@/components/ui/card";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { ConfidenceMeter } from "@/components/common/ConfidenceMeter";
+import { Highlight } from "@/components/common/Highlight";
+import { BlockTypeLabel } from "@/components/common/BlockTypeIcon";
+import { QualityFlags } from "@/components/common/QualityFlag";
+import type { SearchHit } from "@/services/types";
+import { cn } from "@/lib/utils";
+
+interface Props {
+  hit: SearchHit; // the search result rendered by the card
+  query: string; // passed to <Highlight> together with the hit text
+  active: boolean; // true applies the emphasised border/ring styling
+  onSelect: () => void; // invoked when the card is activated
+  reranked: boolean; // switches the score label between "reranked" and "raw"
+}
+
+// Selectable search-hit card. The wrapper is a div with role="button" instead of a <button>:
+// the card contains the "Open citation" <Button> and block-level content, which must not be
+// nested inside a <button> element. Enter/Space on the focused card call onSelect.
+export function SearchResultCard({ hit, query, active, onSelect, reranked }: Props) {
+  const ocrConf = (hit.metadata as { ocr_confidence?: number })?.ocr_confidence ?? null;
+  return (
+    <motion.div
+      layout
+      role="button"
+      tabIndex={0}
+      onClick={onSelect}
+      onKeyDown={(event) => {
+        // Skip keys bubbling up from the nested button; its click already reaches onClick.
+        if (event.target !== event.currentTarget) return;
+        if (event.key === "Enter" || event.key === " ") {
+          event.preventDefault();
+          onSelect();
+        }
+      }}
+      whileHover={{ y: -1 }}
+      transition={{ duration: 0.15 }}
+      className={cn(
+        "panel w-full overflow-hidden text-left transition-colors",
+        active ? "border-primary/60 shadow-elevated ring-1 ring-primary/30" : "hover:border-primary/30"
+      )}
+    >
+      <Card className="border-0 shadow-none">
+        <CardContent className="space-y-2.5 p-4">
+          <div className="flex items-center gap-2 text-xs">
+            <span className="inline-flex h-6 min-w-[1.6rem] items-center justify-center rounded-md bg-muted px-1.5 font-mono font-medium text-muted-foreground">#{hit.rank}</span>
+            <BlockTypeLabel type={hit.block_type} />
+            <span className="text-muted-foreground">·</span>
+            <span className="font-mono text-muted-foreground"><Hash className="inline h-3 w-3" /> p.{hit.page_number}</span>
+            <ScoreBar score={hit.score} reranked={reranked} />
+          </div>
+          <div className="flex items-baseline gap-2">
+            <FileText className="h-4 w-4 shrink-0 text-primary" />
+            <div className="truncate text-sm font-semibold text-foreground">{hit.original_file_name}</div>
+          </div>
+          <p className="line-clamp-4 text-[13px] leading-relaxed text-foreground/90">
+            <Highlight text={hit.text} query={query} />
+          </p>
+          <div className="flex flex-wrap items-center gap-2 pt-1 text-xs">
+            {/* NOTE(review): 0.8 is displayed when the hit carries no OCR confidence; confirm this placeholder is intended. */}
+            <ConfidenceMeter value={ocrConf ?? 0.8} />
+            <Badge variant="outline" className="font-mono text-[10px]">{hit.source_path}</Badge>
+            <QualityFlags flags={hit.quality_flags} compact className="ml-auto" />
+          </div>
+          <div className="-mb-1 flex items-center justify-end">
+            <Button variant="ghost" size="sm" className="text-xs text-primary">
+              Open citation
+              <MoveUpRight className="h-3.5 w-3.5" />
+            </Button>
+          </div>
+        </CardContent>
+      </Card>
+    </motion.div>
+  );
+}
+
+// Score indicator: the bar width is the score clamped to [0, 1]; the label prints the unclamped score.
+function ScoreBar({ score, reranked }: { score: number; reranked: boolean }) {
+  const width = `${Math.max(0, Math.min(1, score)) * 100}%`;
+  const origin = reranked ? "reranked" : "raw";
+  return (
+    <div className="ml-auto flex items-center gap-2 text-[11px] text-muted-foreground">
+      <span className="hidden sm:inline">{origin}</span>
+      <div className="h-1.5 w-24 overflow-hidden rounded-full bg-muted"><div className="h-full bg-primary" style={{ width }} /></div>
+      <span className="font-mono tabular-nums text-foreground">{score.toFixed(3)}</span>
+    </div>
+  );
+}
diff --git a/frontend/src/widgets/ServiceHealthCard.tsx b/frontend/src/widgets/ServiceHealthCard.tsx
new file mode 100644
index 0000000..f5365fb
--- /dev/null
+++ b/frontend/src/widgets/ServiceHealthCard.tsx
@@ -0,0 +1,69 @@
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { StatusChip } from "@/components/common/StatusChip";
+import { useHealth } from "@/hooks/useHealth";
+import { Database, Cloud, Search, Boxes, MemoryStick } from "lucide-react";
+import type { ReactNode } from "react";
+
+const ICONS: Record<string, ReactNode> = { // icon per component name; names without an entry render no icon
+  postgres: <Database className="h-4 w-4" />,
+  minio: <Cloud className="h-4 w-4" />,
+  opensearch: <Search className="h-4 w-4" />,
+  qdrant: <Boxes className="h-4 w-4" />,
+  redis: <MemoryStick className="h-4 w-4" />,
+};
+
+// Status list for the components reported by useHealth().
+export function ServiceHealthCard() {
+  const { data, isLoading } = useHealth();
+  const components = data?.components ?? [];
+  // Placeholder rows are shown while loading and whenever no components were returned.
+  const showPlaceholders = isLoading || components.length === 0;
+  const iconBox = "grid h-8 w-8 place-items-center rounded-lg border border-border/70 bg-muted/40 text-muted-foreground";
+
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Service health</CardTitle>
+        <CardDescription>Backing services for the indexing platform</CardDescription>
+      </CardHeader>
+      <CardContent>
+        <ul className="divide-y divide-border/60">
+          {showPlaceholders &&
+            ["postgres", "minio", "opensearch", "qdrant", "redis"].map((name) => (
+              <li key={name} className="flex items-center gap-3 py-2.5">
+                <span className={iconBox}>{ICONS[name] ?? null}</span>
+                <span className="font-medium capitalize">{name}</span>
+                <span className="ml-auto text-xs text-muted-foreground">checking…</span>
+              </li>
+            ))}
+          {components.map((c) => {
+            // "ok" and "degraded" have their own chip tones; any other status renders as an error.
+            const tone = c.status === "ok" ? "ok" : c.status === "degraded" ? "warning" : "error";
+            return (
+              <li key={c.name} className="flex items-center gap-3 py-2.5">
+                <span className={iconBox}>{ICONS[c.name] ?? null}</span>
+                <div className="leading-tight">
+                  <div className="text-sm font-medium capitalize">{c.name}</div>
+                  <div className="text-[11px] text-muted-foreground">{describe(c.detail)}</div>
+                </div>
+                <StatusChip className="ml-auto" tone={tone} label={c.status} />
+              </li>
+            );
+          })}
+        </ul>
+      </CardContent>
+    </Card>
+  );
+}
+
+// One-line summary of a component's detail payload; the first matching field wins.
+function describe(detail: Record<string, unknown>): string {
+  if (!detail) return "";
+  const { cluster_status: cluster, buckets, collections, latency_ms: latency } = detail;
+  if (cluster) return `cluster · ${String(cluster)}`;
+  if (Array.isArray(buckets) && buckets.length) return `${buckets.length} buckets`;
+  if (Array.isArray(collections) && collections.length) return `${collections.length} collections`;
+  if (latency) return `${latency} ms latency`;
+  // None of the recognised fields carried a usable value.
+  return "operational";
+}
diff --git a/frontend/src/widgets/StorageWidget.tsx b/frontend/src/widgets/StorageWidget.tsx
new file mode 100644
index 0000000..6fe3ff1
--- /dev/null
+++ b/frontend/src/widgets/StorageWidget.tsx
@@ -0,0 +1,84 @@
+import { Area, AreaChart, ResponsiveContainer, Tooltip, XAxis, YAxis } from "recharts";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { formatBytes } from "@/lib/utils";
+import { Database, HardDrive } from "lucide-react";
+
+interface Props {
+  totalBytes: number; // shown as "Used"; also the fallback for both series endpoints when `growth` is empty
+  growth: { date: string; bytes: number }[]; // chart series; its first and last entries drive the delta and projection
+}
+
+// Storage card: "Used", "14d Δ" and "Projected · 30d" stats above an area chart of `growth`.
+export function StorageWidget({ totalBytes, growth }: Props) {
+  // Series endpoints; an empty `growth` falls back to `totalBytes` for both.
+  const first = growth[0]?.bytes ?? totalBytes;
+  const last = growth.at(-1)?.bytes ?? totalBytes;
+  const growthBytes = last - first;
+  // Percentage change across the series; the divisor is clamped to at least 1.
+  const delta = (growthBytes / Math.max(first, 1)) * 100;
+  const deltaLabel = `${delta >= 0 ? "+" : ""}${delta.toFixed(1)}%`;
+  // The projection adds twice the observed growth to the current total.
+  const projected = formatBytes(totalBytes + growthBytes * 2);
+  const tooltipStyle = { background: "hsl(var(--popover))", border: "1px solid hsl(var(--border))", borderRadius: 12, fontSize: 12 };
+
+  return (
+    <Card>
+      <CardHeader>
+        <div className="flex items-center justify-between gap-4">
+          <div>
+            <CardTitle>Storage usage</CardTitle>
+            <CardDescription>MinIO + derived artifacts</CardDescription>
+          </div>
+          <div className="flex items-center gap-3 text-xs text-muted-foreground">
+            <Legend label="Originals" color="hsl(var(--primary))" icon={<Database className="h-3.5 w-3.5" />} />
+            <Legend label="Derived" color="hsl(var(--primary) / 0.4)" icon={<HardDrive className="h-3.5 w-3.5" />} />
+          </div>
+        </div>
+      </CardHeader>
+      <CardContent>
+        <div className="grid grid-cols-3 gap-3 pb-3">
+          <Stat label="Used" value={formatBytes(totalBytes)} />
+          <Stat label="14d Δ" value={deltaLabel} tone={delta >= 0 ? "default" : "success"} />
+          <Stat label="Projected · 30d" value={projected} />
+        </div>
+        <div className="h-[160px]">
+          <ResponsiveContainer width="100%" height="100%">
+            <AreaChart data={growth} margin={{ top: 0, right: 8, left: -10, bottom: 0 }}>
+              <defs>
+                <linearGradient id="storage" x1="0" x2="0" y1="0" y2="1">
+                  <stop offset="0%" stopColor="hsl(var(--primary))" stopOpacity={0.4} />
+                  <stop offset="100%" stopColor="hsl(var(--primary))" stopOpacity={0} />
+                </linearGradient>
+              </defs>
+              <XAxis dataKey="date" tickFormatter={(v) => v.slice(5)} tickLine={false} axisLine={false} fontSize={11} />
+              <YAxis tickFormatter={(v) => formatBytes(v, 0)} tickLine={false} axisLine={false} fontSize={11} width={48} />
+              <Tooltip formatter={(v: number) => formatBytes(v)} contentStyle={tooltipStyle} />
+              <Area type="monotone" dataKey="bytes" stroke="hsl(var(--primary))" strokeWidth={2} fill="url(#storage)" />
+            </AreaChart>
+          </ResponsiveContainer>
+        </div>
+      </CardContent>
+    </Card>
+  );
+}
+
+// Legend entry: a dot filled with `color`, followed by the icon and the label.
+function Legend({ label, color, icon }: { label: string; color: string; icon: React.ReactNode }) {
+  const dot = <span className="inline-block h-2.5 w-2.5 rounded-full" style={{ background: color }} />;
+  return (
+    <span className="inline-flex items-center gap-1.5">
+      {dot}{icon}{label}
+    </span>
+  );
+}
+
+// Compact stat box; the "success" tone colours the value, any other tone uses the foreground colour.
+function Stat({ label, value, tone = "default" }: { label: string; value: string; tone?: "default" | "success" }) {
+  const valueColor = tone === "success" ? "text-success" : "text-foreground";
+  return (
+    <div className="rounded-xl border border-border/70 bg-card/40 px-3 py-2">
+      <div className="text-[11px] font-medium uppercase tracking-wide text-muted-foreground">{label}</div>
+      <div className={`mt-0.5 font-mono text-base ${valueColor}`}>{value}</div>
+    </div>
+  );
+}
diff --git a/frontend/tailwind.config.ts b/frontend/tailwind.config.ts
new file mode 100644
index 0000000..e8ce1cc
--- /dev/null
+++ b/frontend/tailwind.config.ts
@@ -0,0 +1,131 @@
+import type { Config } from "tailwindcss";
+import animate from "tailwindcss-animate";
+
+const config: Config = { // Tailwind configuration object; exported as this module's default
+  darkMode: ["class"],
+  content: ["./index.html", "./src/**/*.{ts,tsx}"],
+  theme: {
+    container: {
+      center: true,
+      padding: "1.5rem",
+      screens: {
+        "2xl": "1440px",
+      },
+    },
+    extend: {
+      fontFamily: {
+        sans: [
+          "InterVariable",
+          "Inter",
+          "ui-sans-serif",
+          "system-ui",
+          "-apple-system",
+          "Segoe UI",
+          "Roboto",
+          "sans-serif",
+        ],
+        mono: ["JetBrains Mono", "ui-monospace", "SFMono-Regular", "monospace"],
+      },
+      colors: { // every colour wraps a CSS custom property in hsl(); the variables are not defined in this file
+        border: "hsl(var(--border))",
+        input: "hsl(var(--input))",
+        ring: "hsl(var(--ring))",
+        background: "hsl(var(--background))",
+        foreground: "hsl(var(--foreground))",
+        primary: {
+          DEFAULT: "hsl(var(--primary))",
+          foreground: "hsl(var(--primary-foreground))",
+          50: "hsl(var(--primary-50))",
+          100: "hsl(var(--primary-100))",
+          200: "hsl(var(--primary-200))",
+          500: "hsl(var(--primary-500))",
+          600: "hsl(var(--primary-600))",
+          700: "hsl(var(--primary-700))",
+        },
+        secondary: {
+          DEFAULT: "hsl(var(--secondary))",
+          foreground: "hsl(var(--secondary-foreground))",
+        },
+        destructive: {
+          DEFAULT: "hsl(var(--destructive))",
+          foreground: "hsl(var(--destructive-foreground))",
+        },
+        warning: {
+          DEFAULT: "hsl(var(--warning))",
+          foreground: "hsl(var(--warning-foreground))",
+        },
+        success: {
+          DEFAULT: "hsl(var(--success))",
+          foreground: "hsl(var(--success-foreground))",
+        },
+        muted: {
+          DEFAULT: "hsl(var(--muted))",
+          foreground: "hsl(var(--muted-foreground))",
+        },
+        accent: {
+          DEFAULT: "hsl(var(--accent))",
+          foreground: "hsl(var(--accent-foreground))",
+        },
+        popover: {
+          DEFAULT: "hsl(var(--popover))",
+          foreground: "hsl(var(--popover-foreground))",
+        },
+        card: {
+          DEFAULT: "hsl(var(--card))",
+          foreground: "hsl(var(--card-foreground))",
+        },
+        surface: {
+          DEFAULT: "hsl(var(--surface))",
+          raised: "hsl(var(--surface-raised))",
+          sunken: "hsl(var(--surface-sunken))",
+        },
+      },
+      borderRadius: { // all radii are offsets from the --radius custom property
+        lg: "var(--radius)",
+        md: "calc(var(--radius) - 4px)",
+        sm: "calc(var(--radius) - 8px)",
+        xl: "calc(var(--radius) + 4px)",
+        "2xl": "calc(var(--radius) + 10px)",
+      },
+      boxShadow: {
+        soft: "0 1px 2px rgba(15,23,42,0.04), 0 4px 16px rgba(15,23,42,0.04)",
+        glass: "0 1px 0 rgba(255,255,255,0.6) inset, 0 8px 32px rgba(15,23,42,0.06)",
+        ring: "0 0 0 4px hsl(var(--primary) / 0.18)",
+        elevated: "0 12px 40px -12px rgba(15,23,42,0.18)",
+      },
+      backgroundImage: {
+        "grid-light":
+          "linear-gradient(to right, hsl(var(--border)) 1px, transparent 1px), linear-gradient(to bottom, hsl(var(--border)) 1px, transparent 1px)",
+        "radial-fade":
+          "radial-gradient(60% 40% at 30% 20%, hsl(var(--primary) / 0.10), transparent 70%)",
+      },
+      keyframes: {
+        "accordion-down": {
+          from: { height: "0" },
+          to: { height: "var(--radix-accordion-content-height)" },
+        },
+        "accordion-up": {
+          from: { height: "var(--radix-accordion-content-height)" },
+          to: { height: "0" },
+        },
+        shimmer: {
+          "0%": { backgroundPosition: "-200% 0" },
+          "100%": { backgroundPosition: "200% 0" },
+        },
+        "fade-in-up": {
+          "0%": { opacity: "0", transform: "translateY(6px)" },
+          "100%": { opacity: "1", transform: "translateY(0)" },
+        },
+      },
+      animation: { // each entry names one of the keyframes declared above
+        "accordion-down": "accordion-down 0.2s ease-out",
+        "accordion-up": "accordion-up 0.2s ease-out",
+        shimmer: "shimmer 1.6s linear infinite",
+        "fade-in-up": "fade-in-up 0.3s ease-out",
+      },
+    },
+  },
+  plugins: [animate], // tailwindcss-animate, imported at the top of this file
+};
+
+export default config;
diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json
new file mode 100644
index 0000000..8f17968
--- /dev/null
+++ b/frontend/tsconfig.json
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "lib": ["ES2023", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "jsx": "react-jsx",
+    "useDefineForClassFields": true,
+    "strict": true,
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "noFallthroughCasesInSwitch": true,
+    "esModuleInterop": true,
+    "isolatedModules": true,
+    "resolveJsonModule": true,
+    "skipLibCheck": true,
+    "allowSyntheticDefaultImports": true,
+    "forceConsistentCasingInFileNames": true,
+    "baseUrl": ".",
+    "paths": {
+      "@/*": ["src/*"]
+    }
+  },
+  "include": ["src"],
+  "references": [{ "path": "./tsconfig.node.json" }]
+}
diff --git a/frontend/tsconfig.node.json b/frontend/tsconfig.node.json
new file mode 100644
index 0000000..cbd2a63
--- /dev/null
+++ b/frontend/tsconfig.node.json
@@ -0,0 +1,11 @@
+{
+  "compilerOptions": {
+    "composite": true,
+    "skipLibCheck": true,
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "allowSyntheticDefaultImports": true,
+    "strict": true
+  },
+  "include": ["vite.config.ts"]
+}
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
new file mode 100644
index 0000000..5f0c73b
--- /dev/null
+++ b/frontend/vite.config.ts
@@ -0,0 +1,26 @@
+import { defineConfig } from "vite";
+import react from "@vitejs/plugin-react";
+import path from "node:path";
+
+export default defineConfig({ // Vite configuration: React plugin, "@" alias, dev server and build output
+  plugins: [react()],
+  resolve: {
+    alias: {
+      "@": path.resolve(__dirname, "./src"), // "@" maps to ./src, matching the "@/*" path in tsconfig.json
+    },
+  },
+  server: {
+    port: 5273,
+    strictPort: true,
+    proxy: {
+      "/api": { // requests under /api are forwarded to the target below
+        target: "http://localhost:8000",
+        changeOrigin: true,
+      },
+    },
+  },
+  build: {
+    outDir: "dist",
+    sourcemap: true,
+  },
+});
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..fb3ffdc
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,92 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "legacy-knowledge-indexer"
+version = "0.1.0"
+description = "LegacyHUB - production-grade ingestion and hybrid search over legacy PDF archives"
+requires-python = ">=3.11,<3.13"
+authors = [{ name = "TeamHUB" }]
+license = { text = "Apache-2.0" }
+readme = "README.md"
+
+dependencies = [
+    "fastapi>=0.115.0",
+    "uvicorn[standard]>=0.30.0",
+    "pydantic>=2.7.0",
+    "pydantic-settings>=2.4.0",
+    "python-multipart>=0.0.9",
+
+    # DB
+    "sqlalchemy>=2.0.30",
+    "psycopg[binary]>=3.2.0",
+    "alembic>=1.13.0",
+
+    # Object storage
+    "minio>=7.2.7",
+
+    # Search/index
+    "opensearch-py>=2.6.0",
+    "qdrant-client>=1.10.0",
+
+    # Workers
+    "celery>=5.4.0",
+    "redis>=5.0.7",
+
+    # Ingestion
+    "ocrmypdf>=16.4.0",
+    "pikepdf>=9.0.0",
+    "pypdf>=4.3.0",
+    "pdfminer.six>=20240706",
+    "docling>=2.0.0",
+
+    # ML
+    "FlagEmbedding>=1.3.0",
+    "sentence-transformers>=3.0.0",
+    "torch>=2.2.0",
+    "numpy>=1.26.0",
+    "transformers>=4.42.0",
+
+    # Misc
+    "httpx>=0.27.0",
+    "tenacity>=8.5.0",
+    "structlog>=24.2.0",
+    "orjson>=3.10.0",
+    "python-magic>=0.4.27; platform_system != 'Windows'",
+    "python-magic-bin>=0.4.14; platform_system == 'Windows'",
+    "langdetect>=1.0.9",
+    "regex>=2024.5.15",
+    "rich>=13.7.1",
+    "tqdm>=4.66.4",
+    "click>=8.1.7",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.2.0",
+    "pytest-asyncio>=0.23.7",
+    "ruff>=0.5.0",
+    "mypy>=1.10.0",
+    "types-requests",
+]
+
+[project.scripts]
+legacyhub-ingest = "scripts.ingest_folder:main"
+legacyhub-reindex = "scripts.reindex_document:main"
+legacyhub-smoke = "scripts.smoke_test:main"
+
+[tool.hatch.build.targets.wheel]
+packages = ["app", "scripts"]
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "B", "UP", "N", "PL", "RUF"]
+ignore = ["E501", "PLR0913", "PLR2004"]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+asyncio_mode = "auto"
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/ingest_folder.py b/scripts/ingest_folder.py
new file mode 100644
index 0000000..220e7df
--- /dev/null
+++ b/scripts/ingest_folder.py
@@ -0,0 +1,60 @@
+"""Synchronous CLI ingestion: discover -> queue -> process inline.
+
+Use ``--mode celery`` to push tasks to Celery instead of running inline (the
+default inline mode is convenient for ad-hoc runs without a worker container).
+"""
+
+from __future__ import annotations
+
+import sys
+import uuid
+from pathlib import Path
+
+import click
+
+from app.ingestion.scanner import discover_documents
+from app.logging_config import configure_logging, get_logger
+
+configure_logging()
+logger = get_logger(__name__)
+
+
+@click.command()
+@click.option("--path", required=True, type=click.Path(exists=True, file_okay=True, dir_okay=True, path_type=Path))
+@click.option("--recursive/--no-recursive", default=True)
+@click.option("--force", is_flag=True, default=False, help="Re-process even if SHA already exists")
+@click.option("--mode", type=click.Choice(["inline", "celery"]), default="inline")
+def main(path: Path, recursive: bool, force: bool, mode: str) -> None:
+    run_id = uuid.uuid4()
+    discovered = queued = dups = invalid = 0
+
+    for record in discover_documents(path, recursive=recursive, force=force):
+        discovered += 1
+        if record.duplicate and not force:
+            dups += 1
+            continue
+        if not record.document_id:
+            invalid += 1
+            continue
+
+        if mode == "celery":
+            from app.workers.tasks import process_document
+            process_document.delay(str(record.document_id), str(run_id))
+        else:
+            from app.ingestion.pipeline import process_document_id
+            try:
+                result = process_document_id(record.document_id, run_id)
+                logger.info("ingest.cli.processed", path=str(record.path), result=result)
+            except Exception as exc:  # noqa: BLE001
+                logger.exception("ingest.cli.failed", path=str(record.path), error=str(exc))
+                invalid += 1
+                continue
+        queued += 1
+
+    click.echo(
+        f"discovered={discovered} queued={queued} duplicates={dups} invalid={invalid} run={run_id}"
+    )
+
+
+if __name__ == "__main__":
+    sys.exit(main(standalone_mode=True) or 0)
diff --git a/scripts/init_db.py b/scripts/init_db.py
new file mode 100644
index 0000000..a2390d1
--- /dev/null
+++ b/scripts/init_db.py
@@ -0,0 +1,25 @@
+"""Apply Alembic migrations against the configured Postgres."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+from alembic import command
+from alembic.config import Config
+
+from app.config import settings
+
+
+def main() -> int:
+    root = Path(__file__).resolve().parents[1]
+    cfg = Config(str(root / "alembic.ini"))
+    cfg.set_main_option("script_location", str(root / "app" / "db" / "migrations"))
+    cfg.set_main_option("sqlalchemy.url", settings.database_url)
+    command.upgrade(cfg, "head")
+    print("alembic upgrade head: ok")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/init_opensearch.py b/scripts/init_opensearch.py
new file mode 100644
index 0000000..90dea68
--- /dev/null
+++ b/scripts/init_opensearch.py
@@ -0,0 +1,17 @@
+"""Bootstrap the OpenSearch chunk index."""
+
+from __future__ import annotations
+
+import sys
+
+from app.indexing.opensearch_client import ensure_index
+
+
+def main() -> int:
+    ensure_index()
+    print("opensearch index ensured")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/init_qdrant.py b/scripts/init_qdrant.py
new file mode 100644
index 0000000..6d2d058
--- /dev/null
+++ b/scripts/init_qdrant.py
@@ -0,0 +1,17 @@
+"""Bootstrap the Qdrant chunk collection."""
+
+from __future__ import annotations
+
+import sys
+
+from app.indexing.qdrant_client import ensure_collection
+
+
+def main() -> int:
+    ensure_collection()
+    print("qdrant collection ensured")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/reindex_document.py b/scripts/reindex_document.py
new file mode 100644
index 0000000..8543340
--- /dev/null
+++ b/scripts/reindex_document.py
@@ -0,0 +1,24 @@
+"""Re-run the pipeline for a single document by ID."""
+
+from __future__ import annotations
+
+import sys
+import uuid
+
+import click
+
+from app.ingestion.pipeline import process_document_id
+from app.logging_config import configure_logging
+
+configure_logging()
+
+
+@click.command()
+@click.option("--document-id", required=True, type=str)
+def main(document_id: str) -> None:
+    result = process_document_id(uuid.UUID(document_id))
+    click.echo(result)
+
+
+if __name__ == "__main__":
+    sys.exit(main(standalone_mode=True) or 0)
diff --git a/scripts/smoke_test.py b/scripts/smoke_test.py
new file mode 100644
index 0000000..b482503
--- /dev/null
+++ b/scripts/smoke_test.py
@@ -0,0 +1,74 @@
+"""Smoke test - verify all infrastructure is reachable and indices are present.
+
+Runs every check even if an earlier one fails; exits non-zero if any check failed.
+"""
+
+from __future__ import annotations
+
+import sys
+
+from sqlalchemy import text
+
+from app.db.session import get_engine
+from app.indexing.opensearch_client import ensure_index, get_opensearch
+from app.indexing.qdrant_client import ensure_collection, get_qdrant
+from app.logging_config import configure_logging, get_logger
+from app.storage.minio_client import get_storage
+
+configure_logging()
+logger = get_logger(__name__)
+
+
+def main() -> int:
+    failures: list[str] = []
+
+    # Postgres
+    try:
+        with get_engine().connect() as conn:
+            conn.execute(text("SELECT 1"))
+        print("[ok] postgres")
+    except Exception as exc:  # noqa: BLE001
+        failures.append(f"postgres: {exc}")
+        print(f"[err] postgres: {exc}")
+
+    # MinIO
+    try:
+        s = get_storage()
+        s.ensure_buckets()
+        info = s.health()
+        if info.get("status") != "ok":
+            raise RuntimeError(info)
+        print("[ok] minio:", info.get("buckets"))
+    except Exception as exc:  # noqa: BLE001
+        failures.append(f"minio: {exc}")
+        print(f"[err] minio: {exc}")
+
+    # OpenSearch
+    try:
+        ensure_index()
+        info = get_opensearch().cluster.health()
+        print("[ok] opensearch:", info.get("status"))
+    except Exception as exc:  # noqa: BLE001
+        failures.append(f"opensearch: {exc}")
+        print(f"[err] opensearch: {exc}")
+
+    # Qdrant
+    try:
+        ensure_collection()
+        cols = [c.name for c in get_qdrant().get_collections().collections]
+        print("[ok] qdrant collections:", cols)
+    except Exception as exc:  # noqa: BLE001
+        failures.append(f"qdrant: {exc}")
+        print(f"[err] qdrant: {exc}")
+
+    if failures:
+        print("\nSMOKE FAIL:")
+        for f in failures:
+            print(" -", f)
+        return 1
+    print("\nSMOKE OK")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..e46979b
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,10 @@
+"""Pytest configuration - put repository root on sys.path."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
diff --git a/tests/test_chunker.py b/tests/test_chunker.py
new file mode 100644
index 0000000..9ffc5ef
--- /dev/null
+++ b/tests/test_chunker.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+from app.ingestion.chunker import chunk_extraction
+from app.ingestion.docling_extractor import (
+    ExtractedBlock,
+    ExtractedFigure,
+    ExtractedPage,
+    ExtractedTable,
+    ExtractionResult,
+)
+
+
+def _extraction(blocks=None, tables=None, figures=None, pages=None) -> ExtractionResult:
+    return ExtractionResult(
+        markdown="",
+        json_payload={},
+        blocks=blocks or [],
+        tables=tables or [],
+        figures=figures or [],
+        pages=pages or [ExtractedPage(page_number=1, text="")],
+    )
+
+
+def test_chunker_emits_table_unsplit():
+    md = "| a | b |\n| --- | --- |\n| 1 | 2 |\n| 3 | 4 |"
+    extraction = _extraction(tables=[ExtractedTable(page_number=2, table_index=0, markdown=md)])
+    chunks = chunk_extraction(extraction)
+    table_chunks = [c for c in chunks if c.block_type == "table"]
+    assert len(table_chunks) == 1
+    assert "| 1 | 2 |" in table_chunks[0].text
+    assert table_chunks[0].page_number == 2
+    assert table_chunks[0].quality_flags["table_detected"] is True
+
+
+def test_chunker_handles_paragraphs_with_section_heading():
+    extraction = _extraction(
+        blocks=[
+            ExtractedBlock(page_number=1, block_type="heading", text="Глава 1. Введение"),
+            ExtractedBlock(
+                page_number=1,
+                block_type="paragraph",
+                text="Первый параграф документа " * 30,
+            ),
+            ExtractedBlock(
+                page_number=1,
+                block_type="paragraph",
+                text="Второй параграф продолжает тему " * 30,
+            ),
+        ]
+    )
+    chunks = chunk_extraction(extraction)
+    text_chunks = [c for c in chunks if c.block_type in ("paragraph", "heading")]
+    assert text_chunks, "expected at least one narrative chunk"
+    # The section heading should be included as context in at least one chunk.
+    assert any("Глава 1" in c.text for c in text_chunks)
+    # Each chunk should carry citation metadata.
+    for c in text_chunks:
+        assert c.page_number == 1
+        assert c.quality_flags is not None
+        assert "needs_manual_review" in c.quality_flags
+
+
+def test_chunker_emits_figure_caption_chunks():
+    extraction = _extraction(
+        figures=[ExtractedFigure(page_number=4, figure_index=0, caption="Схема ремонта")]
+    )
+    chunks = chunk_extraction(extraction)
+    fig_chunks = [c for c in chunks if c.block_type.startswith("figure")]
+    assert fig_chunks
+    assert fig_chunks[0].page_number == 4
+    assert "Схема ремонта" in fig_chunks[0].text
+
+
+def test_chunker_splits_giant_block():
+    huge = " ".join(f"word{i}" for i in range(5000))
+    extraction = _extraction(
+        blocks=[ExtractedBlock(page_number=1, block_type="paragraph", text=huge)]
+    )
+    chunks = chunk_extraction(extraction)
+    narrative = [c for c in chunks if c.block_type == "paragraph"]
+    assert len(narrative) >= 2  # the giant block must be split
diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py
new file mode 100644
index 0000000..878f1d8
--- /dev/null
+++ b/tests/test_duplicates.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from app.utils.hashing import sha256_file
+
+
+def test_two_files_with_same_content_share_sha(tmp_path: Path):
+    a = tmp_path / "a.pdf"
+    b = tmp_path / "b.pdf"
+    payload = b"%PDF-1.4\n" + b"x" * 4096
+    a.write_bytes(payload)
+    b.write_bytes(payload)
+    assert sha256_file(a) == sha256_file(b)
+
+
+def test_one_byte_difference_changes_sha(tmp_path: Path):
+    a = tmp_path / "a.pdf"
+    b = tmp_path / "b.pdf"
+    a.write_bytes(b"%PDF-1.4\n" + b"x" * 4096)
+    b.write_bytes(b"%PDF-1.4\n" + b"x" * 4095 + b"y")
+    assert sha256_file(a) != sha256_file(b)
diff --git a/tests/test_hashing.py b/tests/test_hashing.py
new file mode 100644
index 0000000..a5e9a45
--- /dev/null
+++ b/tests/test_hashing.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+
+from app.utils.hashing import sha256_bytes, sha256_file
+
+
+def test_sha256_bytes_matches_hashlib():
+    data = b"legacyhub" * 1000
+    assert sha256_bytes(data) == hashlib.sha256(data).hexdigest()
+
+
+def test_sha256_file_streaming_matches_hashlib(tmp_path: Path):
+    payload = b"\x01\x02\x03" * 5_000_000  # 15 MiB - exercises chunking
+    target = tmp_path / "blob.bin"
+    target.write_bytes(payload)
+    assert sha256_file(target) == hashlib.sha256(payload).hexdigest()
+
+
+def test_sha256_file_distinguishes_content(tmp_path: Path):
+    a = tmp_path / "a.bin"
+    b = tmp_path / "b.bin"
+    a.write_bytes(b"alpha")
+    b.write_bytes(b"beta")
+    assert sha256_file(a) != sha256_file(b)
diff --git a/tests/test_hybrid_search.py b/tests/test_hybrid_search.py
new file mode 100644
index 0000000..150b9ed
--- /dev/null
+++ b/tests/test_hybrid_search.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from app.api.schemas import SearchMode
+from app.indexing.hybrid_search import _Candidate, _merge
+
+
+def _make(chunk_id: str, *, bm25_rank: int | None = None, dense_rank: int | None = None) -> _Candidate:
+    return _Candidate(
+        chunk_id=chunk_id,
+        document_id="00000000-0000-0000-0000-000000000000",
+        page_number=1,
+        block_type="paragraph",
+        block_id=None,
+        text=f"text-{chunk_id}",
+        source_path="/tmp/doc.pdf",
+        original_file_name="doc.pdf",
+        quality_flags={},
+        metadata={},
+        bm25_score=None if bm25_rank is None else 1.0 / bm25_rank,
+        bm25_rank=bm25_rank,
+        dense_score=None if dense_rank is None else 1.0 - 0.1 * dense_rank,
+        dense_rank=dense_rank,
+    )
+
+
+def test_merge_lexical_passes_through():
+    lex = [_make("a", bm25_rank=1), _make("b", bm25_rank=2)]
+    out = _merge(lex, [], "lexical")
+    assert [c.chunk_id for c in out] == ["a", "b"]
+
+
+def test_merge_hybrid_uses_rrf_to_rank_intersected_results_higher():
+    lex = [_make("a", bm25_rank=2), _make("b", bm25_rank=1)]
+    sem = [_make("a", dense_rank=1), _make("c", dense_rank=2)]
+    merged = _merge(lex, sem, "hybrid")
+    ids = [c.chunk_id for c in merged]
+    # ``a`` appears in both, so it should beat ``b`` and ``c`` after RRF.
+    assert ids[0] == "a"
+    assert set(ids) == {"a", "b", "c"}
+
+
+def test_merge_hybrid_handles_disjoint_sets():
+    lex = [_make("x", bm25_rank=1)]
+    sem = [_make("y", dense_rank=1)]
+    merged = _merge(lex, sem, "hybrid")
+    assert {c.chunk_id for c in merged} == {"x", "y"}
+
+
+def test_search_mode_typed():
+    # Smoke - the literal type accepts the three valid values.
+    valid: list[SearchMode] = ["lexical", "semantic", "hybrid"]
+    assert valid == ["lexical", "semantic", "hybrid"]
diff --git a/tests/test_quality.py b/tests/test_quality.py
new file mode 100644
index 0000000..8813e90
--- /dev/null
+++ b/tests/test_quality.py
@@ -0,0 +1,45 @@
+from __future__ import annotations
+
+from app.ingestion.quality import compute_quality_flags
+from app.utils.text_cleaning import clean_ocr_text, looks_garbled, normalize_for_search
+
+
+def test_quality_low_confidence_flags_review():
+    flags = compute_quality_flags(text="hello world", block_type="paragraph", ocr_confidence=0.4)
+    assert flags["low_ocr_confidence"] is True
+    assert flags["needs_manual_review"] is True
+
+
+def test_quality_short_text():
+    flags = compute_quality_flags(text="abc", block_type="paragraph", ocr_confidence=0.95)
+    assert flags["very_short_text"] is True
+    assert flags["needs_manual_review"] is False
+
+
+def test_quality_handwriting_forces_review():
+    flags = compute_quality_flags(
+        text="неразборчивый текст",
+        block_type="handwriting",
+        ocr_confidence=0.9,
+        has_handwriting=True,
+    )
+    assert flags["handwriting_detected"] is True
+    assert flags["needs_manual_review"] is True
+
+
+def test_clean_ocr_text_removes_hyphenation():
+    raw = "инвен-\nтарный номер 123"
+    assert clean_ocr_text(raw) == "инвентарный номер 123"
+
+
+def test_normalize_preserves_codes():
+    text = "ГОСТ 21.501-93 № 12/345"
+    norm = normalize_for_search(text)
+    # Standard codes survive normalization (digits, slashes, dashes, dots).
+    assert "21.501-93" in norm
+    assert "12/345" in norm
+
+
+def test_looks_garbled_detects_noise():
+    assert looks_garbled("@@##$$%%^^&&**(()_)(*&^%$#@!" * 5)
+    assert not looks_garbled("Hello world, this is a perfectly ordinary line of text.")