chore: bootstrap repository with governance docs

Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:41:50 +03:00
commit 7f72171572
157 changed files with 11298 additions and 0 deletions

92
pyproject.toml Normal file
View File

@@ -0,0 +1,92 @@
# PEP 517 build configuration: the project is built with the hatchling backend.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Core package metadata (PEP 621).
[project]
name = "legacy-knowledge-indexer"
version = "0.1.0"
description = "LegacyHUB - production-grade ingestion and hybrid search over legacy PDF archives"
readme = "README.md"
license = { text = "Apache-2.0" }
authors = [{ name = "TeamHUB" }]
# Only Python 3.11 and 3.12 are accepted; 3.13+ is excluded by the upper bound.
requires-python = ">=3.11,<3.13"
# Runtime requirements, grouped by the subsystem that needs them.
# All pins are lower bounds only.
dependencies = [
    # Web API and settings
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.30.0",
    "pydantic>=2.7.0",
    "pydantic-settings>=2.4.0",
    "python-multipart>=0.0.9",
    # Relational database access and schema migrations
    "sqlalchemy>=2.0.30",
    "psycopg[binary]>=3.2.0",
    "alembic>=1.13.0",
    # Object storage client
    "minio>=7.2.7",
    # Search / index clients
    "opensearch-py>=2.6.0",
    "qdrant-client>=1.10.0",
    # Background workers
    "celery>=5.4.0",
    "redis>=5.0.7",
    # PDF ingestion: OCR, parsing, text extraction
    "ocrmypdf>=16.4.0",
    "pikepdf>=9.0.0",
    "pypdf>=4.3.0",
    "pdfminer.six>=20240706",
    "docling>=2.0.0",
    # Machine learning / embeddings
    "FlagEmbedding>=1.3.0",
    "sentence-transformers>=3.0.0",
    "torch>=2.2.0",
    "numpy>=1.26.0",
    "transformers>=4.42.0",
    # Miscellaneous utilities
    "httpx>=0.27.0",
    "tenacity>=8.5.0",
    "structlog>=24.2.0",
    "orjson>=3.10.0",
    # The environment markers make these two mutually exclusive:
    # python-magic everywhere except Windows, python-magic-bin on Windows only.
    "python-magic>=0.4.27; platform_system != 'Windows'",
    "python-magic-bin>=0.4.14; platform_system == 'Windows'",
    "langdetect>=1.0.9",
    "regex>=2024.5.15",
    "rich>=13.7.1",
    "tqdm>=4.66.4",
    "click>=8.1.7",
]
# Optional extras. `dev` bundles the test runner, linter and type checker
# and is not installed unless the extra is requested explicitly.
[project.optional-dependencies]
dev = [
    "pytest>=8.2.0",
    "pytest-asyncio>=0.23.7",
    "ruff>=0.5.0",
    "mypy>=1.10.0",
    # Type stubs only; intentionally left unpinned.
    "types-requests",
]
# Console commands created on install. Each one maps to the `main` callable
# of a module inside the `scripts` package.
[project.scripts]
legacyhub-ingest = "scripts.ingest_folder:main"
legacyhub-reindex = "scripts.reindex_document:main"
legacyhub-smoke = "scripts.smoke_test:main"
# Top-level packages shipped in the wheel. `scripts` must be included because
# the [project.scripts] entry points import from it.
[tool.hatch.build.targets.wheel]
packages = ["app", "scripts"]
# Ruff linter configuration. target-version matches the lower bound of
# requires-python (3.11).
[tool.ruff]
line-length = 100
target-version = "py311"
[tool.ruff.lint]
# Enabled rule families.
select = [
    "E",    # pycodestyle errors
    "F",    # Pyflakes
    "I",    # isort (import ordering)
    "B",    # flake8-bugbear
    "UP",   # pyupgrade
    "N",    # pep8-naming
    "PL",   # Pylint
    "RUF",  # Ruff-specific rules
]
# Individual rules switched off within the families above.
ignore = [
    "E501",     # line too long
    "PLR0913",  # too many arguments in a function definition
    "PLR2004",  # magic value used in comparison
]
# Pytest configuration: tests are collected from the `tests` directory.
[tool.pytest.ini_options]
testpaths = ["tests"]
# pytest-asyncio "auto" mode: async test functions are run as asyncio tests
# without needing an explicit marker on each one.
asyncio_mode = "auto"