Files
LegacyHUB/pyproject.toml
Vadim Malanov 463622c644 deps: tighten version ranges, pin Docling to <2.15
Docling's DocumentConverter shape (text_items, prov[0].page_no,
export_to_markdown signature) still moves between 2.x minor releases.
Cap docling to >=2.0.0,<2.15 so a wheel bump cannot silently break
the defensive walkers in app/ingestion/docling_extractor.py until a
staging smoke test has run against the new minor.

Most other runtime deps get the same major/minor upper bound
(python-multipart, pdfminer.six, python-magic, python-magic-bin and
regex keep only a lower bound):
- web/api: fastapi <0.117, uvicorn <0.33, pydantic <3
- db: sqlalchemy <2.1, psycopg <3.3, alembic <1.14
- search: opensearch-py <3, qdrant-client <1.13
- ingest: ocrmypdf <17, pikepdf <10, pypdf <6
- ml: FlagEmbedding <2, sentence-transformers <4, transformers <5,
      torch <3, numpy <3
- ops/utils: structlog <26, orjson <4, httpx <0.29, click <9

Lift any specific upper bound only after the corresponding regression
test passes on a staging upgrade.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 17:12:15 +03:00

98 lines
2.5 KiB
TOML

# Build backend declaration: hatchling is the only build requirement and
# provides the backend entry point. No version constraint is placed on it.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
# Package metadata and runtime requirements.
[project]
name = "legacy-knowledge-indexer"
version = "0.1.0"
description = "LegacyHUB - production-grade ingestion and hybrid search over legacy PDF archives"
# Accepts Python 3.11 and 3.12 only.
requires-python = ">=3.11,<3.13"
authors = [{ name = "TeamHUB" }]
license = { text = "Apache-2.0" }
readme = "README.md"
# Runtime requirements, grouped by role. Most entries carry an explicit
# upper bound next to their lower bound.
# NOTE(review): python-multipart, pdfminer.six, python-magic,
# python-magic-bin and regex have a lower bound only - confirm that is
# intentional.
dependencies = [
    # Web/API and settings
    "fastapi>=0.115.0,<0.117",
    "uvicorn[standard]>=0.30.0,<0.33",
    "pydantic>=2.7.0,<3",
    "pydantic-settings>=2.4.0,<3",
    "python-multipart>=0.0.9",

    # DB
    "sqlalchemy>=2.0.30,<2.1",
    "psycopg[binary]>=3.2.0,<3.3",
    "alembic>=1.13.0,<1.14",

    # Object storage
    "minio>=7.2.7,<8",

    # Search/index
    "opensearch-py>=2.6.0,<3",
    "qdrant-client>=1.10.0,<1.13",

    # Workers
    "celery>=5.4.0,<6",
    "redis>=5.0.7,<6",

    # Ingestion
    "ocrmypdf>=16.4.0,<17",
    "pikepdf>=9.0.0,<10",
    "pypdf>=4.3.0,<6",
    "pdfminer.six>=20240706",
    # Docling is capped at the minor level because its DocumentConverter
    # API still moves between minor releases; raise the ceiling only after
    # a smoke test on a staging corpus.
    "docling>=2.0.0,<2.15",

    # ML - FlagEmbedding, sentence-transformers and transformers stay
    # inside the release families verified against the reranker contract
    # tests. Torch is capped at the major version to keep CUDA wheels
    # discoverable.
    "FlagEmbedding>=1.3.0,<2",
    "sentence-transformers>=3.0.0,<4",
    "torch>=2.2.0,<3",
    "numpy>=1.26.0,<3",
    "transformers>=4.42.0,<5",

    # Misc
    "httpx>=0.27.0,<0.29",
    "tenacity>=8.5.0,<10",
    "structlog>=24.2.0,<26",
    "orjson>=3.10.0,<4",
    # The environment markers select python-magic everywhere except
    # Windows, and python-magic-bin on Windows.
    "python-magic>=0.4.27; platform_system != 'Windows'",
    "python-magic-bin>=0.4.14; platform_system == 'Windows'",
    "langdetect>=1.0.9,<2",
    "regex>=2024.5.15",
    "rich>=13.7.1,<14",
    "tqdm>=4.66.4,<5",
    "click>=8.1.7,<9",
]
# Optional extras. `dev` bundles the test runner, linter and type checker;
# these entries carry lower bounds only.
[project.optional-dependencies]
dev = [
    "pytest>=8.2.0",
    "pytest-asyncio>=0.23.7",
    "ruff>=0.5.0",
    "mypy>=1.10.0",
    # NOTE(review): the runtime dependencies declare httpx but not
    # requests - confirm these stubs are still needed.
    "types-requests",
]
# Console commands. Each key is the command name; each value is the
# `module:function` entry point, all of which live in the `scripts` package.
[project.scripts]
legacyhub-ingest = "scripts.ingest_folder:main"
legacyhub-reindex = "scripts.reindex_document:main"
legacyhub-smoke = "scripts.smoke_test:main"
# Packages shipped in the wheel. `scripts` is listed alongside `app`
# because the console commands in [project.scripts] point into it.
[tool.hatch.build.targets.wheel]
packages = ["app", "scripts"]
# Ruff settings. The target version matches the lower bound of
# `requires-python` (3.11).
[tool.ruff]
line-length = 100
target-version = "py311"

# Enabled lint rule families and the individual rules switched off.
[tool.ruff.lint]
select = [
    "E",    # pycodestyle errors
    "F",    # Pyflakes
    "I",    # isort
    "B",    # flake8-bugbear
    "UP",   # pyupgrade
    "N",    # pep8-naming
    "PL",   # Pylint
    "RUF",  # Ruff-specific rules
]
ignore = [
    "E501",     # line-too-long
    "PLR0913",  # too-many-arguments
    "PLR2004",  # magic-value-comparison
]
# pytest configuration: tests are collected from `tests`, and the asyncio
# mode (used by pytest-asyncio, listed in the `dev` extra) is set to "auto".
[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"