# Packaging, dependency, and tooling configuration for LegacyHUB.

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "legacy-knowledge-indexer"
version = "0.1.0"
description = "LegacyHUB - production-grade ingestion and hybrid search over legacy PDF archives"
requires-python = ">=3.11,<3.13"
authors = [{ name = "TeamHUB" }]
license = { text = "Apache-2.0" }
readme = "README.md"
dependencies = [
    # API and configuration
    "fastapi>=0.115.0,<0.117",
    "uvicorn[standard]>=0.30.0,<0.33",
    "pydantic>=2.7.0,<3",
    "pydantic-settings>=2.4.0,<3",
    "python-multipart>=0.0.9",
    # DB
    "sqlalchemy>=2.0.30,<2.1",
    "psycopg[binary]>=3.2.0,<3.3",
    "alembic>=1.13.0,<1.14",
    # Object storage
    "minio>=7.2.7,<8",
    # Search/index
    "opensearch-py>=2.6.0,<3",
    "qdrant-client>=1.10.0,<1.13",
    # Workers
    "celery>=5.4.0,<6",
    "redis>=5.0.7,<6",
    # Ingestion
    "ocrmypdf>=16.4.0,<17",
    "pikepdf>=9.0.0,<10",
    "pypdf>=4.3.0,<6",
    "pdfminer.six>=20240706",
    # Docling is pinned tightly because its DocumentConverter API still
    # moves between minor releases; lift the upper bound only after a
    # smoke test on a staging corpus.
    "docling>=2.0.0,<2.15",
    # ML - FlagEmbedding, sentence-transformers, and transformers are
    # pinned within the release families that have been verified against
    # the reranker contract tests. Torch follows the family-major pin to
    # keep CUDA wheels discoverable.
    "FlagEmbedding>=1.3.0,<2",
    "sentence-transformers>=3.0.0,<4",
    "torch>=2.2.0,<3",
    "numpy>=1.26.0,<3",
    "transformers>=4.42.0,<5",
    # Misc
    "httpx>=0.27.0,<0.29",
    "tenacity>=8.5.0,<10",
    "structlog>=24.2.0,<26",
    "orjson>=3.10.0,<4",
    # The environment markers are mutually exclusive: Windows installs
    # python-magic-bin, every other platform installs python-magic.
    "python-magic>=0.4.27; platform_system != 'Windows'",
    "python-magic-bin>=0.4.14; platform_system == 'Windows'",
    "langdetect>=1.0.9,<2",
    "regex>=2024.5.15",
    "rich>=13.7.1,<14",
    "tqdm>=4.66.4,<5",
    "click>=8.1.7,<9",
]

[project.optional-dependencies]
dev = [
    "pytest>=8.2.0",
    "pytest-asyncio>=0.23.7",
    "ruff>=0.5.0",
    "mypy>=1.10.0",
    "types-requests",
]

# Console commands; each target is a `main` callable in the `scripts` package.
[project.scripts]
legacyhub-ingest = "scripts.ingest_folder:main"
legacyhub-reindex = "scripts.reindex_document:main"
legacyhub-smoke = "scripts.smoke_test:main"

# `scripts` is shipped alongside `app` because the console commands above
# resolve their entry points from it.
[tool.hatch.build.targets.wheel]
packages = ["app", "scripts"]

[tool.ruff]
line-length = 100
# Matches the lower bound of `requires-python`.
target-version = "py311"

[tool.ruff.lint]
select = ["E", "F", "I", "B", "UP", "N", "PL", "RUF"]
# E501: line-too-long, PLR0913: too-many-arguments,
# PLR2004: magic-value-comparison.
ignore = ["E501", "PLR0913", "PLR2004"]

[tool.pytest.ini_options]
testpaths = ["tests"]
# pytest-asyncio "auto" mode: async tests run without an explicit marker.
asyncio_mode = "auto"