# Packaging, dependency, and tooling configuration for the
# legacy-knowledge-indexer project.

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "legacy-knowledge-indexer"
version = "0.1.0"
description = "LegacyHUB - production-grade ingestion and hybrid search over legacy PDF archives"
requires-python = ">=3.11,<3.13"
authors = [{ name = "TeamHUB" }]
license = { text = "Apache-2.0" }
readme = "README.md"
# Runtime dependencies, grouped by the subsystem that needs them.
dependencies = [
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.30.0",
    "pydantic>=2.7.0",
    "pydantic-settings>=2.4.0",
    "python-multipart>=0.0.9",
    # Database
    "sqlalchemy>=2.0.30",
    "psycopg[binary]>=3.2.0",
    "alembic>=1.13.0",
    # Object storage
    "minio>=7.2.7",
    # Search/index
    "opensearch-py>=2.6.0",
    "qdrant-client>=1.10.0",
    # Workers
    "celery>=5.4.0",
    "redis>=5.0.7",
    # Ingestion
    "ocrmypdf>=16.4.0",
    "pikepdf>=9.0.0",
    "pypdf>=4.3.0",
    "pdfminer.six>=20240706",
    "docling>=2.0.0",
    # ML
    "FlagEmbedding>=1.3.0",
    "sentence-transformers>=3.0.0",
    "torch>=2.2.0",
    "numpy>=1.26.0",
    "transformers>=4.42.0",
    # Misc
    "httpx>=0.27.0",
    "tenacity>=8.5.0",
    "structlog>=24.2.0",
    "orjson>=3.10.0",
    # The two python-magic entries are mutually exclusive: the environment
    # markers select python-magic-bin on Windows and python-magic elsewhere.
    "python-magic>=0.4.27; platform_system != 'Windows'",
    "python-magic-bin>=0.4.14; platform_system == 'Windows'",
    "langdetect>=1.0.9",
    "regex>=2024.5.15",
    "rich>=13.7.1",
    "tqdm>=4.66.4",
    "click>=8.1.7",
]

[project.optional-dependencies]
# Test, lint, and type-check tooling; installed with the "dev" extra.
dev = [
    "pytest>=8.2.0",
    "pytest-asyncio>=0.23.7",
    "ruff>=0.5.0",
    "mypy>=1.10.0",
    "types-requests",
]

[project.scripts]
# Console commands, each mapped to a "module:function" entry point.
legacyhub-ingest = "scripts.ingest_folder:main"
legacyhub-reindex = "scripts.reindex_document:main"
legacyhub-smoke = "scripts.smoke_test:main"

[tool.hatch.build.targets.wheel]
# Both top-level packages go into the wheel; the entry points under
# [project.scripts] resolve into "scripts".
packages = ["app", "scripts"]

[tool.ruff]
line-length = 100
target-version = "py311"

[tool.ruff.lint]
select = ["E", "F", "I", "B", "UP", "N", "PL", "RUF"]
# E501 (line too long) is ignored, so line-length above is not enforced by the
# linter. PLR0913 (too many arguments) and PLR2004 (magic value comparison)
# are also switched off.
ignore = ["E501", "PLR0913", "PLR2004"]

[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"