Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line endings), AGENTS.md (entry points, stack, discovery order, baseline checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion, failures, rollback, scaling notes), .env.prod.example with rotated credential placeholders, and dev-only warnings on .env.example. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
25 lines
664 B
Python
25 lines
664 B
Python
"""Language detection helper - tolerant to short / mixed text."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from langdetect import DetectorFactory, LangDetectException, detect_langs
|
|
|
|
# Pin langdetect's random seed at import time so that repeated detections on
# the same text are reproducible (langdetect is otherwise non-deterministic,
# per its documentation).
DetectorFactory.seed = 42
|
|
|
|
|
|
def detect_language(text: str, min_chars: int = 40) -> str | None:
    """Guess the dominant language of ``text``.

    Args:
        text: The text to classify. Empty or ``None`` input is treated as
            undetectable.
        min_chars: Minimum number of characters (after stripping surrounding
            whitespace) required before detection is attempted.

    Returns:
        The ``lang`` code of the top-ranked candidate reported by
        ``detect_langs`` (presumably an ISO 639-1 style code -- confirm
        against langdetect's language list), or ``None`` when the text is
        too short, detection raises ``LangDetectException``, or no
        candidates come back.
    """
    # Guard: nothing to analyse at all.
    if not text:
        return None

    # Guard: too little signal once surrounding whitespace is ignored.
    if len(text.strip()) < min_chars:
        return None

    try:
        candidates = detect_langs(text)
    except LangDetectException:
        # Detection failed on this input; report it as undetectable.
        return None

    # The first candidate is the one returned; an empty result means unknown.
    return candidates[0].lang if candidates else None
|
|
|
|
|
|
def has_cyrillic(text: str) -> bool:
    """Return ``True`` if ``text`` contains a character between "Ѐ" and "ӿ".

    The check is inclusive on both ends; an empty string yields ``False``.
    """
    for char in text:
        # Inclusive range comparison on the individual character.
        if "Ѐ" <= char <= "ӿ":
            return True
    return False
|