# Base dependencies (this group carries no feature-specific label in this file).
# NOTE(review): the upper bounds on datasets, numpy and transformers are not
# explained here — record the reason for each cap when it is known.
datasets<=3.2
numpy<2.0.0
scipy
torch
torchvision
torchaudio
tqdm
transformers<=4.51.3
aisuite
math_verify
word2number
accelerate
rapidfuzz
colorlog
appdirs
datasketch
modelscope
addict
# NOTE(review): pytest is a test runner listed alongside runtime packages —
# confirm whether it belongs in a separate dev/test requirements file.
pytest
rich
docstring_parser
pydantic
nltk
colorama
# text2sql (text-to-SQL) dependencies
func_timeout
sqlglot
# general text dependencies
fasttext-wheel
kenlm
langkit
openai
sentencepiece
# (datasketch was listed again for this group; it is already declared once
# earlier in this file, so the duplicate entry is omitted here.)

# knowledge base cleaning dependencies
chonkie
trafilatura
lxml_html_clean