# Python dependencies in pip requirements-file format.
# Entries written as `name==version` are pinned to that exact release;
# bare names are unpinned and resolve to whatever release pip selects
# at install time.
albumentations==1.4.11
# Beautiful Soup is requested under its official distribution name.
# The `bs4` distribution on PyPI is only a placeholder that pulls in
# beautifulsoup4; the importable module is still `bs4`.
beautifulsoup4
cn2an==0.5.22
datrie==0.8.2
effdet==0.3
hanziconv==0.3.2
html_text==0.6.2
lxml==5.1.0
layoutparser
nougat-ocr
nltk
opencv-python==4.9.0.80
openpyxl==3.1.2
pdfplumber
pyclipper
# NOTE(review): PyPDF2 has been succeeded upstream by `pypdf`; switching
# would require changing imports in the code base, so it is left as is.
PyPDF2
python-docx==1.1.0
python-pptx==0.6.23
ruamel.yaml==0.18.6
roman-numbers==1.0.2
shapely==2.0.3
StrEnum==0.4.15
tika
# NOTE(review): transformers is unpinned while tokenizers is pinned on the
# next line, so pip must pick a transformers release whose tokenizers
# requirement admits 0.19.1 -- consider pinning transformers to a
# compatible range once confirmed against the rest of the stack.
transformers
tokenizers==0.19.1
word2number==1.1
xgboost==2.0.3
langdetect
