chardet
filetype
python-magic
lxml
nltk
tabulate
requests
beautifulsoup4
emoji
dataclasses-json
python-iso639
langdetect
numpy

[airtable]
pyairtable

[all-docs]
unstructured.pytesseract>=0.3.12
python-docx
pandas
pdf2image
ebooklib
msg_parser
openpyxl
markdown
pypandoc
pdfminer.six
xlrd
unstructured-inference==0.6.6
python-pptx<=0.6.21

[azure]
adlfs
fsspec==2023.9.1

[azure-cognitive-search]
azure-search-documents

[biomed]
bs4

[box]
boxfs
fsspec==2023.9.1

[confluence]
atlassian-python-api

[csv]
pandas

[delta-table]
deltalake
fsspec==2023.9.1

[discord]
discord-py

[doc]
python-docx

[docx]
python-docx

[dropbox]
dropboxdrivefs
fsspec==2023.9.1

[elasticsearch]
elasticsearch
jq

[epub]
ebooklib

[gcs]
gcsfs
fsspec==2023.9.1
bs4

[github]
pygithub>1.58.0

[gitlab]
python-gitlab

[google-drive]
google-api-python-client

[huggingface]
langdetect
sacremoses
sentencepiece
torch
transformers

[image]
pdf2image
pdfminer.six
unstructured-inference==0.6.6
unstructured.pytesseract>=0.3.12

[jira]
atlassian-python-api

[local-inference]
unstructured.pytesseract>=0.3.12
python-docx
pandas
pdf2image
ebooklib
msg_parser
openpyxl
markdown
pypandoc
pdfminer.six
xlrd
unstructured-inference==0.6.6
python-pptx<=0.6.21

[md]
markdown

[msg]
msg_parser

[notion]
notion-client
htmlBuilder

[odt]
python-docx
pypandoc

[onedrive]
msal
Office365-REST-Python-Client<2.4.3
bs4

[openai]
langchain
tiktoken
openai

[org]
pypandoc

[outlook]
msal
Office365-REST-Python-Client<2.4.3

[paddleocr]
unstructured.paddleocr==2.6.1.3

[pdf]
pdf2image
pdfminer.six
unstructured-inference==0.6.6
unstructured.pytesseract>=0.3.12

[ppt]
python-pptx<=0.6.21

[pptx]
python-pptx<=0.6.21

[reddit]
praw

[rst]
pypandoc

[rtf]
pypandoc

[s3]
s3fs
fsspec==2023.9.1

[salesforce]
simple-salesforce

[sharepoint]
msal
Office365-REST-Python-Client<2.4.3

[slack]
slack_sdk

[tsv]
pandas

[wikipedia]
wikipedia

[xlsx]
openpyxl
pandas
xlrd
