
# data & resources: ignore directories named outputs/ or assets/ at any level
outputs/
assets/

# setup: packaging metadata and build artifacts
data_juicer.egg-info/
py_data_juicer.egg-info/
build/
# NOTE(review): unlike build/, this entry has no trailing slash, so it matches
# files as well as directories named "dist" — confirm that is intended
dist

# others: OS, editor/IDE, and tool-generated artifacts
# macOS folder metadata
.DS_Store
# JetBrains IDE project settings
.idea/
# presumably Weights & Biases run logs — confirm
wandb/
# Python bytecode caches
__pycache__
# VS Code workspace settings
.vscode/
# Jupyter notebook checkpoints
.ipynb_checkpoints/

# Label Studio related: data directory, virtual environment directory, and
# connection settings file (purposes inferred from the names — confirm)
label_studio_data/
label_studio_venv/
label_studio_connection.json

# contents of any __dj__produced_data__ directory, at any depth
# (presumably data generated by Data-Juicer runs — confirm)
**/__dj__produced_data__/*
# local virtual environment directories
venv/
.venv/

# duplicate files and temporary directories created by tests
tests/ops/data/*dup*
tests/tools/tmp_*/
tests/ops/deduplicator/chinese_dedup/
tests/ops/deduplicator/english_dedup/


# performance benchmark data
perf_bench_data/

# local environment file; may hold machine-specific settings or secrets,
# so it is kept out of version control
.env

# cython outputs: generated C source and compiled extension modules.
# The leading "/" anchors each pattern to the directory containing this ignore file.
# NOTE(review): tokenize.c is listed but there is no matching minhash.c entry —
# confirm whether a minhash.c is also generated and should be ignored.
/data_juicer/ops/deduplicator/minhash.cpython-*
/data_juicer/ops/deduplicator/tokenize.c
/data_juicer/ops/deduplicator/tokenize.cpython-*
