LICENSE
README.md
pyproject.toml
src/docpull/__init__.py
src/docpull/__main__.py
src/docpull/cli.py
src/docpull/doctor.py
src/docpull/logging_config.py
src/docpull/metadata_extractor.py
src/docpull/py.typed
src/docpull.egg-info/PKG-INFO
src/docpull.egg-info/SOURCES.txt
src/docpull.egg-info/dependency_links.txt
src/docpull.egg-info/entry_points.txt
src/docpull.egg-info/requires.txt
src/docpull.egg-info/top_level.txt
src/docpull/cache/__init__.py
src/docpull/cache/manager.py
src/docpull/cache/streaming_dedup.py
src/docpull/concurrency/__init__.py
src/docpull/concurrency/manager.py
src/docpull/conversion/__init__.py
src/docpull/conversion/chunking.py
src/docpull/conversion/extractor.py
src/docpull/conversion/markdown.py
src/docpull/conversion/protocols.py
src/docpull/conversion/special_cases.py
src/docpull/conversion/trafilatura_extractor.py
src/docpull/core/__init__.py
src/docpull/core/fetcher.py
src/docpull/discovery/__init__.py
src/docpull/discovery/composite.py
src/docpull/discovery/crawler.py
src/docpull/discovery/filters.py
src/docpull/discovery/protocols.py
src/docpull/discovery/sitemap.py
src/docpull/discovery/link_extractors/__init__.py
src/docpull/discovery/link_extractors/enhanced.py
src/docpull/discovery/link_extractors/protocols.py
src/docpull/discovery/link_extractors/static.py
src/docpull/http/__init__.py
src/docpull/http/client.py
src/docpull/http/protocols.py
src/docpull/http/rate_limiter.py
src/docpull/mcp/__init__.py
src/docpull/mcp/server.py
src/docpull/mcp/sources.py
src/docpull/mcp/tools.py
src/docpull/models/__init__.py
src/docpull/models/config.py
src/docpull/models/events.py
src/docpull/models/profiles.py
src/docpull/pipeline/__init__.py
src/docpull/pipeline/base.py
src/docpull/pipeline/steps/__init__.py
src/docpull/pipeline/steps/chunk.py
src/docpull/pipeline/steps/convert.py
src/docpull/pipeline/steps/dedup.py
src/docpull/pipeline/steps/fetch.py
src/docpull/pipeline/steps/metadata.py
src/docpull/pipeline/steps/save.py
src/docpull/pipeline/steps/save_json.py
src/docpull/pipeline/steps/save_ndjson.py
src/docpull/pipeline/steps/save_sqlite.py
src/docpull/pipeline/steps/validate.py
src/docpull/security/__init__.py
src/docpull/security/robots.py
src/docpull/security/url_validator.py
tests/test_cache_conditional_get.py
tests/test_chunking.py
tests/test_cli.py
tests/test_convert_step_new.py
tests/test_fixes_v2_3_0.py
tests/test_link_extractors.py
tests/test_mcp_tools.py
tests/test_naming.py
tests/test_save_ndjson.py
tests/test_security_hardening.py
tests/test_special_cases.py
tests/test_v2_conversion.py
tests/test_v2_discovery.py
tests/test_v2_integration.py
tests/test_v2_pipeline.py