.env.sample
.gitignore
.pre-commit-config.yaml
LICENSE
README.md
pyproject.toml
pyrightconfig.json
requirements.dev.txt
requirements.test.txt
requirements.txt
.github/workflows/release.yml
.github/workflows/test_and_types.yml
.vscode/launch.json
.vscode/settings.json
CmonCrawl.egg-info/PKG-INFO
CmonCrawl.egg-info/SOURCES.txt
CmonCrawl.egg-info/dependency_links.txt
CmonCrawl.egg-info/entry_points.txt
CmonCrawl.egg-info/requires.txt
CmonCrawl.egg-info/top_level.txt
cmoncrawl/__init__.py
cmoncrawl/config.py
cmoncrawl/aggregator/__init__.py
cmoncrawl/aggregator/athena_query.py
cmoncrawl/aggregator/index_query.py
cmoncrawl/aggregator/.vscode/settings.json
cmoncrawl/aggregator/utils/__init__.py
cmoncrawl/aggregator/utils/athena_query_maker.py
cmoncrawl/aggregator/utils/helpers.py
cmoncrawl/aggregator/utils/ndjson.py
cmoncrawl/common/__init__.py
cmoncrawl/common/loggers.py
cmoncrawl/common/types.py
cmoncrawl/integrations/commands.py
cmoncrawl/integrations/download.py
cmoncrawl/integrations/extract.py
cmoncrawl/integrations/utils.py
cmoncrawl/middleware/stompware.py
cmoncrawl/middleware/synchronized.py
cmoncrawl/processor/__init__.py
cmoncrawl/processor/connectors/api.py
cmoncrawl/processor/connectors/base.py
cmoncrawl/processor/connectors/s3.py
cmoncrawl/processor/extraction/__init__.py
cmoncrawl/processor/extraction/filters.py
cmoncrawl/processor/extraction/utils.py
cmoncrawl/processor/pipeline/__init__.py
cmoncrawl/processor/pipeline/downloader.py
cmoncrawl/processor/pipeline/extractor.py
cmoncrawl/processor/pipeline/pipeline.py
cmoncrawl/processor/pipeline/router.py
cmoncrawl/processor/pipeline/streamer.py
docs/.nojekyll
docs/Makefile
docs/index.html
docs/make.bat
docs/build/doctrees/api.doctree
docs/build/doctrees/environment.pickle
docs/build/doctrees/index.doctree
docs/build/doctrees/usage.doctree
docs/build/doctrees/cli/cli.doctree
docs/build/doctrees/cli/download.doctree
docs/build/doctrees/cli/extract.doctree
docs/build/doctrees/cli/index.doctree
docs/build/doctrees/extraction/config_file.doctree
docs/build/doctrees/extraction/creating_extractor.doctree
docs/build/doctrees/extraction/index.doctree
docs/build/doctrees/extraction/utils.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aclose.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aopen.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_all_CC_indexes.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_captured_responses.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_number_of_pages.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.index_query.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.helpers.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.decode.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.raw_decode.doctree
docs/build/doctrees/generated/cmoncrawl.aggregator.utils.ndjson_decoder.doctree
docs/build/doctrees/generated/cmoncrawl.common.doctree
docs/build/doctrees/generated/cmoncrawl.common.loggers.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainCrawl.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainCrawl.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.from_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.from_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.schema.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.to_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.DomainRecord.to_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.from_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.from_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.schema.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.to_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractConfig.to_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.from_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.from_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.schema.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.to_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.ExtractorConfig.to_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.PipeMetadata.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.PipeMetadata.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RetrieveResponse.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RetrieveResponse.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.from_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.from_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.schema.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.to_dict.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.RoutesConfig.to_json.doctree
docs/build/doctrees/generated/cmoncrawl.common.types.doctree
docs/build/doctrees/generated/cmoncrawl.doctree
docs/build/doctrees/generated/cmoncrawl.processor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.extraction.doctree
docs/build/doctrees/generated/cmoncrawl.processor.extraction.filters.doctree
docs/build/doctrees/generated/cmoncrawl.processor.extraction.utils.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aclose.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aopen.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.download.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.unwrap.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.download.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_url.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_year.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.mine_metadata.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.download.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.downloader.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract_soup.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_raw.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_soup.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.preprocess.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract_soup.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_raw.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_soup.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.preprocess.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract_soup.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_raw.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_soup.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.preprocess.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.extract.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.extractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.process_domain_record.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.pipeline.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.IRouter.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.IRouter.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.IRouter.route.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Route.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Route.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.load_extractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.load_module.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.load_module_as_extractor.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.load_modules.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.register_route.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.register_routes.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.Router.route.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.router.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.clean_up.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.get_file_name.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.metadata_to_string.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.stream.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.clean_up.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.stream.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.clean_up.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.stream.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.clean_up.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.get_file_name.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.metadata_to_string.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.stream.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.__init__.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.clean_up.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.get_file_name.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.metadata_to_string.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.stream.doctree
docs/build/doctrees/generated/cmoncrawl.processor.pipeline.streamer.doctree
docs/build/doctrees/misc/domain_record.doctree
docs/build/doctrees/misc/index.doctree
docs/build/doctrees/prog_guide/index.doctree
docs/build/doctrees/prog_guide/overview.doctree
docs/build/doctrees/prog_guide/pip.doctree
docs/build/html/.buildinfo
docs/build/html/api.html
docs/build/html/genindex.html
docs/build/html/index.html
docs/build/html/objects.inv
docs/build/html/py-modindex.html
docs/build/html/search.html
docs/build/html/searchindex.js
docs/build/html/usage.html
docs/build/html/_sources/api.rst.txt
docs/build/html/_sources/index.rst.txt
docs/build/html/_sources/usage.rst.txt
docs/build/html/_sources/cli/cli.rst.txt
docs/build/html/_sources/cli/download.rst.txt
docs/build/html/_sources/cli/extract.rst.txt
docs/build/html/_sources/cli/index.rst.txt
docs/build/html/_sources/extraction/config_file.rst.txt
docs/build/html/_sources/extraction/creating_extractor.rst.txt
docs/build/html/_sources/extraction/index.rst.txt
docs/build/html/_sources/extraction/utils.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aclose.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aopen.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_all_CC_indexes.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_captured_responses.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_number_of_pages.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.IndexAggregator.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.index_query.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.helpers.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.decode.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.raw_decode.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.ndjson_decoder.rst.txt
docs/build/html/_sources/generated/cmoncrawl.aggregator.utils.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.loggers.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainCrawl.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainCrawl.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.from_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.from_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.schema.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.to_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.DomainRecord.to_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.from_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.from_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.schema.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.to_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractConfig.to_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.from_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.from_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.schema.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.to_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.ExtractorConfig.to_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.PipeMetadata.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.PipeMetadata.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RetrieveResponse.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RetrieveResponse.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.from_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.from_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.schema.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.to_dict.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.RoutesConfig.to_json.rst.txt
docs/build/html/_sources/generated/cmoncrawl.common.types.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.extraction.filters.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.extraction.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.extraction.utils.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aclose.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aopen.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.download.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.unwrap.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.download.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_url.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_year.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.mine_metadata.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.download.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.downloader.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract_soup.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_raw.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_soup.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.preprocess.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract_soup.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_raw.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_soup.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.preprocess.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract_soup.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_raw.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_soup.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.preprocess.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.extract.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.extractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.process_domain_record.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.pipeline.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.IRouter.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.IRouter.route.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.IRouter.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Route.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Route.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.load_extractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.load_module.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.load_module_as_extractor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.load_modules.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.register_route.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.register_routes.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.route.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.Router.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.router.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.clean_up.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.get_file_name.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.metadata_to_string.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.stream.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.clean_up.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.stream.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.clean_up.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.stream.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.clean_up.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.get_file_name.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.metadata_to_string.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.stream.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.__init__.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.clean_up.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.get_file_name.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.metadata_to_string.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.stream.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.pipeline.streamer.rst.txt
docs/build/html/_sources/generated/cmoncrawl.processor.rst.txt
docs/build/html/_sources/generated/cmoncrawl.rst.txt
docs/build/html/_sources/misc/domain_record.rst.txt
docs/build/html/_sources/misc/index.rst.txt
docs/build/html/_sources/prog_guide/index.rst.txt
docs/build/html/_sources/prog_guide/overview.rst.txt
docs/build/html/_sources/prog_guide/pip.rst.txt
docs/build/html/_static/_sphinx_javascript_frameworks_compat.js
docs/build/html/_static/basic.css
docs/build/html/_static/check-solid.svg
docs/build/html/_static/clipboard.min.js
docs/build/html/_static/copy-button.svg
docs/build/html/_static/copybutton.css
docs/build/html/_static/copybutton.js
docs/build/html/_static/copybutton_funcs.js
docs/build/html/_static/doctools.js
docs/build/html/_static/documentation_options.js
docs/build/html/_static/file.png
docs/build/html/_static/jquery-3.6.0.js
docs/build/html/_static/jquery.js
docs/build/html/_static/language_data.js
docs/build/html/_static/minus.png
docs/build/html/_static/plus.png
docs/build/html/_static/pygments.css
docs/build/html/_static/sbt-webpack-macros.html
docs/build/html/_static/searchtools.js
docs/build/html/_static/sphinx_highlight.js
docs/build/html/_static/underscore-1.13.1.js
docs/build/html/_static/underscore.js
docs/build/html/_static/webpack-macros.html
docs/build/html/_static/images/logo_binder.svg
docs/build/html/_static/images/logo_colab.png
docs/build/html/_static/images/logo_deepnote.svg
docs/build/html/_static/images/logo_jupyterhub.svg
docs/build/html/_static/locales/ar/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/bg/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/bn/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ca/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/cs/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/da/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/de/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/el/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/eo/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/es/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/et/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/fi/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/fr/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/hr/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/id/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/it/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/iw/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ja/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ko/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/lt/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/lv/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ml/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/mr/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ms/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/nl/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/no/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/pl/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/pt/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ro/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ru/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/sk/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/sl/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/sr/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/sv/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ta/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/te/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/tg/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/th/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/tl/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/tr/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/uk/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/ur/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/vi/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/zh_CN/LC_MESSAGES/booktheme.po
docs/build/html/_static/locales/zh_TW/LC_MESSAGES/booktheme.po
docs/build/html/_static/scripts/pydata-sphinx-theme.js
docs/build/html/_static/scripts/sphinx-book-theme.js
docs/build/html/_static/scripts/sphinx-book-theme.js.map
docs/build/html/_static/styles/pydata-sphinx-theme.css
docs/build/html/_static/styles/sphinx-book-theme.css
docs/build/html/_static/styles/theme.css
docs/build/html/_static/vendor/fontawesome/5.13.0/LICENSE.txt
docs/build/html/_static/vendor/fontawesome/5.13.0/css/all.min.css
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.svg
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.svg
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.svg
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff
docs/build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2
docs/build/html/cli/cli.html
docs/build/html/cli/download.html
docs/build/html/cli/extract.html
docs/build/html/cli/index.html
docs/build/html/extraction/config_file.html
docs/build/html/extraction/creating_extractor.html
docs/build/html/extraction/index.html
docs/build/html/extraction/utils.html
docs/build/html/generated/cmoncrawl.aggregator.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.__init__.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aclose.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aopen.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_all_CC_indexes.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_captured_responses.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_number_of_pages.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.IndexAggregator.html
docs/build/html/generated/cmoncrawl.aggregator.index_query.html
docs/build/html/generated/cmoncrawl.aggregator.utils.helpers.html
docs/build/html/generated/cmoncrawl.aggregator.utils.html
docs/build/html/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.__init__.html
docs/build/html/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.decode.html
docs/build/html/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.html
docs/build/html/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.raw_decode.html
docs/build/html/generated/cmoncrawl.aggregator.utils.ndjson_decoder.html
docs/build/html/generated/cmoncrawl.common.html
docs/build/html/generated/cmoncrawl.common.loggers.html
docs/build/html/generated/cmoncrawl.common.types.DomainCrawl.__init__.html
docs/build/html/generated/cmoncrawl.common.types.DomainCrawl.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.__init__.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.from_dict.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.from_json.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.schema.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.to_dict.html
docs/build/html/generated/cmoncrawl.common.types.DomainRecord.to_json.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.__init__.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.from_dict.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.from_json.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.schema.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.to_dict.html
docs/build/html/generated/cmoncrawl.common.types.ExtractConfig.to_json.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.__init__.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.from_dict.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.from_json.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.schema.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.to_dict.html
docs/build/html/generated/cmoncrawl.common.types.ExtractorConfig.to_json.html
docs/build/html/generated/cmoncrawl.common.types.PipeMetadata.__init__.html
docs/build/html/generated/cmoncrawl.common.types.PipeMetadata.html
docs/build/html/generated/cmoncrawl.common.types.RetrieveResponse.__init__.html
docs/build/html/generated/cmoncrawl.common.types.RetrieveResponse.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.__init__.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.from_dict.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.from_json.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.schema.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.to_dict.html
docs/build/html/generated/cmoncrawl.common.types.RoutesConfig.to_json.html
docs/build/html/generated/cmoncrawl.common.types.html
docs/build/html/generated/cmoncrawl.html
docs/build/html/generated/cmoncrawl.processor.extraction.filters.html
docs/build/html/generated/cmoncrawl.processor.extraction.html
docs/build/html/generated/cmoncrawl.processor.extraction.utils.html
docs/build/html/generated/cmoncrawl.processor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aclose.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aopen.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.download.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.unwrap.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.download.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_url.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_year.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.mine_metadata.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.download.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.html
docs/build/html/generated/cmoncrawl.processor.pipeline.downloader.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract_soup.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_raw.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_soup.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.preprocess.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract_soup.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_raw.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_soup.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.preprocess.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract_soup.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_raw.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_soup.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.preprocess.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.extract.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.extractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.html
docs/build/html/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.html
docs/build/html/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.process_domain_record.html
docs/build/html/generated/cmoncrawl.processor.pipeline.pipeline.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.IRouter.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.IRouter.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.IRouter.route.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Route.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Route.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.load_extractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.load_module.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.load_module_as_extractor.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.load_modules.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.register_route.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.register_routes.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.Router.route.html
docs/build/html/generated/cmoncrawl.processor.pipeline.router.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.clean_up.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.get_file_name.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.metadata_to_string.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.stream.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.clean_up.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.stream.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.clean_up.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.stream.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.clean_up.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.get_file_name.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.metadata_to_string.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.stream.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.__init__.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.clean_up.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.get_file_name.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.metadata_to_string.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.stream.html
docs/build/html/generated/cmoncrawl.processor.pipeline.streamer.html
docs/build/html/misc/domain_record.html
docs/build/html/misc/index.html
docs/build/html/prog_guide/index.html
docs/build/html/prog_guide/overview.html
docs/build/html/prog_guide/pip.html
docs/source/api.rst
docs/source/conf.py
docs/source/index.rst
docs/source/usage.rst
docs/source/cli/cli.rst
docs/source/cli/download.rst
docs/source/cli/extract.rst
docs/source/cli/index.rst
docs/source/extraction/config_file.rst
docs/source/extraction/creating_extractor.rst
docs/source/extraction/index.rst
docs/source/extraction/utils.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.__init__.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aclose.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.aopen.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_all_CC_indexes.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_captured_responses.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.get_number_of_pages.rst
docs/source/generated/cmoncrawl.aggregator.index_query.IndexAggregator.rst
docs/source/generated/cmoncrawl.aggregator.index_query.rst
docs/source/generated/cmoncrawl.aggregator.rst
docs/source/generated/cmoncrawl.aggregator.utils.helpers.rst
docs/source/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.__init__.rst
docs/source/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.decode.rst
docs/source/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.raw_decode.rst
docs/source/generated/cmoncrawl.aggregator.utils.ndjson_decoder.Decoder.rst
docs/source/generated/cmoncrawl.aggregator.utils.ndjson_decoder.rst
docs/source/generated/cmoncrawl.aggregator.utils.rst
docs/source/generated/cmoncrawl.common.loggers.rst
docs/source/generated/cmoncrawl.common.rst
docs/source/generated/cmoncrawl.common.types.DomainCrawl.__init__.rst
docs/source/generated/cmoncrawl.common.types.DomainCrawl.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.__init__.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.from_dict.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.from_json.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.schema.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.to_dict.rst
docs/source/generated/cmoncrawl.common.types.DomainRecord.to_json.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.__init__.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.from_dict.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.from_json.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.schema.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.to_dict.rst
docs/source/generated/cmoncrawl.common.types.ExtractConfig.to_json.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.__init__.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.from_dict.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.from_json.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.schema.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.to_dict.rst
docs/source/generated/cmoncrawl.common.types.ExtractorConfig.to_json.rst
docs/source/generated/cmoncrawl.common.types.PipeMetadata.__init__.rst
docs/source/generated/cmoncrawl.common.types.PipeMetadata.rst
docs/source/generated/cmoncrawl.common.types.RetrieveResponse.__init__.rst
docs/source/generated/cmoncrawl.common.types.RetrieveResponse.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.__init__.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.from_dict.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.from_json.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.schema.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.to_dict.rst
docs/source/generated/cmoncrawl.common.types.RoutesConfig.to_json.rst
docs/source/generated/cmoncrawl.common.types.rst
docs/source/generated/cmoncrawl.processor.extraction.filters.rst
docs/source/generated/cmoncrawl.processor.extraction.rst
docs/source/generated/cmoncrawl.processor.extraction.utils.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aclose.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.aopen.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.download.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.AsyncDownloader.unwrap.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.download.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_url.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.extract_year.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.mine_metadata.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.DownloaderDummy.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.download.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.IDownloader.rst
docs/source/generated/cmoncrawl.processor.pipeline.downloader.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.extract_soup.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_raw.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.filter_soup.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.preprocess.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.BaseExtractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.extract_soup.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_raw.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.filter_soup.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.preprocess.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.DomainRecordExtractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.extract_soup.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_raw.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.filter_soup.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.preprocess.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.HTMLExtractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.extract.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.IExtractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.extractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.process_domain_record.rst
docs/source/generated/cmoncrawl.processor.pipeline.pipeline.ProcessorPipeline.rst
docs/source/generated/cmoncrawl.processor.pipeline.pipeline.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.IRouter.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.IRouter.route.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.IRouter.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Route.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Route.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.load_extractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.load_module.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.load_module_as_extractor.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.load_modules.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.register_route.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.register_routes.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.route.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.Router.rst
docs/source/generated/cmoncrawl.processor.pipeline.router.rst
docs/source/generated/cmoncrawl.processor.pipeline.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.clean_up.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.get_file_name.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.metadata_to_string.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.BaseStreamerFile.stream.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.clean_up.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.IStreamer.stream.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.clean_up.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerDummy.stream.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.clean_up.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.get_file_name.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.metadata_to_string.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileHTML.stream.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.__init__.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.clean_up.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.get_file_name.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.metadata_to_string.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.StreamerFileJSON.stream.rst
docs/source/generated/cmoncrawl.processor.pipeline.streamer.rst
docs/source/generated/cmoncrawl.processor.rst
docs/source/generated/cmoncrawl.rst
docs/source/misc/domain_record.rst
docs/source/misc/index.rst
docs/source/prog_guide/index.rst
docs/source/prog_guide/overview.rst
docs/source/prog_guide/pip.rst
examples/code-usage/offline-warc-iteration.py
examples/extractor_tutorial/config.json
examples/extractor_tutorial/Extractors/bbc_extractor.py
examples/extractor_tutorial/Extractors/idnes_extractor.py
extractors/my_extractor.py
tests/aggregator_tests.py
tests/athena_tests.py
tests/end_to_end_tests.py
tests/helpers_test.py
tests/processor_tests.py
tests/utils.py
tests/files/mini.warc.gz
tests/test_extract/cfg.json
tests/test_extract/extractors/test_extract.py
tests/test_extract/files/file.html
tests/test_extract/files/file.jsonl
tests/test_routes/a.py
tests/test_routes/b.py