LICENSE
README.md
pyproject.toml
tests/test_package_metadata.py
themis/__init__.py
themis/_version.py
themis/api.py
themis/py.typed
themis/backends/__init__.py
themis/backends/execution.py
themis/backends/storage.py
themis/cli/__init__.py
themis/cli/__main__.py
themis/cli/main.py
themis/cli/new_project.py
themis/cli/utils.py
themis/cli/commands/__init__.py
themis/cli/commands/benchmarks.py
themis/cli/commands/comparison.py
themis/cli/commands/config_commands.py
themis/cli/commands/cost.py
themis/cli/commands/demo.py
themis/cli/commands/info.py
themis/cli/commands/leaderboard.py
themis/cli/commands/math_benchmarks.py
themis/cli/commands/mcq_benchmarks.py
themis/cli/commands/results.py
themis/cli/commands/sample_run.py
themis/cli/commands/visualize.py
themis/comparison/__init__.py
themis/comparison/engine.py
themis/comparison/reports.py
themis/comparison/statistics.py
themis/config/__init__.py
themis/config/loader.py
themis/config/registry.py
themis/config/runtime.py
themis/config/schema.py
themis/core/__init__.py
themis/core/conversation.py
themis/core/entities.py
themis/core/serialization.py
themis/core/tools.py
themis/core/types.py
themis/datasets/__init__.py
themis/datasets/base.py
themis/datasets/commonsense_qa.py
themis/datasets/competition_math.py
themis/datasets/coqa.py
themis/datasets/gpqa.py
themis/datasets/gsm8k.py
themis/datasets/gsm_symbolic.py
themis/datasets/math500.py
themis/datasets/med_qa.py
themis/datasets/medmcqa.py
themis/datasets/mmlu_pro.py
themis/datasets/piqa.py
themis/datasets/registry.py
themis/datasets/schema.py
themis/datasets/sciq.py
themis/datasets/social_i_qa.py
themis/datasets/super_gpqa.py
themis/evaluation/__init__.py
themis/evaluation/conditional.py
themis/evaluation/math_verify_utils.py
themis/evaluation/pipeline.py
themis/evaluation/reports.py
themis/evaluation/extractors/__init__.py
themis/evaluation/extractors/error_taxonomy_extractor.py
themis/evaluation/extractors/exceptions.py
themis/evaluation/extractors/identity_extractor.py
themis/evaluation/extractors/json_field_extractor.py
themis/evaluation/extractors/math_verify_extractor.py
themis/evaluation/extractors/regex_extractor.py
themis/evaluation/metrics/__init__.py
themis/evaluation/metrics/composite_metric.py
themis/evaluation/metrics/consistency_metric.py
themis/evaluation/metrics/exact_match.py
themis/evaluation/metrics/length_difference_tolerance.py
themis/evaluation/metrics/math_verify_accuracy.py
themis/evaluation/metrics/pairwise_judge_metric.py
themis/evaluation/metrics/response_length.py
themis/evaluation/metrics/rubric_judge_metric.py
themis/evaluation/metrics/code/__init__.py
themis/evaluation/metrics/code/codebleu.py
themis/evaluation/metrics/code/execution.py
themis/evaluation/metrics/code/pass_at_k.py
themis/evaluation/metrics/nlp/__init__.py
themis/evaluation/metrics/nlp/bertscore.py
themis/evaluation/metrics/nlp/bleu.py
themis/evaluation/metrics/nlp/meteor.py
themis/evaluation/metrics/nlp/rouge.py
themis/evaluation/pipelines/__init__.py
themis/evaluation/pipelines/composable_pipeline.py
themis/evaluation/pipelines/standard_pipeline.py
themis/evaluation/statistics/__init__.py
themis/evaluation/statistics/bootstrap.py
themis/evaluation/statistics/confidence_intervals.py
themis/evaluation/statistics/distributions.py
themis/evaluation/statistics/effect_sizes.py
themis/evaluation/statistics/hypothesis_tests.py
themis/evaluation/statistics/types.py
themis/evaluation/strategies/__init__.py
themis/evaluation/strategies/attempt_aware_evaluation_strategy.py
themis/evaluation/strategies/default_evaluation_strategy.py
themis/evaluation/strategies/evaluation_strategy.py
themis/evaluation/strategies/judge_evaluation_strategy.py
themis/experiment/__init__.py
themis/experiment/builder.py
themis/experiment/cache_manager.py
themis/experiment/comparison.py
themis/experiment/cost.py
themis/experiment/definitions.py
themis/experiment/export.py
themis/experiment/export_csv.py
themis/experiment/integration_manager.py
themis/experiment/math.py
themis/experiment/mcq.py
themis/experiment/orchestrator.py
themis/experiment/pricing.py
themis/experiment/storage.py
themis/experiment/visualization.py
themis/generation/__init__.py
themis/generation/agentic_runner.py
themis/generation/batching.py
themis/generation/clients.py
themis/generation/conversation_runner.py
themis/generation/plan.py
themis/generation/router.py
themis/generation/runner.py
themis/generation/strategies.py
themis/generation/templates.py
themis/generation/turn_strategies.py
themis/generation/types.py
themis/generation/providers/litellm_provider.py
themis/generation/providers/vllm_provider.py
themis/integrations/__init__.py
themis/integrations/huggingface.py
themis/integrations/wandb.py
themis/interfaces/__init__.py
themis/presets/__init__.py
themis/presets/benchmarks.py
themis/presets/models.py
themis/project/__init__.py
themis/project/definitions.py
themis/project/patterns.py
themis/providers/__init__.py
themis/providers/registry.py
themis/server/__init__.py
themis/server/app.py
themis/utils/api_generator.py
themis/utils/cost_tracking.py
themis/utils/dashboard.py
themis/utils/logging_utils.py
themis/utils/progress.py
themis/utils/tracing.py
themis_eval.egg-info/PKG-INFO
themis_eval.egg-info/SOURCES.txt
themis_eval.egg-info/dependency_links.txt
themis_eval.egg-info/requires.txt
themis_eval.egg-info/top_level.txt