LICENSE
README.md
pyproject.toml
setup.py
lmms_eval/__init__.py
lmms_eval/__main__.py
lmms_eval/evaluator.py
lmms_eval/evaluator_utils.py
lmms_eval/imports.py
lmms_eval/launch_server.py
lmms_eval/logging_utils.py
lmms_eval/protocol.py
lmms_eval/utils.py
lmms_eval.egg-info/PKG-INFO
lmms_eval.egg-info/SOURCES.txt
lmms_eval.egg-info/dependency_links.txt
lmms_eval.egg-info/entry_points.txt
lmms_eval.egg-info/requires.txt
lmms_eval.egg-info/top_level.txt
lmms_eval/api/__init__.py
lmms_eval/api/filter.py
lmms_eval/api/group.py
lmms_eval/api/instance.py
lmms_eval/api/metrics.py
lmms_eval/api/model.py
lmms_eval/api/reasoning.py
lmms_eval/api/registry.py
lmms_eval/api/samplers.py
lmms_eval/api/task.py
lmms_eval/baselines/__init__.py
lmms_eval/baselines/loader.py
lmms_eval/baselines/registry.py
lmms_eval/caching/__init__.py
lmms_eval/caching/cache.py
lmms_eval/caching/response_cache.py
lmms_eval/cli/__init__.py
lmms_eval/cli/dispatch.py
lmms_eval/cli/mcp_cmd.py
lmms_eval/cli/models_cmd.py
lmms_eval/cli/power_cmd.py
lmms_eval/cli/serve_cmd.py
lmms_eval/cli/tasks_cmd.py
lmms_eval/cli/ui_cmd.py
lmms_eval/cli/version_cmd.py
lmms_eval/cli/wizard.py
lmms_eval/entrypoints/__init__.py
lmms_eval/entrypoints/client.py
lmms_eval/entrypoints/http_server.py
lmms_eval/entrypoints/job_scheduler.py
lmms_eval/entrypoints/protocol.py
lmms_eval/entrypoints/server_args.py
lmms_eval/filters/__init__.py
lmms_eval/filters/decontamination.py
lmms_eval/filters/extraction.py
lmms_eval/filters/selection.py
lmms_eval/filters/transformation.py
lmms_eval/llm_judge/__init__.py
lmms_eval/llm_judge/base.py
lmms_eval/llm_judge/factory.py
lmms_eval/llm_judge/prompt.py
lmms_eval/llm_judge/protocol.py
lmms_eval/llm_judge/utils.py
lmms_eval/llm_judge/launcher/__init__.py
lmms_eval/llm_judge/launcher/base.py
lmms_eval/llm_judge/launcher/sglang.py
lmms_eval/llm_judge/providers/__init__.py
lmms_eval/llm_judge/providers/async_azure_openai.py
lmms_eval/llm_judge/providers/async_openai.py
lmms_eval/llm_judge/providers/azure_openai.py
lmms_eval/llm_judge/providers/dummy.py
lmms_eval/llm_judge/providers/openai.py
lmms_eval/loggers/__init__.py
lmms_eval/loggers/evaluation_tracker.py
lmms_eval/loggers/utils.py
lmms_eval/loggers/wandb_logger.py
lmms_eval/mcp/__init__.py
lmms_eval/mcp/client.py
lmms_eval/mcp/schemas.py
lmms_eval/mcp/server.py
lmms_eval/mcp/tools.py
lmms_eval/models/__init__.py
lmms_eval/models/registry_v2.py
lmms_eval/models/whisper_tt.py
lmms_eval/models/chat/async_hf_model.py
lmms_eval/models/chat/async_openai.py
lmms_eval/models/chat/bagel_lmms_engine.py
lmms_eval/models/chat/huggingface.py
lmms_eval/models/chat/internvl_hf.py
lmms_eval/models/chat/llava_hf.py
lmms_eval/models/chat/llava_onevision1_5.py
lmms_eval/models/chat/longvila.py
lmms_eval/models/chat/nanovlm.py
lmms_eval/models/chat/openai.py
lmms_eval/models/chat/phi4_multimodal.py
lmms_eval/models/chat/qwen2_5_vl.py
lmms_eval/models/chat/qwen3_vl.py
lmms_eval/models/chat/sglang.py
lmms_eval/models/chat/thyme.py
lmms_eval/models/chat/vllm.py
lmms_eval/models/chat/vllm_generate.py
lmms_eval/models/model_utils/__init__.py
lmms_eval/models/model_utils/audio_processing.py
lmms_eval/models/model_utils/concurrency_control.py
lmms_eval/models/model_utils/efficiency_metrics.py
lmms_eval/models/model_utils/gen_metrics.py
lmms_eval/models/model_utils/load_video.py
lmms_eval/models/model_utils/media_encoder.py
lmms_eval/models/model_utils/reasoning_model_utils.py
lmms_eval/models/model_utils/usage_metrics.py
lmms_eval/models/model_utils/qwen/qwen_generate_utils.py
lmms_eval/models/model_utils/thyme/sandbox.py
lmms_eval/models/model_utils/thyme/utils.py
lmms_eval/models/simple/aero.py
lmms_eval/models/simple/aria.py
lmms_eval/models/simple/audio_flamingo_3.py
lmms_eval/models/simple/auroracap.py
lmms_eval/models/simple/bagel.py
lmms_eval/models/simple/baichuan_omni.py
lmms_eval/models/simple/batch_gpt4.py
lmms_eval/models/simple/cambrian.py
lmms_eval/models/simple/cambrians.py
lmms_eval/models/simple/claude.py
lmms_eval/models/simple/cogvlm2.py
lmms_eval/models/simple/dummy_video_reader.py
lmms_eval/models/simple/egogpt.py
lmms_eval/models/simple/from_log.py
lmms_eval/models/simple/fuyu.py
lmms_eval/models/simple/gemini_api.py
lmms_eval/models/simple/gemma3.py
lmms_eval/models/simple/glm4v.py
lmms_eval/models/simple/gpt4o_audio.py
lmms_eval/models/simple/gpt4v.py
lmms_eval/models/simple/idefics2.py
lmms_eval/models/simple/instructblip.py
lmms_eval/models/simple/internvideo2.py
lmms_eval/models/simple/internvideo2_5.py
lmms_eval/models/simple/internvl.py
lmms_eval/models/simple/internvl2.py
lmms_eval/models/simple/internvl3.py
lmms_eval/models/simple/internvl3_5.py
lmms_eval/models/simple/kimi_audio.py
lmms_eval/models/simple/llama4_scout.py
lmms_eval/models/simple/llama_vid.py
lmms_eval/models/simple/llama_vision.py
lmms_eval/models/simple/llava.py
lmms_eval/models/simple/llava_hf.py
lmms_eval/models/simple/llava_onevision.py
lmms_eval/models/simple/llava_onevision1_5.py
lmms_eval/models/simple/llava_onevision_moviechat.py
lmms_eval/models/simple/llava_sglang.py
lmms_eval/models/simple/llava_vid.py
lmms_eval/models/simple/longva.py
lmms_eval/models/simple/mantis.py
lmms_eval/models/simple/minicpm_o.py
lmms_eval/models/simple/minicpm_v.py
lmms_eval/models/simple/minimonkey.py
lmms_eval/models/simple/moviechat.py
lmms_eval/models/simple/mplug_owl_video.py
lmms_eval/models/simple/ola.py
lmms_eval/models/simple/omnivinci.py
lmms_eval/models/simple/openai.py
lmms_eval/models/simple/oryx.py
lmms_eval/models/simple/phi3v.py
lmms_eval/models/simple/phi4_multimodal.py
lmms_eval/models/simple/plm.py
lmms_eval/models/simple/qwen2_5_omni.py
lmms_eval/models/simple/qwen2_5_vl.py
lmms_eval/models/simple/qwen2_5_vl_interleave.py
lmms_eval/models/simple/qwen2_audio.py
lmms_eval/models/simple/qwen2_vl.py
lmms_eval/models/simple/qwen3_omni.py
lmms_eval/models/simple/qwen3_vl.py
lmms_eval/models/simple/qwen_vl.py
lmms_eval/models/simple/qwen_vl_api.py
lmms_eval/models/simple/reka.py
lmms_eval/models/simple/ross.py
lmms_eval/models/simple/sam3.py
lmms_eval/models/simple/slime.py
lmms_eval/models/simple/srt_api.py
lmms_eval/models/simple/tinyllava.py
lmms_eval/models/simple/uni_moe_2_omni.py
lmms_eval/models/simple/video_chatgpt.py
lmms_eval/models/simple/video_llava.py
lmms_eval/models/simple/video_salmonn_2.py
lmms_eval/models/simple/videochat2.py
lmms_eval/models/simple/videochat_flash.py
lmms_eval/models/simple/videollama3.py
lmms_eval/models/simple/vila.py
lmms_eval/models/simple/vita.py
lmms_eval/models/simple/vllm.py
lmms_eval/models/simple/vora.py
lmms_eval/models/simple/whisper.py
lmms_eval/models/simple/whisper_vllm.py
lmms_eval/models/simple/xcomposer2_4KHD.py
lmms_eval/models/simple/xcomposer2d5.py
lmms_eval/tasks/__init__.py
lmms_eval/tasks/asr_wer_utils.py
lmms_eval/tasks/3dsrbench/utils.py
lmms_eval/tasks/FALCONBench/utils.py
lmms_eval/tasks/VisualPuzzles/utils.py
lmms_eval/tasks/_task_utils/default_template_yaml.py
lmms_eval/tasks/_task_utils/file_utils.py
lmms_eval/tasks/_task_utils/gpt_eval_utils.py
lmms_eval/tasks/_task_utils/lance_video_resolver.py
lmms_eval/tasks/_task_utils/math_verify_utils.py
lmms_eval/tasks/_task_utils/media_resolver.py
lmms_eval/tasks/_task_utils/mmmu_mcq_utils.py
lmms_eval/tasks/_task_utils/reasoning_utils.py
lmms_eval/tasks/_task_utils/video_loader.py
lmms_eval/tasks/_task_utils/vqa_eval_metric.py
lmms_eval/tasks/activitynetqa/utils.py
lmms_eval/tasks/ai2d/upload_ai2d.py
lmms_eval/tasks/ai2d/utils.py
lmms_eval/tasks/ai2d/reasoning/utils.py
lmms_eval/tasks/aime/utils.py
lmms_eval/tasks/aime/reasoning/utils.py
lmms_eval/tasks/air_bench/utils.py
lmms_eval/tasks/alpaca_audio/utils.py
lmms_eval/tasks/amber_g/utils.py
lmms_eval/tasks/ami/utils.py
lmms_eval/tasks/arc_agi_1/utils.py
lmms_eval/tasks/arc_agi_2/utils.py
lmms_eval/tasks/auxsolidmath/__init__.py
lmms_eval/tasks/auxsolidmath/utils.py
lmms_eval/tasks/av_asr/utils.py
lmms_eval/tasks/av_odyssey/utils.py
lmms_eval/tasks/av_speakerbench/utils.py
lmms_eval/tasks/babyvision/__init__.py
lmms_eval/tasks/babyvision/prompt.py
lmms_eval/tasks/babyvision/utils.py
lmms_eval/tasks/babyvision_gen/__init__.py
lmms_eval/tasks/babyvision_gen/prompt.py
lmms_eval/tasks/babyvision_gen/utils.py
lmms_eval/tasks/blink/utils.py
lmms_eval/tasks/browsecomp/utils.py
lmms_eval/tasks/camerabench_vqa/utils.py
lmms_eval/tasks/capability/prompt.py
lmms_eval/tasks/capability/utils.py
lmms_eval/tasks/captionqa/utils.py
lmms_eval/tasks/charades_sta/eval_tvg.py
lmms_eval/tasks/charades_sta/utils.py
lmms_eval/tasks/chartqa/upload_chartqa.py
lmms_eval/tasks/chartqa/utils.py
lmms_eval/tasks/chartqa/reasoning/utils.py
lmms_eval/tasks/charxiv/constant.py
lmms_eval/tasks/charxiv/descriptive_utils.py
lmms_eval/tasks/charxiv/reasoning_utils.py
lmms_eval/tasks/charxiv/utils.py
lmms_eval/tasks/charxiv/reasoning/utils.py
lmms_eval/tasks/cinepile/utils.py
lmms_eval/tasks/clotho_aqa/utils.py
lmms_eval/tasks/cmmmu/utils.py
lmms_eval/tasks/cn_college_listen_mcq/__init__.py
lmms_eval/tasks/cn_college_listen_mcq/utils.py
lmms_eval/tasks/coco_cap/utils.py
lmms_eval/tasks/coco_cap_chair/utils.py
lmms_eval/tasks/common_voice_15/utils.py
lmms_eval/tasks/conbench/utils.py
lmms_eval/tasks/corecognition/__init__.py
lmms_eval/tasks/corecognition/utils.py
lmms_eval/tasks/countbench/utils.py
lmms_eval/tasks/countbenchqa/utils.py
lmms_eval/tasks/countbenchqa/reasoning/utils.py
lmms_eval/tasks/countix/utils.py
lmms_eval/tasks/covost2/utils.py
lmms_eval/tasks/csbench/utils.py
lmms_eval/tasks/cuva/utils.py
lmms_eval/tasks/cv_bench/utils.py
lmms_eval/tasks/cv_bench/reasoning/utils.py
lmms_eval/tasks/cvrr/utils.py
lmms_eval/tasks/detailcaps/utils.py
lmms_eval/tasks/docvqa/utils.py
lmms_eval/tasks/docvqa/reasoning/utils.py
lmms_eval/tasks/dream_tts_mcq/__init__.py
lmms_eval/tasks/dream_tts_mcq/utils.py
lmms_eval/tasks/dtcbench/utils.py
lmms_eval/tasks/dude/utils.py
lmms_eval/tasks/dynamath/reasoning/_generate_config.py
lmms_eval/tasks/dynamath/reasoning/utils.py
lmms_eval/tasks/egoplan/utils.py
lmms_eval/tasks/egoschema/utils.py
lmms_eval/tasks/egotempo/utils.py
lmms_eval/tasks/egothink/utils.py
lmms_eval/tasks/embspatial/utils.py
lmms_eval/tasks/emma/utils.py
lmms_eval/tasks/erqa/utils.py
lmms_eval/tasks/europal_asr/utils.py
lmms_eval/tasks/ferret/utils.py
lmms_eval/tasks/fleurs/utils.py
lmms_eval/tasks/flickr30k/utils.py
lmms_eval/tasks/fsc147/utils.py
lmms_eval/tasks/funqa/utils.py
lmms_eval/tasks/gedit_bench/__init__.py
lmms_eval/tasks/gedit_bench/utils.py
lmms_eval/tasks/gedit_bench/viescore/__init__.py
lmms_eval/tasks/gedit_bench/viescore/openai_backend.py
lmms_eval/tasks/gedit_bench/viescore/parse_prompt.py
lmms_eval/tasks/gedit_bench/viescore/utils.py
lmms_eval/tasks/gedit_bench/viescore/vie_prompts.py
lmms_eval/tasks/geometry3k/__init__.py
lmms_eval/tasks/geometry3k/utils.py
lmms_eval/tasks/gigaspeech/utils.py
lmms_eval/tasks/gigaspeech/whisper_normalizer/basic.py
lmms_eval/tasks/gigaspeech/whisper_normalizer/english.py
lmms_eval/tasks/gpqa/cot_n_shot/_generate_configs.py
lmms_eval/tasks/gpqa/cot_n_shot/utils.py
lmms_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py
lmms_eval/tasks/gpqa/cot_zeroshot/utils.py
lmms_eval/tasks/gpqa/generative/_generate_configs.py
lmms_eval/tasks/gpqa/generative/utils.py
lmms_eval/tasks/gpqa/n_shot/_generate_configs.py
lmms_eval/tasks/gpqa/n_shot/utils.py
lmms_eval/tasks/gpqa/openai/utils.py
lmms_eval/tasks/gpqa/reasoning/utils.py
lmms_eval/tasks/gpqa/zeroshot/_generate_configs.py
lmms_eval/tasks/gpqa/zeroshot/utils.py
lmms_eval/tasks/gqa/utils.py
lmms_eval/tasks/gqa_ru/utils.py
lmms_eval/tasks/groundingme/utils.py
lmms_eval/tasks/hallusion_bench/evaluate_hb.py
lmms_eval/tasks/hallusion_bench/utils.py
lmms_eval/tasks/hellaswag/utils.py
lmms_eval/tasks/hipho/utils.py
lmms_eval/tasks/hrbench/hrbench_evals.py
lmms_eval/tasks/hrbench/utils.py
lmms_eval/tasks/iconqa/utils.py
lmms_eval/tasks/ifeval/instructions.py
lmms_eval/tasks/ifeval/instructions_registry.py
lmms_eval/tasks/ifeval/instructions_util.py
lmms_eval/tasks/ifeval/utils.py
lmms_eval/tasks/ii_bench/utils.py
lmms_eval/tasks/illusionbench/__init__.py
lmms_eval/tasks/illusionbench/convert_dataset.py
lmms_eval/tasks/illusionbench/utils.py
lmms_eval/tasks/illusionvqa/utils.py
lmms_eval/tasks/imgedit/__init__.py
lmms_eval/tasks/imgedit/prompt.py
lmms_eval/tasks/imgedit/utils.py
lmms_eval/tasks/infovqa/utils.py
lmms_eval/tasks/infovqa/reasoning/utils.py
lmms_eval/tasks/internal_eval/d170_cn_utils.py
lmms_eval/tasks/internal_eval/d170_en_utils.py
lmms_eval/tasks/internal_eval/dc100_en_utils.py
lmms_eval/tasks/internal_eval/dc200_cn_utils.py
lmms_eval/tasks/internal_eval/utils.py
lmms_eval/tasks/jmmmu/utils.py
lmms_eval/tasks/jmmmu_pro/utils.py
lmms_eval/tasks/k12/utils.py
lmms_eval/tasks/kris_bench/__init__.py
lmms_eval/tasks/kris_bench/prompt.py
lmms_eval/tasks/kris_bench/utils.py
lmms_eval/tasks/lemonade/utils.py
lmms_eval/tasks/librispeech/cn_tn.py
lmms_eval/tasks/librispeech/utils.py
lmms_eval/tasks/librispeech/whisper_normalizer/basic.py
lmms_eval/tasks/librispeech/whisper_normalizer/english.py
lmms_eval/tasks/live_bench/utils.py
lmms_eval/tasks/live_bench/utils_v2.py
lmms_eval/tasks/livexiv_tqa/utils.py
lmms_eval/tasks/livexiv_vqa/utils.py
lmms_eval/tasks/llava-bench-coco/utils.py
lmms_eval/tasks/llava-in-the-wild/utils.py
lmms_eval/tasks/llava-in-the-wild/utils_ko.py
lmms_eval/tasks/llava_interleave_bench/utils.py
lmms_eval/tasks/llava_wilder/utils.py
lmms_eval/tasks/logicvista/reasoning/utils.py
lmms_eval/tasks/longtimescope/utils.py
lmms_eval/tasks/longvideobench/utils.py
lmms_eval/tasks/longvideobench/no_visual/utils.py
lmms_eval/tasks/longvideobench/random_choice/utils.py
lmms_eval/tasks/longvt/utils.py
lmms_eval/tasks/longvt/no_visual/utils.py
lmms_eval/tasks/lsdbench/utils.py
lmms_eval/tasks/lvbench/utils.py
lmms_eval/tasks/lvbench/no_visual/utils.py
lmms_eval/tasks/lvbench/random_choice/utils.py
lmms_eval/tasks/mantis/utils.py
lmms_eval/tasks/mathcanvas/utils.py
lmms_eval/tasks/mathkangaroo/utils.py
lmms_eval/tasks/mathverse/mathverse_evals.py
lmms_eval/tasks/mathverse/utils.py
lmms_eval/tasks/mathverse/reasoning/utils.py
lmms_eval/tasks/mathvision/eval_utils.py
lmms_eval/tasks/mathvision/utils.py
lmms_eval/tasks/mathvision/reasoning/utils.py
lmms_eval/tasks/mathvista/mathvista_evals.py
lmms_eval/tasks/mathvista/utils.py
lmms_eval/tasks/mathvista/reasoning/utils.py
lmms_eval/tasks/medqa/utils.py
lmms_eval/tasks/megabench/evaluator.py
lmms_eval/tasks/megabench/image_video_utils.py
lmms_eval/tasks/megabench/utils.py
lmms_eval/tasks/megabench/breakdown/analysis_utils.py
lmms_eval/tasks/megabench/breakdown/derive_breakdown_results.py
lmms_eval/tasks/megabench/metrics/__init__.py
lmms_eval/tasks/megabench/metrics/aggregation_type.py
lmms_eval/tasks/megabench/metrics/metric_type.py
lmms_eval/tasks/megabench/metrics/response_parse_type.py
lmms_eval/tasks/megabench/metrics/aggregation/mean_agg.py
lmms_eval/tasks/megabench/metrics/aggregation/min_agg.py
lmms_eval/tasks/megabench/metrics/aggregation/unsupported_agg.py
lmms_eval/tasks/megabench/metrics/parsing/answer_str_parse.py
lmms_eval/tasks/megabench/metrics/parsing/dummy_parse.py
lmms_eval/tasks/megabench/metrics/parsing/json_parse.py
lmms_eval/tasks/megabench/metrics/parsing/common/parsers.py
lmms_eval/tasks/megabench/metrics/parsing/common/utils.py
lmms_eval/tasks/megabench/metrics/scoring/ascii_art_vlm_judge.py
lmms_eval/tasks/megabench/metrics/scoring/chess_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/constrained_generation.py
lmms_eval/tasks/megabench/metrics/scoring/coordinate_sequence_match.py
lmms_eval/tasks/megabench/metrics/scoring/dict_equality.py
lmms_eval/tasks/megabench/metrics/scoring/dict_exact_match_agg_recall.py
lmms_eval/tasks/megabench/metrics/scoring/dict_jaccard_agg_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/dict_nbbox_iou_tuple_agg_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/dict_set_equality_agg_jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/exact_str_match.py
lmms_eval/tasks/megabench/metrics/scoring/exact_str_match_case_insensitive.py
lmms_eval/tasks/megabench/metrics/scoring/general_numerical_match.py
lmms_eval/tasks/megabench/metrics/scoring/geo_proximity.py
lmms_eval/tasks/megabench/metrics/scoring/gleu.py
lmms_eval/tasks/megabench/metrics/scoring/jaccard.py
lmms_eval/tasks/megabench/metrics/scoring/latex_expr_equality.py
lmms_eval/tasks/megabench/metrics/scoring/longest_common_list_prefix_ratio.py
lmms_eval/tasks/megabench/metrics/scoring/mse.py
lmms_eval/tasks/megabench/metrics/scoring/multi_ref_phrase.py
lmms_eval/tasks/megabench/metrics/scoring/nbbox_iou.py
lmms_eval/tasks/megabench/metrics/scoring/near_str_match.py
lmms_eval/tasks/megabench/metrics/scoring/nli_entailment.py
lmms_eval/tasks/megabench/metrics/scoring/normalized_similarity_damerau_levenshtein.py
lmms_eval/tasks/megabench/metrics/scoring/number_rel_diff_ratio.py
lmms_eval/tasks/megabench/metrics/scoring/positive_int_match.py
lmms_eval/tasks/megabench/metrics/scoring/program_judge.py
lmms_eval/tasks/megabench/metrics/scoring/sacrebleu_bleu.py
lmms_eval/tasks/megabench/metrics/scoring/sequence_equality.py
lmms_eval/tasks/megabench/metrics/scoring/set_equality.py
lmms_eval/tasks/megabench/metrics/scoring/set_precision.py
lmms_eval/tasks/megabench/metrics/scoring/simple_str_match.py
lmms_eval/tasks/megabench/metrics/scoring/symbolic_planning.py
lmms_eval/tasks/megabench/metrics/scoring/unsupported_scoring.py
lmms_eval/tasks/megabench/metrics/scoring/vlm_as_judge.py
lmms_eval/tasks/megabench/metrics/scoring/xml_nbbox_iou.py
lmms_eval/tasks/megabench/metrics/scoring/xml_norm_point_distance.py
lmms_eval/tasks/megabench/metrics/scoring/xml_norm_point_in_bbox.py
lmms_eval/tasks/megabench/metrics/scoring/common/conversions.py
lmms_eval/tasks/megabench/metrics/scoring/common/metrics.py
lmms_eval/tasks/megabench/metrics/scoring/common/transformations.py
lmms_eval/tasks/mia_bench/utils.py
lmms_eval/tasks/mindcube/utils.py
lmms_eval/tasks/minerva/utils.py
lmms_eval/tasks/mirb/utils.py
lmms_eval/tasks/mix_evals/audio2text/utils.py
lmms_eval/tasks/mix_evals/image2text/utils.py
lmms_eval/tasks/mix_evals/video2text/utils.py
lmms_eval/tasks/mlvu/utils.py
lmms_eval/tasks/mmar/utils.py
lmms_eval/tasks/mmau/utils.py
lmms_eval/tasks/mmbench/cc_utils.py
lmms_eval/tasks/mmbench/cn_utils.py
lmms_eval/tasks/mmbench/en_utils.py
lmms_eval/tasks/mmbench/ko_utils.py
lmms_eval/tasks/mmbench/mmbench_evals.py
lmms_eval/tasks/mmbench/ru_utils.py
lmms_eval/tasks/mmbench/en_reasoning/utils.py
lmms_eval/tasks/mmbench/reasoning/utils.py
lmms_eval/tasks/mme/utils.py
lmms_eval/tasks/mme_cc/utils.py
lmms_eval/tasks/mme_cot/utils.py
lmms_eval/tasks/mme_realworld/utils.py
lmms_eval/tasks/mme_realworld/reasoning/utils.py
lmms_eval/tasks/mme_sci/utils.py
lmms_eval/tasks/mme_sci_image/utils.py
lmms_eval/tasks/mmie/utils.py
lmms_eval/tasks/mmlongbench/utils.py
lmms_eval/tasks/mmlongbench_doc/utils.py
lmms_eval/tasks/mmlu/_generate_configs.py
lmms_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
lmms_eval/tasks/mmlu/flan_n_shot/generative/utils.py
lmms_eval/tasks/mmlu_pro/utils.py
lmms_eval/tasks/mmmu/utils.py
lmms_eval/tasks/mmmu/utils_group_img.py
lmms_eval/tasks/mmmu/reasoning/utils.py
lmms_eval/tasks/mmmu_pro/utils.py
lmms_eval/tasks/mmmu_pro/reasoning/utils.py
lmms_eval/tasks/mmrefine/mmrefine_evals.py
lmms_eval/tasks/mmrefine/prompts.py
lmms_eval/tasks/mmrefine/utils.py
lmms_eval/tasks/mmsearch/constants.py
lmms_eval/tasks/mmsearch/get_final_scores.py
lmms_eval/tasks/mmsearch/lmms_eval_utils.py
lmms_eval/tasks/mmsearch/prompts/prompt.py
lmms_eval/tasks/mmsearch/prompts/prompt_w_imagesearch.py
lmms_eval/tasks/mmsearch/retrieve_content/retriever.py
lmms_eval/tasks/mmsearch/retrieve_content/tokenization/__init__.py
lmms_eval/tasks/mmsearch/retrieve_content/tokenization/tokenizers.py
lmms_eval/tasks/mmsearch/retrieve_content/tokenization/utils.py
lmms_eval/tasks/mmsearch/score/f1_score.py
lmms_eval/tasks/mmsearch/score/req_score.py
lmms_eval/tasks/mmsearch/score/result_summary.py
lmms_eval/tasks/mmsearch/utils/image_utils.py
lmms_eval/tasks/mmsearch/utils/lmms_eval_utils.py
lmms_eval/tasks/mmsearch/utils/prompt_utils.py
lmms_eval/tasks/mmsearch/utils/utils.py
lmms_eval/tasks/mmsearch/utils/web_content_utils.py
lmms_eval/tasks/mmsearch_plus/decrypt_utils.py
lmms_eval/tasks/mmsearch_plus/utils.py
lmms_eval/tasks/mmsi_bench/utils.py
lmms_eval/tasks/mmsi_video/utils.py
lmms_eval/tasks/mmstar/utils.py
lmms_eval/tasks/mmstar/reasoning/utils.py
lmms_eval/tasks/mmsu/utils.py
lmms_eval/tasks/mmt/utils.py
lmms_eval/tasks/mmupd/mmupd_evals.py
lmms_eval/tasks/mmupd/utils.py
lmms_eval/tasks/mmvet/utils.py
lmms_eval/tasks/mmvetv2/utils.py
lmms_eval/tasks/mmvp/utils.py
lmms_eval/tasks/mmvu/utils.py
lmms_eval/tasks/mmworld/utils.py
lmms_eval/tasks/motionbench/utils.py
lmms_eval/tasks/moviechat/utils.py
lmms_eval/tasks/mtvqa/utils.py
lmms_eval/tasks/muchomusic/utils.py
lmms_eval/tasks/muirbench/utils.py
lmms_eval/tasks/multidocvqa/utils.py
lmms_eval/tasks/multilingual-llava-bench-in-the-wild/utils.py
lmms_eval/tasks/multimodal_rewardbench/utils.py
lmms_eval/tasks/mvbench/utils.py
lmms_eval/tasks/naturalbench/utils.py
lmms_eval/tasks/neptune/utils.py
lmms_eval/tasks/nextqa/utils.py
lmms_eval/tasks/nocaps/utils.py
lmms_eval/tasks/ocrbench/upload_ocrbench.py
lmms_eval/tasks/ocrbench/utils.py
lmms_eval/tasks/ocrbench/reasoning/utils.py
lmms_eval/tasks/ocrbench_v2/IoUscore_metric.py
lmms_eval/tasks/ocrbench_v2/TEDS_metric.py
lmms_eval/tasks/ocrbench_v2/__init__.py
lmms_eval/tasks/ocrbench_v2/page_ocr_metric.py
lmms_eval/tasks/ocrbench_v2/parallel.py
lmms_eval/tasks/ocrbench_v2/spotting_metric.py
lmms_eval/tasks/ocrbench_v2/upload_ocrbench_v2.py
lmms_eval/tasks/ocrbench_v2/utils.py
lmms_eval/tasks/ocrbench_v2/vqa_metric.py
lmms_eval/tasks/ocrbench_v2/reasoning/utils.py
lmms_eval/tasks/ocrbench_v2/spotting_eval/__init__.py
lmms_eval/tasks/ocrbench_v2/spotting_eval/rrc_evaluation_funcs_1_1.py
lmms_eval/tasks/ocrbench_v2/spotting_eval/script.py
lmms_eval/tasks/officeqa/utils.py
lmms_eval/tasks/ok_vqa/_generate_config.py
lmms_eval/tasks/ok_vqa/utils.py
lmms_eval/tasks/olympiadbench/cn_utils.py
lmms_eval/tasks/olympiadbench/en_utils.py
lmms_eval/tasks/olympiadbench/olympiadbench_evals.py
lmms_eval/tasks/olympiadbench/testmini_utils.py
lmms_eval/tasks/olympiadbench_mimo/en_utils.py
lmms_eval/tasks/olympiadbench_mimo/olympiadbench_evals.py
lmms_eval/tasks/olympiadbench_mimo/utils.py
lmms_eval/tasks/olympiadbench_mimo/zh_utils.py
lmms_eval/tasks/olympiadbench_mimo/reasoning/utils.py
lmms_eval/tasks/omni_bench/utils.py
lmms_eval/tasks/omnidocbench/utils.py
lmms_eval/tasks/omnispatial/utils.py
lmms_eval/tasks/open_asr/utils.py
lmms_eval/tasks/openai_math/utils.py
lmms_eval/tasks/openhermes/utils.py
lmms_eval/tasks/osi_bench/__init__.py
lmms_eval/tasks/osi_bench/utils.py
lmms_eval/tasks/osworld_g/utils.py
lmms_eval/tasks/ovobench/constant.py
lmms_eval/tasks/ovobench/utils.py
lmms_eval/tasks/ovobench/score_utils/score.py
lmms_eval/tasks/ovr_kinetics/utils.py
lmms_eval/tasks/paibench_u/utils.py
lmms_eval/tasks/people_speech/utils.py
lmms_eval/tasks/perceptiontest/test/utils.py
lmms_eval/tasks/perceptiontest/val/utils.py
lmms_eval/tasks/phyx/phyx_evals.py
lmms_eval/tasks/phyx/utils.py
lmms_eval/tasks/phyx/reasoning/utils.py
lmms_eval/tasks/pixmo_count/utils.py
lmms_eval/tasks/pixmo_count/reasoning/utils.py
lmms_eval/tasks/plm_videobench/eval_utils.py
lmms_eval/tasks/plm_videobench/fgqa/fgqa_utils.py
lmms_eval/tasks/plm_videobench/rcap/rcap_utils.py
lmms_eval/tasks/plm_videobench/rdcap/rdcap_utils.py
lmms_eval/tasks/plm_videobench/rtloc/rtloc_utils.py
lmms_eval/tasks/plm_videobench/sgqa/sgqa_utils.py
lmms_eval/tasks/pointbench/utils.py
lmms_eval/tasks/pope/utils.py
lmms_eval/tasks/prismm_bench/utils.py
lmms_eval/tasks/qbench/utils.py
lmms_eval/tasks/realunify/__init__.py
lmms_eval/tasks/realunify/utils.py
lmms_eval/tasks/realworldqa/utils.py
lmms_eval/tasks/realworldqa/reasoning/utils.py
lmms_eval/tasks/refcoco/_generate_config.py
lmms_eval/tasks/refcoco/utils.py
lmms_eval/tasks/refcoco/utils_rec.py
lmms_eval/tasks/refcoco+/_generate_config.py
lmms_eval/tasks/refcoco+/utils.py
lmms_eval/tasks/refcoco+/utils_rec.py
lmms_eval/tasks/refcocog/_generate_config.py
lmms_eval/tasks/refcocog/utils.py
lmms_eval/tasks/refcocog/utils_rec.py
lmms_eval/tasks/refspatial/utils.py
lmms_eval/tasks/repcount/utils.py
lmms_eval/tasks/saco/compute_metrics.py
lmms_eval/tasks/saco/utils.py
lmms_eval/tasks/safety_redteam/utils.py
lmms_eval/tasks/salbench/utils.py
lmms_eval/tasks/scibench/utils.py
lmms_eval/tasks/scienceqa/utils.py
lmms_eval/tasks/scivideobench/utils.py
lmms_eval/tasks/screenspot/utils.py
lmms_eval/tasks/screenspot/utils_rec.py
lmms_eval/tasks/screenspot_pro/utils.py
lmms_eval/tasks/screenspot_v2/utils.py
lmms_eval/tasks/seedbench/ko_utils.py
lmms_eval/tasks/seedbench/utils.py
lmms_eval/tasks/seedbench/reasoning/utils.py
lmms_eval/tasks/seedbench_2/utils.py
lmms_eval/tasks/seedbench_2_plus/utils.py
lmms_eval/tasks/seedbench_2_plus/reasoning/utils.py
lmms_eval/tasks/seephys/seephys_evals.py
lmms_eval/tasks/seephys/seephys_utils.py
lmms_eval/tasks/simplevqa/utils.py
lmms_eval/tasks/sitebench/merge_results.py
lmms_eval/tasks/sitebench/utils.py
lmms_eval/tasks/sitebench/multi_image_input/utils.py
lmms_eval/tasks/snsbench/metrics.py
lmms_eval/tasks/snsbench/utils.py
lmms_eval/tasks/song_describer/utils.py
lmms_eval/tasks/sparbench/utils.py
lmms_eval/tasks/spatial457/__init__.py
lmms_eval/tasks/spatial457/utils.py
lmms_eval/tasks/spatialtreebench/utils.py
lmms_eval/tasks/spatialtreebench/metrics/rule_metrics.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/parse_output.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/result_init.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/evaluator.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/cogmap_evaluator.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/cogmap_metrics.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/cogmap/graph_operations.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/base_metrics.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/extractors.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/evaluation/core/io_utils.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/__init__.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/io_utils.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/spatial_utils.py
lmms_eval/tasks/spatialtreebench/metrics/mindcube_cogmap/src/utils/text_utils.py
lmms_eval/tasks/spatialviz/utils.py
lmms_eval/tasks/ssv2/utils.py
lmms_eval/tasks/stare/utils.py
lmms_eval/tasks/step2_audio_paralinguistic/utils.py
lmms_eval/tasks/structeditbench/__init__.py
lmms_eval/tasks/structeditbench/utils.py
lmms_eval/tasks/stvqa/utils.py
lmms_eval/tasks/super_gpqa/utils.py
lmms_eval/tasks/synthdog/donut_evaluator.py
lmms_eval/tasks/synthdog/utils.py
lmms_eval/tasks/tau2_bench/utils.py
lmms_eval/tasks/tedlium/utils.py
lmms_eval/tasks/tempcompass/utils.py
lmms_eval/tasks/temporalbench/utils.py
lmms_eval/tasks/textcaps/utils.py
lmms_eval/tasks/textvqa/utils.py
lmms_eval/tasks/timescope/utils.py
lmms_eval/tasks/tomato/utils.py
lmms_eval/tasks/tvbench/utils.py
lmms_eval/tasks/ueval/utils.py
lmms_eval/tasks/uni_mmmu/__init__.py
lmms_eval/tasks/uni_mmmu/utils.py
lmms_eval/tasks/vatex/utils.py
lmms_eval/tasks/vcr_wiki/utils.py
lmms_eval/tasks/vdc/utils.py
lmms_eval/tasks/vending_bench2/utils.py
lmms_eval/tasks/vggsound/utils.py
lmms_eval/tasks/vibe_eval/utils.py
lmms_eval/tasks/video-tt/gpt_utils.py
lmms_eval/tasks/video-tt/utils.py
lmms_eval/tasks/video_detail_description/utils.py
lmms_eval/tasks/videochatgpt/utils.py
lmms_eval/tasks/videoevalpro/utils.py
lmms_eval/tasks/videomathqa/cot_postprocess.py
lmms_eval/tasks/videomathqa/cot_step_evaluation.py
lmms_eval/tasks/videomathqa/utils.py
lmms_eval/tasks/videomme/utils.py
lmms_eval/tasks/videomme/convert_mcq_oe/utils.py
lmms_eval/tasks/videomme/gt_none_option/utils.py
lmms_eval/tasks/videomme/no_visual/utils.py
lmms_eval/tasks/videomme/number_option/utils.py
lmms_eval/tasks/videomme/random_choice/utils.py
lmms_eval/tasks/videomme/revert_oe_mcq/utils.py
lmms_eval/tasks/videomme/video_only_abcd/utils.py
lmms_eval/tasks/videommmu/utils.py
lmms_eval/tasks/videommmu/gt_none_option/utils.py
lmms_eval/tasks/videommmu/no_visual/utils.py
lmms_eval/tasks/videommmu/number_option/utils.py
lmms_eval/tasks/videommmu/random_choice/utils.py
lmms_eval/tasks/viewspatial/utils.py
lmms_eval/tasks/vinoground/utils.py
lmms_eval/tasks/visualwebbench/prompts.py
lmms_eval/tasks/visualwebbench/utils.py
lmms_eval/tasks/visulogic/utils.py
lmms_eval/tasks/vitatecs/utils.py
lmms_eval/tasks/viverbench/utils.py
lmms_eval/tasks/vizwiz_vqa/_generate_config.py
lmms_eval/tasks/vizwiz_vqa/utils.py
lmms_eval/tasks/vl_rewardbench/utils.py
lmms_eval/tasks/vlms_are_biased/__init__.py
lmms_eval/tasks/vlms_are_biased/utils.py
lmms_eval/tasks/vlmsareblind/__init__.py
lmms_eval/tasks/vlmsareblind/utils.py
lmms_eval/tasks/vmcbench/utils.py
lmms_eval/tasks/vocalsound/utils.py
lmms_eval/tasks/voicebench/utils.py
lmms_eval/tasks/voicebench/instruction_following_eval/__init__.py
lmms_eval/tasks/voicebench/instruction_following_eval/instructions.py
lmms_eval/tasks/voicebench/instruction_following_eval/instructions_registry.py
lmms_eval/tasks/voicebench/instruction_following_eval/instructions_util.py
lmms_eval/tasks/voxpopuli/utils.py
lmms_eval/tasks/vpct/utils.py
lmms_eval/tasks/vqav2/utils.py
lmms_eval/tasks/vsibench/utils.py
lmms_eval/tasks/vsibench/multi_image_input/utils.py
lmms_eval/tasks/vstar_bench/__init__.py
lmms_eval/tasks/vstar_bench/utils.py
lmms_eval/tasks/vstar_bench/reasoning/utils.py
lmms_eval/tasks/wavcaps/utils.py
lmms_eval/tasks/websrc/utils.py
lmms_eval/tasks/wemath/wemath_utils.py
lmms_eval/tasks/wemath/reasoning/utils.py
lmms_eval/tasks/wenet_speech/utils.py
lmms_eval/tasks/where2place/utils.py
lmms_eval/tasks/wild_vision_bench/utils.py
lmms_eval/tasks/worldqa/utils.py
lmms_eval/tasks/worldqa/worldqa_mc_evaluator.py
lmms_eval/tasks/worldsense/utils.py
lmms_eval/tasks/worldvqa/utils.py
lmms_eval/tasks/xlrs/mcq_utils.py
lmms_eval/tasks/youcook2/utils.py
lmms_eval/tasks/zerobench/utils.py
lmms_eval/tui/__init__.py
lmms_eval/tui/cli.py
lmms_eval/tui/discovery.py
lmms_eval/tui/server.py
lmms_eval/tui/web/dist/index.html
lmms_eval/tui/web/dist/assets/index--ljchTs1.css
lmms_eval/tui/web/dist/assets/index-B0K15q0v.css
lmms_eval/tui/web/dist/assets/index-CMfU4tYG.css
lmms_eval/tui/web/dist/assets/index-DoGN9NR0.js
lmms_eval/tui/web/dist/assets/index-DolzhZKb.js
lmms_eval/tui/web/dist/assets/index-kuJzFqb0.js