.gitignore
.gitlab-ci.yml
.pre-commit-config.yaml
CITATION.cff
CODE_OF_CONDUCT.md
CONTRIBUTING.md
LICENSE
MANIFEST.in
Makefile
README.md
pyproject.toml
requirements.txt
setup.cfg
setup.py
.github/PULL_REQUEST_TEMPLATE.md
.github/ISSUE_TEMPLATE/bug-report.yml
.github/ISSUE_TEMPLATE/feature-request.yml
.github/ISSUE_TEMPLATE/new-trainer-addition.yml
.github/codeql/custom-queries.qls
.github/workflows/build_documentation.yml
.github/workflows/build_pr_documentation.yml
.github/workflows/clear_cache.yml
.github/workflows/codeQL.yml
.github/workflows/docker-build.yml
.github/workflows/issue_auto_labeller.yml
.github/workflows/pr_style_bot.yml
.github/workflows/slow-tests.yml
.github/workflows/tests.yml
.github/workflows/tests_latest.yml
.github/workflows/trufflehog.yml
.github/workflows/upload_pr_documentation.yml
commands/run_dpo.sh
commands/run_sft.sh
docker/trl-latest-gpu/Dockerfile
docker/trl-source-gpu/Dockerfile
docs/source/_toctree.yml
docs/source/alignprop_trainer.md
docs/source/bco_trainer.md
docs/source/best_of_n.md
docs/source/callbacks.md
docs/source/clis.md
docs/source/community_tutorials.md
docs/source/cpo_trainer.md
docs/source/customization.md
docs/source/data_utils.md
docs/source/dataset_formats.md
docs/source/ddpo_trainer.md
docs/source/deepspeed_integration.md
docs/source/detoxifying_a_lm.md
docs/source/distributing_training.md
docs/source/dpo_trainer.md
docs/source/example_overview.md
docs/source/gkd_trainer.md
docs/source/grpo_trainer.md
docs/source/how_to_train.md
docs/source/index.md
docs/source/installation.md
docs/source/iterative_sft_trainer.md
docs/source/judges.md
docs/source/kto_trainer.md
docs/source/liger_kernel_integration.md
docs/source/logging.md
docs/source/model_utils.md
docs/source/models.md
docs/source/multi_adapter_rl.md
docs/source/nash_md_trainer.md
docs/source/online_dpo_trainer.md
docs/source/orpo_trainer.md
docs/source/others.md
docs/source/peft_integration.md
docs/source/ppo_trainer.md
docs/source/prm_trainer.md
docs/source/quickstart.md
docs/source/reducing_memory_usage.md
docs/source/reward_trainer.md
docs/source/rewards.md
docs/source/rloo_trainer.md
docs/source/script_utils.md
docs/source/sentiment_tuning.md
docs/source/sft_trainer.md
docs/source/speeding_up_training.md
docs/source/training_vlm_sft.md
docs/source/unsloth_integration.md
docs/source/use_model.md
docs/source/using_llama_models.md
docs/source/vllm_integration.md
docs/source/xpo_trainer.md
examples/README.md
examples/accelerate_configs/deepspeed_zero1.yaml
examples/accelerate_configs/deepspeed_zero2.yaml
examples/accelerate_configs/deepspeed_zero3.yaml
examples/accelerate_configs/fsdp1.yaml
examples/accelerate_configs/fsdp2.yaml
examples/accelerate_configs/multi_gpu.yaml
examples/accelerate_configs/single_gpu.yaml
examples/cli_configs/example_config.yaml
examples/datasets/hh-rlhf-helpful-base.py
examples/datasets/lm-human-preferences-descriptiveness.py
examples/datasets/lm-human-preferences-sentiment.py
examples/datasets/math_shepherd.py
examples/datasets/prm800k.py
examples/datasets/rlaif-v.py
examples/datasets/tldr.py
examples/datasets/tldr_preference.py
examples/datasets/ultrafeedback-prompt.py
examples/datasets/ultrafeedback.py
examples/notebooks/README.md
examples/notebooks/best_of_n.ipynb
examples/notebooks/gpt2-sentiment-control.ipynb
examples/notebooks/gpt2-sentiment.ipynb
examples/research_projects/README.md
examples/research_projects/layer_skip/README.md
examples/research_projects/layer_skip/scripts/benchmark_layer_skip.py
examples/research_projects/layer_skip/scripts/config.py
examples/research_projects/layer_skip/scripts/custom_trainer.py
examples/research_projects/layer_skip/scripts/layer_skip_sft.py
examples/research_projects/stack_llama/scripts/README.md
examples/research_projects/stack_llama/scripts/merge_peft_adapter.py
examples/research_projects/stack_llama/scripts/reward_modeling.py
examples/research_projects/stack_llama/scripts/rl_training.py
examples/research_projects/stack_llama/scripts/supervised_finetuning.py
examples/research_projects/stack_llama_2/scripts/README.md
examples/research_projects/stack_llama_2/scripts/dpo_llama2.py
examples/research_projects/stack_llama_2/scripts/requirements.txt
examples/research_projects/stack_llama_2/scripts/sft_llama2.py
examples/research_projects/toxicity/README.md
examples/research_projects/toxicity/scripts/evaluate-toxicity.py
examples/research_projects/toxicity/scripts/gpt-j-6b-toxicity.py
examples/scripts/alignprop.py
examples/scripts/bco.py
examples/scripts/cpo.py
examples/scripts/ddpo.py
examples/scripts/dpo.py
examples/scripts/dpo_online.py
examples/scripts/dpo_vlm.py
examples/scripts/gkd.py
examples/scripts/kto.py
examples/scripts/nash_md.py
examples/scripts/orpo.py
examples/scripts/prm.py
examples/scripts/reward_modeling.py
examples/scripts/sft.py
examples/scripts/sft_gemma3.py
examples/scripts/sft_video_llm.py
examples/scripts/sft_vlm.py
examples/scripts/sft_vlm_gemma3.py
examples/scripts/sft_vlm_smol_vlm.py
examples/scripts/xpo.py
examples/scripts/evals/judge_tldr.py
examples/scripts/ppo/ppo.py
examples/scripts/ppo/ppo_tldr.py
examples/scripts/rloo/rloo.py
examples/scripts/rloo/rloo_tldr.py
mod_trl.egg-info/PKG-INFO
mod_trl.egg-info/SOURCES.txt
mod_trl.egg-info/dependency_links.txt
mod_trl.egg-info/entry_points.txt
mod_trl.egg-info/requires.txt
mod_trl.egg-info/top_level.txt
scripts/add_copyrights.py
scripts/generate_tiny_models.py
scripts/generate_toolcall_dataset.py
scripts/generate_zen_dataset.py
scripts/log_example_reports.py
scripts/log_reports.py
tests/__init__.py
tests/test_activation_offloading.py
tests/test_alignprop_trainer.py
tests/test_bco_trainer.py
tests/test_best_of_n_sampler.py
tests/test_callbacks.py
tests/test_cli.py
tests/test_cli_utils.py
tests/test_collators.py
tests/test_core.py
tests/test_cpo_trainer.py
tests/test_data_collator_completion_only.py
tests/test_data_utils.py
tests/test_dataset_formatting.py
tests/test_ddpo_trainer.py
tests/test_dpo_trainer.py
tests/test_environments.py
tests/test_gkd_trainer.py
tests/test_grpo_trainer.py
tests/test_iterative_sft_trainer.py
tests/test_judges.py
tests/test_kto_trainer.py
tests/test_modeling_geometric_mixture_wrapper.py
tests/test_modeling_value_head.py
tests/test_nash_md_trainer.py
tests/test_online_dpo_trainer.py
tests/test_orpo_trainer.py
tests/test_peft_models.py
tests/test_ppo_trainer.py
tests/test_prm_trainer.py
tests/test_reward_trainer.py
tests/test_rewards.py
tests/test_rich_progress_callback.py
tests/test_rloo_trainer.py
tests/test_sft_trainer.py
tests/test_trainers_args.py
tests/test_utils.py
tests/test_vllm_client_server.py
tests/test_xpo_trainer.py
tests/testing_constants.py
tests/testing_utils.py
tests/data/template.jinja
tests/slow/__init__.py
tests/slow/test_dpo_slow.py
tests/slow/test_grpo_slow.py
tests/slow/test_sft_slow.py
tests/slow/testing_constants.py
trl/__init__.py
trl/cli.py
trl/core.py
trl/data_utils.py
trl/import_utils.py
trl/mergekit_utils.py
trl/accelerate_configs/fsdp1.yaml
trl/accelerate_configs/fsdp2.yaml
trl/accelerate_configs/multi_gpu.yaml
trl/accelerate_configs/single_gpu.yaml
trl/accelerate_configs/zero1.yaml
trl/accelerate_configs/zero2.yaml
trl/accelerate_configs/zero3.yaml
trl/environment/__init__.py
trl/environment/base_environment.py
trl/extras/__init__.py
trl/extras/best_of_n_sampler.py
trl/extras/dataset_formatting.py
trl/extras/injected_process_group.py
trl/extras/profiling.py
trl/extras/vllm_client.py
trl/models/__init__.py
trl/models/activation_offloading.py
trl/models/auxiliary_modules.py
trl/models/modeling_base.py
trl/models/modeling_sd_base.py
trl/models/modeling_value_head.py
trl/models/sd_utils.py
trl/models/utils.py
trl/rewards/__init__.py
trl/rewards/format_rewards.py
trl/scripts/__init__.py
trl/scripts/dpo.py
trl/scripts/env.py
trl/scripts/grpo.py
trl/scripts/kto.py
trl/scripts/sft.py
trl/scripts/utils.py
trl/scripts/vllm_serve.py
trl/templates/lm_model_card.md
trl/trainer/__init__.py
trl/trainer/alignprop_config.py
trl/trainer/alignprop_trainer.py
trl/trainer/bco_config.py
trl/trainer/bco_trainer.py
trl/trainer/callbacks.py
trl/trainer/cpo_config.py
trl/trainer/cpo_trainer.py
trl/trainer/ddpo_config.py
trl/trainer/ddpo_trainer.py
trl/trainer/dpo_config.py
trl/trainer/dpo_trainer.py
trl/trainer/gkd_config.py
trl/trainer/gkd_trainer.py
trl/trainer/grpo_config.py
trl/trainer/grpo_trainer.py
trl/trainer/iterative_sft_config.py
trl/trainer/iterative_sft_trainer.py
trl/trainer/judges.py
trl/trainer/kto_config.py
trl/trainer/kto_trainer.py
trl/trainer/model_config.py
trl/trainer/nash_md_config.py
trl/trainer/nash_md_trainer.py
trl/trainer/online_dpo_config.py
trl/trainer/online_dpo_trainer.py
trl/trainer/orpo_config.py
trl/trainer/orpo_trainer.py
trl/trainer/ppo_config.py
trl/trainer/ppo_trainer.py
trl/trainer/prm_config.py
trl/trainer/prm_trainer.py
trl/trainer/reward_config.py
trl/trainer/reward_trainer.py
trl/trainer/rloo_config.py
trl/trainer/rloo_trainer.py
trl/trainer/sft_config.py
trl/trainer/sft_trainer.py
trl/trainer/utils.py
trl/trainer/xpo_config.py
trl/trainer/xpo_trainer.py