Skip to content
GitHub
View on GitHub

SlimeRecipe

Recipe dataclass for configuring slime GRPO training on Modal.

from modal_training_gym.train_recipes.slime_recipe.recipe import SlimeRecipe

Recipe dataclass for configuring slime GRPO training on Modal.

Inherits from: BaseTrainRecipe

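| Field | Type | Default | Description |
| --- | --- | --- | --- |
| `gpu_type` | `str` |  |  |
| `colocate` | `bool` |  |  |
| `tensor_model_parallel_size` | `int` |  |  |
| `sequence_parallel` | `bool` |  |  |
| `rollout_num_gpus_per_engine` | `int` |  |  |
| `num_rollout` | `int` |  |  |
| `rollout_batch_size` | `int` |  |  |
| `rollout_max_response_len` | `int` |  |  |
| `rollout_temperature` | `float` |  |  |
| `save_interval` | `int` |  |  |
| `recipe_type` | `RecipeType` | `slime` |  |
| `name` | `str` | `""` |  |
| `app_tags` | `dict` | `{}` |  |
| `environment` | `dict` | `{'PYTHONPATH': '/root/Megatron-LM/', 'CUDA_DEVICE_MAX_CONNECTIONS': '1', 'NCCL_NVLS_ENABLE': '1'}` |  |
| `async_mode` | `bool` | `False` |  |
| `wandb` | `WandbConfig \| None` | `None` |  |
| `image_overlay` | `collections.abc.Callable[[modal.image.Image], modal.image.Image] \| None` | `None` |  |
| `local_slime` | `str \| None` | `None` |  |
| `memory` | `int \| tuple[int, int] \| None` | `None` |  |
| `cloud` | `str \| None` | `None` |  |
| `region` | `str \| None` | `None` |  |
| `slime_model_script` | `str` | `""` |  |
| `source_hf_checkpoint` | `str \| None` | `None` |  |
| `megatron_conversion_hf_checkpoint` | `str \| None` | `None` |  |
| `patch_files` | `list[str]` | `[]` |  |
| `image_run_commands` | `list[str]` | `[]` |  |
| `image_env` | `dict[str, str]` | `{}` |  |
| `train_function_kwargs` | `dict[str, Any]` | `{}` |  |
| `actor_num_nodes` | `int` | `1` |  |
| `actor_num_gpus_per_node` | `int` | `8` |  |
| `rollout_num_gpus` | `int \| None` | `None` |  |
| `use_critic` | `bool` | `False` |  |
| `critic_num_nodes` | `int \| None` | `None` |  |
| `critic_num_gpus_per_node` | `int \| None` | `None` |  |
| `advantage_estimator` | `str` | `"grpo"` |  |
| `n_samples_per_prompt` | `int` | `2` |  |
| `eps_clip` | `float` | `0.2` |  |
| `eps_clip_high` | `float` | `0.28` |  |
| `use_kl_loss` | `bool` | `False` |  |
| `kl_loss_type` | `str` | `"low_var_kl"` |  |
| `kl_loss_coef` | `float` | `0.0` |  |
| `kl_coef` | `float` | `0.0` |  |
| `entropy_coef` | `float` | `0.0` |  |
| `calculate_per_token_loss` | `bool` | `False` |  |
| `ref_load` | `str` | `""` |  |
| `over_sampling_batch_size` | `int \| None` | `None` |  |
| `dynamic_sampling_filter_path` | `str \| None` | `None` |  |
| `balance_data` | `bool` | `False` |  |
| `rollout_shuffle` | `bool` | `True` |  |
| `rollout_top_p` | `float` | `1.0` |  |
| `rollout_stop_token_ids` | `list[int] \| None` | `None` |  |
| `sglang_mem_fraction_static` | `float` | `0.75` |  |
| `global_batch_size` | `int` | `16` |  |
| `lr` | `float` | `1e-06` |  |
| `lr_decay_style` | `str` | `"constant"` |  |
| `weight_decay` | `float` | `0.1` |  |
| `adam_beta1` | `float` | `0.9` |  |
| `adam_beta2` | `float` | `0.98` |  |
| `optimizer` | `str` | `"adam"` |  |
| `attention_dropout` | `float` | `0.0` |  |
| `hidden_dropout` | `float` | `0.0` |  |
| `attention_softmax_in_fp32` | `bool` | `True` |  |
| `accumulate_allreduce_grads_in_fp32` | `bool` | `True` |  |
| `use_distributed_optimizer` | `bool` | `False` |  |
| `recompute_granularity` | `str` | `"full"` |  |
| `recompute_method` | `str` | `"uniform"` |  |
| `recompute_num_layers` | `int` | `1` |  |
| `use_dynamic_batch_size` | `bool` | `True` |  |
| `max_tokens_per_gpu` | `int` | `9216` |  |
| `eval_interval` | `int \| None` | `None` |  |
| `n_samples_per_eval_prompt` | `int` | `4` |  |
| `eval_max_response_len` | `int` | `16384` |  |
| `eval_top_p` | `float` | `1.0` |  |
| `eval_config` | `dict \| None` | `None` |  |
| `save` | `str` | `"/checkpoints"` |  |
| `load` | `str` | `""` |  |
| `no_save_optim` | `bool` | `False` |  |
| `megatron_to_hf_mode` | `str` | `""` |  |
| `use_fault_tolerance` | `bool` | `True` |  |
| `update_weight_mode` | `str` | `"full"` |  |
| `update_weight_transport` | `str` | `"nccl"` |  |
| `update_weight_encoding` | `str` | `"indices"` |  |
| `update_weight_disk_dir` | `str` | `""` |  |
| `rm_type` | `str \| None` | `None` |  |
| `custom_rm_function` | `collections.abc.Callable \| None` | `None` |  |
| `custom_generate_function` | `collections.abc.Callable \| None` | `None` |  |
| `custom_rollout_log_function` | `collections.abc.Callable \| str \| None` | `None` |  |
| `custom_eval_rollout_log_function` | `collections.abc.Callable \| str \| None` | `None` |  |
| `rollout_function` | `collections.abc.Callable \| str \| None` | `None` |  |
| `custom_megatron_before_log_prob_hook` | `collections.abc.Callable \| str \| None` | `None` |  |
| `custom_megatron_before_train_step_hook` | `collections.abc.Callable \| str \| None` | `None` |  |
| `sglang_enable_dp_attention` | `bool` | `False` |  |
| `sglang_dp_size` | `int \| None` | `None` |  |
| `sglang_ep_size` | `int \| None` | `None` |  |
| `sglang_enable_dp_lm_head` | `bool` | `False` |  |
| `sglang_disable_custom_all_reduce` | `bool` | `False` |  |
| `sglang_cuda_graph_bs` | `list[int] \| None` | `None` |  |
| `sglang_max_running_requests` | `int \| None` | `None` |  |
| `extra_config` | `dict \| None` | `None` |  |
| `sglang_config` | `dict \| None` | `None` |  |
| `sglang_request_params` | `dict \| None` | `None` |  |
| `apply_chat_template_kwargs` | `dict \| str` | `""` |  |
| `train_env_vars` | `dict \| str \| None` | `None` |  |
| `multimodal_keys` | `dict \| str \| None` | `None` |  |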

cli_args(self, dataset: 'DatasetConfig | None' = None, model: 'ModelConfig | None' = None) -> list[str]

Section titled “cli_args(self, dataset: 'DatasetConfig | None' = None, model: 'ModelConfig | None' = None) -> list[str]”

get_base_recipe(model_config: modal_training_gym.common.models.base.ModelConfig) -> 'SlimeRecipe | None'

Section titled “get_base_recipe(model_config: modal_training_gym.common.models.base.ModelConfig) -> 'SlimeRecipe | None'”

Source: modal_training_gym/train_recipes/slime_recipe/recipe.py