-
Notifications
You must be signed in to change notification settings - Fork 120
Open
Labels
bug: Something isn't working
Description
Repro
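- Change the call below from the first form (passing `data=` as a keyword argument) to the second form (passing `worker_data` positionally):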
RL/nemo_rl/distributed/worker_groups.py
Lines 781 to 783 in cfb803d
future = getattr(worker, method_name).remote( data=worker_data, **common_kwargs )
future = getattr(worker, method_name).remote(worker_data, **common_kwargs)
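- Change the signature below from the first form (keyword-only `data`, after `*`) to the second form (with the `*` marker removed so `data` can be passed positionally):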
RL/nemo_rl/models/policy/megatron_policy_worker.py
Lines 1053 to 1055 in cfb803d
def get_reference_policy_logprobs( self, *, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None ) -> BatchedDataDict[ReferenceLogprobOutputSpec]:
def get_reference_policy_logprobs(
self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None
) -> BatchedDataDict[ReferenceLogprobOutputSpec]:
- run
uv run python examples/run_grpo_math.py --config examples/configs/grpo_math_1B_megatron.yaml
Traceback
▶ Computing logprobs...
Traceback (most recent call last):
File "/home/scratch.yukih_gpu/depot/reinforcer/examples/run_grpo_math.py", line 335, in <module>
main()
File "/home/scratch.yukih_gpu/depot/reinforcer/examples/run_grpo_math.py", line 318, in main
grpo_train(
File "/home/scratch.yukih_gpu/depot/reinforcer/nemo_rl/algorithms/grpo.py", line 660, in grpo_train
fprop_logprobs = policy.get_logprobs(train_data)["logprobs"]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/nemo_rl/models/policy/lm_policy.py", line 208, in get_logprobs
futures = self.worker_group.run_all_workers_sharded_data(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/nemo_rl/distributed/worker_groups.py", line 830, in run_all_workers_sharded_data
future = getattr(worker, method_name).remote(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/actor.py", line 216, in remote
return self._remote(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/util/tracing/tracing_helper.py", line 422, in _start_span
return method(self, args, kwargs, *_args, **_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/actor.py", line 376, in _remote
return invocation(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/actor.py", line 357, in invocation
return actor._actor_method_call(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/actor.py", line 1496, in _actor_method_call
list_args = signature.flatten_args(function_signature, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/_private/signature.py", line 126, in flatten_args
validate_args(signature_parameters, args, kwargs)
File "/home/scratch.yukih_gpu/depot/reinforcer/.venv/lib/python3.12/site-packages/ray/_private/signature.py", line 99, in validate_args
raise TypeError(str(exc)) from None
TypeError: too many positional arguments
Metadata
Metadata
Assignees
Labels
bug: Something isn't working