Skip to content

test_helpsteer3_dataset_initialization is flaky and failed in CI #610

@yuki-97

Description

@yuki-97

https://github.com/NVIDIA-NeMo/RL/actions/runs/16045333014/job/45275240829

Error: FAILED unit/data/hf_datasets/test_helpsteer.py::test_helpsteer3_dataset_initialization

=================================== FAILURES ===================================
____________________ test_helpsteer3_dataset_initialization ____________________

    def test_helpsteer3_dataset_initialization():
        """Test that HelpSteer3Dataset initializes correctly."""
    
>       dataset = HelpSteer3Dataset()
                  ^^^^^^^^^^^^^^^^^^^

unit/data/hf_datasets/test_helpsteer.py:66: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
../nemo_rl/data/hf_datasets/helpsteer3.py:53: in __init__
    ds = load_dataset("nvidia/HelpSteer3", "preference")
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
/opt/nemo_rl_venv/lib/python3.12/site-packages/datasets/load.py:2062: in load_dataset
    builder_instance = load_dataset_builder(
/opt/nemo_rl_venv/lib/python3.12/site-packages/datasets/load.py:1819: in load_dataset_builder
    builder_instance: DatasetBuilder = builder_cls(
/opt/nemo_rl_venv/lib/python3.12/site-packages/datasets/builder.py:343: in __init__
    self.config, self.config_id = self._create_builder_config(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <datasets.packaged_modules.json.json.JsonHelpSteer3 object at 0x70c53819c410>
config_name = 'preference', custom_features = None, config_kwargs = {}
builder_config = None

    def _create_builder_config(
        self, config_name=None, custom_features=None, **config_kwargs
    ) -> tuple[BuilderConfig, str]:
        """Create and validate BuilderConfig object as well as a unique config id for this config.
        Raises ValueError if there are multiple builder configs and config_name and DEFAULT_CONFIG_NAME are None.
        config_kwargs override the defaults kwargs in config
        """
        builder_config = None
    
        # try default config
        if config_name is None and self.BUILDER_CONFIGS:
            if self.DEFAULT_CONFIG_NAME is not None:
                builder_config = self.builder_configs.get(self.DEFAULT_CONFIG_NAME)
                logger.info(f"No config specified, defaulting to: {self.dataset_name}/{builder_config.name}")
            else:
                if len(self.BUILDER_CONFIGS) > 1:
                    if not config_kwargs:
                        example_of_usage = (
                            f"load_dataset('{self.repo_id or self.dataset_name}', '{self.BUILDER_CONFIGS[0].name}')"
                        )
                        raise ValueError(
                            "Config name is missing."
                            f"\nPlease pick one among the available configs: {list(self.builder_configs.keys())}"
                            + f"\nExample of usage:\n\t`{example_of_usage}`"
                        )
                else:
                    builder_config = self.BUILDER_CONFIGS[0]
                    logger.info(
                        f"No config specified, defaulting to the single config: {self.dataset_name}/{builder_config.name}"
                    )
    
        # try to get config by name
        if isinstance(config_name, str):
            builder_config = self.builder_configs.get(config_name)
            if builder_config is None and self.BUILDER_CONFIGS:
>               raise ValueError(
                    f"BuilderConfig '{config_name}' not found. Available: {list(self.builder_configs.keys())}"
                )
E               ValueError: BuilderConfig 'preference' not found. Available: ['default']

/opt/nemo_rl_venv/lib/python3.12/site-packages/datasets/builder.py:570: ValueError

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions