Skip to content

Registry

dataphy.dataset.registry

Dataset loader registry for Dataphy SDK.

Classes

Functions

register_dataset_loader(format_type: DatasetFormat, loader_class: Type[BaseDatasetLoader])

Register a new dataset loader.

Parameters:

Name Type Description Default
format_type DatasetFormat

The dataset format to register

required
loader_class Type[BaseDatasetLoader]

The loader class to register for this format

required
Source code in src/dataphy/dataset/registry.py
def register_dataset_loader(format_type: DatasetFormat, loader_class: Type[BaseDatasetLoader]):
    """Associate a loader class with a dataset format in the registry.

    Any loader previously stored under ``format_type`` is replaced.

    Args:
        format_type: Dataset format used as the registry key.
        loader_class: Loader class to store for that format.
    """
    _DATASET_LOADERS.update({format_type: loader_class})

get_dataset_loader(format_type: DatasetFormat) -> Type[BaseDatasetLoader]

Get a dataset loader by format.

Parameters:

Name Type Description Default
format_type DatasetFormat

The dataset format to get loader for

required

Returns:

Type Description
Type[BaseDatasetLoader]

The loader class for the specified format

Raises:

Type Description
ValueError

If the format is not supported

Source code in src/dataphy/dataset/registry.py
def get_dataset_loader(format_type: DatasetFormat) -> Type[BaseDatasetLoader]:
    """Look up the loader class registered for a dataset format.

    Args:
        format_type: Dataset format whose loader is wanted.

    Returns:
        The loader class stored in the registry under ``format_type``.

    Raises:
        ValueError: If no loader is registered for ``format_type``.
    """
    # Success path first; anything absent from the registry falls through
    # to the error below.
    if format_type in _DATASET_LOADERS:
        return _DATASET_LOADERS[format_type]
    raise ValueError(f"Unsupported dataset format: {format_type}")

create_dataset_loader(dataset_path: Union[str, Path], format_type: Optional[DatasetFormat] = None, **kwargs: Any) -> BaseDatasetLoader

Create a dataset loader instance with automatic format detection.

This is the main entry point for loading datasets. It automatically detects the dataset format based on directory structure and files, or uses the explicitly provided format.

Supported formats:

- LeRobot: Robotics datasets with episodes, videos, and parquet data

Parameters:

Name Type Description Default
dataset_path Union[str, Path]

Path to the dataset directory

required
format_type Optional[DatasetFormat]

Dataset format. If None, format will be auto-detected. Use DatasetFormat enum values (e.g., DatasetFormat.LEROBOT)

None
**kwargs Any

Additional arguments passed to the specific loader

{}

Returns:

Name Type Description
BaseDatasetLoader BaseDatasetLoader

Dataset loader instance for the detected/specified format

Raises:

Type Description
ValueError

If dataset format cannot be detected or is unsupported

FileNotFoundError

If dataset path does not exist

Example
from dataphy.dataset.registry import create_dataset_loader, DatasetFormat

# Auto-detect format (recommended): with no format_type given, the format is
# inferred from the dataset directory's structure and files.
loader = create_dataset_loader("./my_dataset")

# Explicit format specification: passing format_type skips auto-detection.
loader = create_dataset_loader(
    "./my_dataset", 
    format_type=DatasetFormat.LEROBOT
)

# Get dataset information from the loader instance that was returned.
info = loader.get_dataset_info()
episodes = loader.get_episode_ids()
Source code in src/dataphy/dataset/registry.py
def create_dataset_loader(
    dataset_path: Union[str, Path], 
    format_type: Optional[DatasetFormat] = None,
    **kwargs: Any
) -> BaseDatasetLoader:
    """Build a loader instance for the dataset at ``dataset_path``.

    Main entry point for loading datasets. When ``format_type`` is omitted
    the format is auto-detected from the dataset path; otherwise the given
    format is used as-is. The loader class registered for the resulting
    format is then instantiated with the path and any extra keyword
    arguments.

    Supported formats:
    - **LeRobot**: Robotics datasets with episodes, videos, and parquet data

    Args:
        dataset_path: Path to the dataset directory. Converted to a
            ``Path`` before it is handed to detection and to the loader.
        format_type: Dataset format. If None, the format is auto-detected.
            Use DatasetFormat enum values (e.g., DatasetFormat.LEROBOT)
        **kwargs: Additional arguments forwarded unchanged to the loader
            class constructor.

    Returns:
        BaseDatasetLoader: Loader instance for the detected/specified format

    Raises:
        ValueError: If the dataset format cannot be detected or no loader
            is registered for it
        FileNotFoundError: If the dataset path does not exist.
            NOTE(review): this function performs no existence check itself;
            presumably raised by format detection or by the loader - confirm.

    Example:
        ```python
        from dataphy.dataset.registry import create_dataset_loader, DatasetFormat

        # Auto-detect format (recommended)
        loader = create_dataset_loader("./my_dataset")

        # Explicit format specification
        loader = create_dataset_loader(
            "./my_dataset",
            format_type=DatasetFormat.LEROBOT
        )

        # Get dataset information
        info = loader.get_dataset_info()
        episodes = loader.get_episode_ids()
        ```
    """
    root = Path(dataset_path)

    # An explicit format wins; detection only runs when none was supplied.
    resolved_format = (
        format_type if format_type is not None else _detect_dataset_format(root)
    )

    return get_dataset_loader(resolved_format)(root, **kwargs)

list_supported_formats() -> List[DatasetFormat]

List all supported dataset formats.

Returns:

Type Description
List[DatasetFormat]

List of all registered dataset formats

Source code in src/dataphy/dataset/registry.py
def list_supported_formats() -> List[DatasetFormat]:
    """Return the dataset formats that currently have a registered loader.

    Returns:
        A new list holding every format key present in the loader registry.
    """
    # Iterating the registry yields its keys, so no explicit .keys() is needed.
    return list(_DATASET_LOADERS)