Skip to content

Base

dataphy.dataset.base

Base dataset loader classes for Dataphy SDK.

Classes

DatasetFormat

Bases: Enum

Supported dataset formats.

Attributes
LEROBOT = 'lerobot' class-attribute instance-attribute

Episode(id: str, data: Dict[str, Any], metadata: Dict[str, Any], length: int) dataclass

Represents a single episode in a dataset.

Attributes
id: str instance-attribute
data: Dict[str, Any] instance-attribute
metadata: Dict[str, Any] instance-attribute
length: int instance-attribute

DatasetInfo(name: str, format: DatasetFormat, num_episodes: int, total_timesteps: int, episode_lengths: List[int], metadata: Dict[str, Any]) dataclass

Information about a dataset.

Attributes
name: str instance-attribute
format: DatasetFormat instance-attribute
num_episodes: int instance-attribute
total_timesteps: int instance-attribute
episode_lengths: List[int] instance-attribute
metadata: Dict[str, Any] instance-attribute

BaseDatasetLoader(dataset_path: Union[str, Path], **kwargs: Any)

Bases: ABC

Base class for dataset loaders.

Source code in src/dataphy/dataset/base.py
def __init__(self, dataset_path: Union[str, Path], **kwargs: Any):
    """Record where the dataset lives and any loader-specific options.

    Args:
        dataset_path: Location of the dataset, given as a string or a
            ``Path``; it is stored as a ``Path`` on ``self.dataset_path``.
        **kwargs: Additional keyword options, kept unchanged on
            ``self.kwargs``.
    """
    root = Path(dataset_path)
    self.dataset_path = root
    self.kwargs = kwargs
    # Dataset info is not loaded here; ``get_episode_ids`` fills this
    # in from ``load_info`` the first time it is needed.
    self._info: Optional[DatasetInfo] = None
Attributes
dataset_path = Path(dataset_path) instance-attribute
kwargs = kwargs instance-attribute
Functions
load_info() -> DatasetInfo abstractmethod

Load dataset information.

Source code in src/dataphy/dataset/base.py
@abstractmethod
def load_info(self) -> DatasetInfo:
    """Load dataset information.

    Abstract: every concrete loader must provide its own implementation.
    The base implementation does nothing and returns ``None``.

    Returns:
        A ``DatasetInfo`` describing the dataset.
    """
get_episode(episode_id: str) -> Episode abstractmethod

Get a specific episode by ID.

Source code in src/dataphy/dataset/base.py
@abstractmethod
def get_episode(self, episode_id: str) -> Episode:
    """Get a specific episode by ID.

    Abstract: every concrete loader must provide its own implementation.
    The base implementation does nothing and returns ``None``.

    Args:
        episode_id: Identifier of the episode to fetch.

    Returns:
        The ``Episode`` with the given ID.
    """
get_episodes(episode_ids: Optional[List[str]] = None) -> List[Episode] abstractmethod

Get multiple episodes.

Source code in src/dataphy/dataset/base.py
@abstractmethod
def get_episodes(self, episode_ids: Optional[List[str]] = None) -> List[Episode]:
    """Get multiple episodes.

    Abstract: every concrete loader must provide its own implementation.
    The base implementation does nothing and returns ``None``.

    Args:
        episode_ids: Identifiers of the episodes to fetch. Defaults to
            ``None``; how ``None`` is interpreted (presumably "all
            episodes") is up to the implementing subclass -- confirm there.

    Returns:
        A list of ``Episode`` objects.
    """
get_timestep(episode_id: str, timestep: int) -> Dict[str, Any] abstractmethod

Get a specific timestep from an episode.

Source code in src/dataphy/dataset/base.py
@abstractmethod
def get_timestep(self, episode_id: str, timestep: int) -> Dict[str, Any]:
    """Get a specific timestep from an episode.

    Abstract: every concrete loader must provide its own implementation.
    The base implementation does nothing and returns ``None``.

    Args:
        episode_id: Identifier of the episode to read from.
        timestep: Index of the timestep within that episode
            (``get_random_timestep`` passes values starting at 0).

    Returns:
        A dictionary holding the data for that timestep.
    """
get_timesteps(episode_id: str, start: int, end: int) -> List[Dict[str, Any]] abstractmethod

Get a range of timesteps from an episode.

Source code in src/dataphy/dataset/base.py
@abstractmethod
def get_timesteps(self, episode_id: str, start: int, end: int) -> List[Dict[str, Any]]:
    """Get a range of timesteps from an episode.

    Abstract: every concrete loader must provide its own implementation.
    The base implementation does nothing and returns ``None``.

    Args:
        episode_id: Identifier of the episode to read from.
        start: First timestep index of the range.
        end: Last timestep index of the range. Whether this bound is
            inclusive or exclusive is not specified here -- check the
            implementing subclass.

    Returns:
        A list with one data dictionary per timestep in the range.
    """
get_episode_ids() -> List[str]

Get all episode IDs.

Source code in src/dataphy/dataset/base.py
def get_episode_ids(self) -> List[str]:
    """Return the IDs of every episode in the dataset.

    Dataset info is fetched through ``load_info`` the first time it is
    needed and then kept on ``self._info`` for later calls.

    The IDs are synthesised as ``episode_<index>`` for each index from 0
    up to ``num_episodes - 1``. This is a simplified default; subclasses
    whose episodes are identified differently should override it.

    Returns:
        The list of episode ID strings, in index order.
    """
    info = self._info
    if info is None:
        # First use: load once and cache for subsequent calls.
        info = self._info = self.load_info()
    return [f"episode_{index}" for index in range(info.num_episodes)]
get_random_episode() -> Episode

Get a random episode.

Source code in src/dataphy/dataset/base.py
def get_random_episode(self) -> Episode:
    """Pick one episode uniformly at random and return it.

    The candidate IDs come from ``get_episode_ids``; the chosen ID is then
    resolved through ``get_episode``.

    Returns:
        The ``Episode`` for the randomly selected ID.
    """
    import random

    chosen_id = random.choice(self.get_episode_ids())
    return self.get_episode(chosen_id)
get_random_timestep() -> Tuple[str, int, Dict[str, Any]]

Get a random timestep from a random episode.

Source code in src/dataphy/dataset/base.py
def get_random_timestep(self) -> Tuple[str, int, Dict[str, Any]]:
    """Get a random timestep from a random episode.

    An episode is chosen with ``get_random_episode``; a timestep index is
    then drawn uniformly from ``0`` to ``episode.length - 1`` and its data
    is fetched with ``get_timestep``.

    Returns:
        A ``(episode_id, timestep, data)`` tuple, where ``data`` is
        whatever ``get_timestep`` returns for that episode and index.

    Raises:
        ValueError: If the chosen episode has no timesteps
            (``length`` below 1), so there is nothing to sample.
    """
    import random

    episode = self.get_random_episode()
    if episode.length < 1:
        # Without this guard the sampler fails with an opaque "empty range"
        # ValueError; keep the exception type but say which episode is bad.
        raise ValueError(
            f"Cannot sample a timestep from episode {episode.id!r}: "
            f"length is {episode.length}"
        )
    timestep = random.randrange(episode.length)
    return episode.id, timestep, self.get_timestep(episode.id, timestep)