
Command Line Interface

Command-line interface implementation.

dataphy.cli

Dataphy SDK CLI - Modern command-line interface for robotics data management.

Attributes

console = Console() module-attribute

original_make_metavar = click.core.Parameter.make_metavar module-attribute

app = typer.Typer(help='Dataphy SDK CLI - A comprehensive Python SDK for robotics data I/O, dataset management, and augmentation packs', rich_markup_mode=None, invoke_without_command=True, pretty_exceptions_enable=False, add_completion=False) module-attribute

dataset_app = typer.Typer(help='Dataset management commands', rich_markup_mode=None, pretty_exceptions_enable=False, add_completion=False, invoke_without_command=True) module-attribute

augment_app = typer.Typer(help='Augmentation commands', rich_markup_mode=None, pretty_exceptions_enable=False, add_completion=False, invoke_without_command=True) module-attribute
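
The listing above does not show how the sub-apps are attached to the root app. A minimal sketch of the usual Typer wiring, assuming the command names `dataset` and `augment` (consistent with the `dataphy dataset …` and `dataphy augment …` invocations in the examples below):

    # Assumed registration of the sub-apps on the root Typer app (not shown in this listing)
    app.add_typer(dataset_app, name="dataset")
    app.add_typer(augment_app, name="augment")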

Functions

version_callback(value: bool)

Display version information in a styled panel.

Source code in src/dataphy/cli.py
def version_callback(value: bool):
    """Display version information in a styled panel."""
    if value:
        from rich.console import Console
        from rich.panel import Panel

        # Hardcode version to avoid heavy imports
        __version__ = "0.1.0"

        console = Console()

        # Create the main content
        content = f"""[bold blue]Dataphy SDK v{__version__}[/bold blue]

[white]A comprehensive Python SDK for robotics data I/O, dataset management, and augmentation packs[/white]

[dim]Author:[/dim] [cyan]Dataphy Team[/cyan]
[dim]GitHub:[/dim] [link=https://github.com/dataphy/dataphy-sdk]https://github.com/dataphy/dataphy-sdk[/link]
[dim]Documentation:[/dim] [link=https://docs.dataphy.ai]https://docs.dataphy.ai[/link]

[dim]✨ Features:[/dim] [green]Dataset Loading[/green] • [green]Vision Augmentation[/green] • [green]Multi-Source Data[/green] • [green]CLI Interface[/green]"""

        # Create a panel with the content
        panel = Panel(
            content,
            title="[bold blue]Dataphy[/bold blue]",
            border_style="blue",
            padding=(1, 2)
        )

        console.print(panel)
        raise typer.Exit()

patched_make_metavar(self, ctx=None) -> str

Fixed make_metavar that accepts optional ctx parameter.

This is a compatibility fix for Click/Typer integration issues.

Parameters:

    self: The parameter instance (required)
    ctx: Optional Click context (default: None)

Returns:

    str: The metavar string for the parameter

Source code in src/dataphy/cli.py
def patched_make_metavar(self, ctx=None) -> str:
    """Fixed make_metavar that accepts optional ctx parameter.

    This is a compatibility fix for Click/Typer integration issues.

    Args:
        self: The parameter instance
        ctx: Optional Click context

    Returns:
        The metavar string for the parameter
    """
    if ctx is None:
        # Use a dummy context if none provided
        ctx = click.Context(click.Command('dummy'))
    return original_make_metavar(self, ctx)
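
Given the `original_make_metavar` module attribute captured above, the patch is presumably applied by monkey-patching Click; a minimal sketch of that assumption:

    # Assumed application of the compatibility patch (consistent with the
    # original_make_metavar attribute captured at import time)
    click.core.Parameter.make_metavar = patched_make_metavar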

main(ctx: typer.Context, help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'))

Dataphy SDK CLI - Modern data toolkit for physical AI.

This is the main entry point for the Dataphy CLI. It provides access to dataset management and augmentation commands through a modern, user-friendly interface.

Parameters:

    ctx (Context): Typer context for command execution (required)
    help_flag (bool): Whether to show help information (default: False, exposed as --help/-h)
Source code in src/dataphy/cli.py
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message")
):
    """Dataphy SDK CLI - Modern data toolkit for physical AI.

    This is the main entry point for the Dataphy CLI. It provides access to
    dataset management and augmentation commands through a modern, user-friendly
    interface.

    Args:
        ctx: Typer context for command execution
        help_flag: Whether to show help information
    """
    if help_flag or ctx.invoked_subcommand is None:
        # Show our fancy help when --help is used or no command is provided
        show_fancy_help()
        raise typer.Exit()
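
How the `app` object becomes the installed `dataphy` executable is not shown in this section; a common arrangement, assuming a console-script entry point that simply invokes the Typer app:

    # Hypothetical module entry point; the installed `dataphy` script would call app() the same way
    if __name__ == "__main__":
        app()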

dataset_main(ctx: typer.Context, help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'))

Dataset management commands.

Entry point for dataset-related operations including loading, inspection, and management of robotics datasets.

Parameters:

    ctx (Context): Typer context for command execution (required)
    help_flag (bool): Whether to show help information (default: False, exposed as --help/-h)
Source code in src/dataphy/cli.py
@dataset_app.callback(invoke_without_command=True)
def dataset_main(
    ctx: typer.Context,
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message")
):
    """Dataset management commands.

    Entry point for dataset-related operations including loading, inspection,
    and management of robotics datasets.

    Args:
        ctx: Typer context for command execution
        help_flag: Whether to show help information
    """
    if help_flag or ctx.invoked_subcommand is None:
        show_dataset_help()
        raise typer.Exit()

augment_main(ctx: typer.Context, help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'))

Augmentation commands.

Entry point for augmentation-related operations including applying vision transforms, creating augmentation pipelines, and managing augmentation configurations.

Parameters:

    ctx (Context): Typer context for command execution (required)
    help_flag (bool): Whether to show help information (default: False, exposed as --help/-h)
Source code in src/dataphy/cli.py
@augment_app.callback(invoke_without_command=True)
def augment_main(
    ctx: typer.Context,
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message")
):
    """Augmentation commands.

    Entry point for augmentation-related operations including applying vision
    transforms, creating augmentation pipelines, and managing augmentation
    configurations.

    Args:
        ctx: Typer context for command execution
        help_flag: Whether to show help information
    """
    if help_flag or ctx.invoked_subcommand is None:
        show_augment_help()
        raise typer.Exit()

version()

Show version information.

Displays the current version of the Dataphy SDK with additional information about features and capabilities.

Source code in src/dataphy/cli.py
@app.command()
def version():
    """Show version information.

    Displays the current version of the Dataphy SDK with additional
    information about features and capabilities.
    """
    version_callback(True)

show_fancy_help()

Display a modern, styled help menu.

Shows a comprehensive, visually appealing help interface for the Dataphy CLI, including command descriptions, examples, and feature highlights.

Source code in src/dataphy/cli.py
def show_fancy_help():
    """Display a modern, styled help menu.

    Shows a comprehensive, visually appealing help interface for the Dataphy CLI,
    including command descriptions, examples, and feature highlights.
    """
    # Hardcode version to avoid heavy imports
    __version__ = "0.1.0"

    # Main title
    title = Text("πŸ€– Dataphy SDK CLI", style="bold blue")
    subtitle = Text(f"v{__version__} - Physical AI Data Toolkit", style="dim")

    # Description panel
    desc_panel = Panel(
        "[white]A comprehensive Python SDK for robotics data I/O, dataset management, and augmentation packs.\n"
        "Designed for physical AI applications with LeRobot integration.[/white]",
        title="[bold blue]About[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Commands table
    commands_table = Table(show_header=True, header_style="bold magenta", box=None)
    commands_table.add_column("πŸ”§ Command", style="cyan", width=20)
    commands_table.add_column("πŸ“ Description", style="white")
    commands_table.add_column("πŸ’‘ Example", style="dim yellow")

    commands_table.add_row(
        "dataset fetch",
        "Download datasets from repositories",
        "dataphy dataset fetch --format lerobot --repo-id USER/REPO"
    )
    commands_table.add_row(
        "dataset info", 
        "Get dataset metadata and statistics",
        "dataphy dataset info --format lerobot --repo-id USER/REPO"
    )
    commands_table.add_row(
        "dataset load",
        "Load and inspect dataset contents",
        "dataphy dataset load --dataset-path ./data --episodes 0,1,2"
    )
    commands_table.add_row(
        "dataset visualize",
        "Interactive 2D visualization",
        "dataphy dataset visualize --format lerobot --dataset-path ./data --episodes 0,1,2"
    )
    commands_table.add_row(
        "augment vision",
        "Apply image augmentations",
        "dataphy augment vision --config aug.yaml --input ./images"
    )
    commands_table.add_row(
        "augment dataset",
        "Augment episodes or create augmented datasets",
        "dataphy augment dataset --dataset-path ./data --config aug.yaml --episodes 0,1,2"
    )
    commands_table.add_row(
        "version",
        "Show version information",
        "dataphy version"
    )

    commands_panel = Panel(
        commands_table,
        title="[bold green]πŸ“Š Available Commands[/bold green]",
        border_style="green",
        padding=(1, 2)
    )

    # Quick start panel
    quickstart = Text()
    quickstart.append("1. ", style="bold cyan")
    quickstart.append("Install: ", style="white")
    quickstart.append("poetry install --extras rerun\n", style="yellow")
    quickstart.append("2. ", style="bold cyan")
    quickstart.append("Fetch data: ", style="white")
    quickstart.append("dataphy dataset fetch --format lerobot --repo-id carpit680/giraffe_clean_desk2\n", style="yellow")
    quickstart.append("3. ", style="bold cyan")
    quickstart.append("Visualize: ", style="white")
    quickstart.append("dataphy dataset visualize --format lerobot --dataset-path ./dataset --episodes 0,1,2", style="yellow")

    quickstart_panel = Panel(
        quickstart,
        title="[bold yellow]πŸš€ Quick Start[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Links panel
    links_table = Table(show_header=False, box=None, padding=(0, 2))
    links_table.add_column("", style="cyan")
    links_table.add_column("", style="blue")

    links_table.add_row("πŸ“– Documentation:", "https://docs.dataphy.ai")
    links_table.add_row("πŸ™ GitHub:", "https://github.com/dataphy/dataphy-sdk")
    links_table.add_row("πŸ’¬ Issues:", "https://github.com/dataphy/dataphy-sdk/issues")

    links_panel = Panel(
        links_table,
        title="[bold cyan]πŸ”— Links[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    # Display everything
    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(commands_panel)
    console.print()
    console.print(quickstart_panel)
    console.print()
    console.print(links_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy COMMAND --help' for detailed command options[/dim]", justify="center")
    console.print()

show_dataset_help()

Display fancy help for dataset commands.

Source code in src/dataphy/cli.py
def show_dataset_help():
    """Display fancy help for dataset commands."""
    # Hardcode version to avoid heavy imports
    __version__ = "0.1.0"

    title = Text("πŸ“Š Dataset Management", style="bold green")
    subtitle = Text("Fetch, load, inspect, and visualize robotics datasets", style="dim")

    # Commands table
    commands_table = Table(show_header=True, header_style="bold magenta", box=None)
    commands_table.add_column("πŸ”§ Command", style="cyan", width=15)
    commands_table.add_column("πŸ“ Description", style="white", width=35)
    commands_table.add_column("πŸ’‘ Example", style="dim yellow")

    commands_table.add_row(
        "fetch",
        "Download datasets from repositories",
        "dataphy dataset fetch --format lerobot --repo-id USER/REPO --output ./data"
    )
    commands_table.add_row(
        "info", 
        "Get dataset metadata and statistics",
        "dataphy dataset info --format lerobot --repo-id USER/REPO"
    )
    commands_table.add_row(
        "load",
        "Load and inspect dataset contents",
        "dataphy dataset load --dataset-path ./data --info"
    )
    commands_table.add_row(
        "list-formats",
        "List all supported dataset formats",
        "dataphy dataset list-formats"
    )
    commands_table.add_row(
        "visualize",
        "Interactive 2D visualization with rerun.io",
        "dataphy dataset visualize --format lerobot --dataset-path ./data"
    )

    commands_panel = Panel(
        commands_table,
        title="[bold green]πŸ“Š Dataset Commands[/bold green]",
        border_style="green",
        padding=(1, 2)
    )

    # Usage examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Fetch LeRobot dataset:\n", style="bold white")
    examples.append("  dataphy dataset fetch --format lerobot --repo-id carpit680/giraffe_clean_desk2 --output ./data\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Load and inspect:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --info\n", style="yellow")
    examples.append("  dataphy dataset load --dataset-path ./data --list-episodes\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Visualize in 2D:\n", style="bold white")
    examples.append("  dataphy dataset visualize --format lerobot --dataset-path ./data", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Common Usage[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(commands_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy dataset COMMAND --help' for detailed command options[/dim]", justify="center")
    console.print()

show_augment_help()

Display fancy help for augmentation commands.

Source code in src/dataphy/cli.py
def show_augment_help():
    """Display fancy help for augmentation commands."""
    # Hardcode version to avoid heavy imports
    __version__ = "0.1.0"

    title = Text("🎨 Data Augmentation", style="bold yellow")
    subtitle = Text("Apply visual augmentations to images and robotics datasets", style="dim")

    # Commands table
    commands_table = Table(show_header=True, header_style="bold magenta", box=None)
    commands_table.add_column("πŸ”§ Command", style="cyan", width=15)
    commands_table.add_column("πŸ“ Description", style="white", width=35)
    commands_table.add_column("πŸ’‘ Example", style="dim yellow")

    commands_table.add_row(
        "vision",
        "Apply augmentations to image batches",
        "dataphy augment vision --config aug.yaml --input ./images --preview output.jpg"
    )
    commands_table.add_row(
        "dataset",
        "Augment episodes or entire datasets",
        "dataphy augment dataset --dataset-path ./data --config aug.yaml --episodes 0,1,2"
    )

    commands_panel = Panel(
        commands_table,
        title="[bold yellow]🎨 Augmentation Commands[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Usage examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Preview image augmentations:\n", style="bold white")
    examples.append("  dataphy augment vision --config examples/visionpack.yaml --input ./images --preview grid.jpg\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("List available episodes and cameras:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --list-episodes\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Augment episode by name:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --config aug.yaml --episode episode_000000 --cameras observation.images.webcam\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Augment episode by index (0=first episode):\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --config aug.yaml --episode 0 --cameras observation.images.webcam\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Augment all cameras with synchronized views:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --config aug.yaml --episode 0\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Create augmented dataset (3 versions per episode):\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --output-path ./augmented --config aug.yaml --num-augmented 3\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Augment specific episodes only:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --output-path ./aug --config aug.yaml --episodes 0,1,2 --num-augmented 2\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Restore from backup:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --restore episode_000000", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Common Usage[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(commands_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy augment COMMAND --help' for detailed command options[/dim]", justify="center")
    console.print()

show_fetch_help()

Display fancy help for dataset fetch command.

Source code in src/dataphy/cli.py
def show_fetch_help():
    """Display fancy help for dataset fetch command."""
    title = Text("πŸ“₯ Dataset Fetch", style="bold green")
    subtitle = Text("Download datasets from various repositories", style="dim")

    # Description panel
    desc = Text()
    desc.append("Download robotics datasets from various sources including Hugging Face Hub, ", style="white")
    desc.append("with support for LeRobot format and flexible filtering options.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Required arguments
    required_table = Table(show_header=True, header_style="bold red", box=None)
    required_table.add_column("Option", style="cyan", width=15)
    required_table.add_column("Description", style="white", width=40)
    required_table.add_column("Example", style="dim yellow")

    required_table.add_row(
        "--format",
        "Dataset format (currently supports 'lerobot')",
        "--format lerobot"
    )
    required_table.add_row(
        "--output",
        "Local directory to save the downloaded dataset",
        "--output ./my_dataset"
    )

    required_panel = Panel(
        required_table,
        title="[bold red]πŸ”΄ Required Options[/bold red]",
        border_style="red",
        padding=(1, 2)
    )

    # Optional arguments
    optional_table = Table(show_header=True, header_style="bold yellow", box=None)
    optional_table.add_column("Option", style="cyan", width=12)
    optional_table.add_column("Description", style="white", width=25)
    optional_table.add_column("Default", style="green", width=18)
    optional_table.add_column("Example", style="dim yellow", width=20)

    optional_table.add_row(
        "--repo-id",
        "Repository ID for the dataset",
        "carpit680/giraffe_clean_desk2",
        "--repo-id user/dataset-name"
    )
    optional_table.add_row(
        "--split",
        "Dataset split to download",
        "train",
        "--split train"
    )

    optional_table.add_row(
        "--revision",
        "Git revision/branch to use",
        "main",
        "--revision v1.0"
    )

    optional_panel = Panel(
        optional_table,
        title="[bold yellow]🟑 Optional Options[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Basic LeRobot dataset download:\n", style="bold white")
    examples.append("  dataphy dataset fetch --format lerobot --output ./dataset\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Specific repository:\n", style="bold white")
    examples.append("  dataphy dataset fetch --format lerobot --repo-id carpit680/giraffe_clean_desk2 --output ./giraffe\n\n", style="yellow")



    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Usage Examples[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(required_panel)
    console.print()
    console.print(optional_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy dataset --help' to see all dataset commands[/dim]", justify="center")
    console.print()

show_info_help()

Display fancy help for dataset info command.

Source code in src/dataphy/cli.py
def show_info_help():
    """Display fancy help for dataset info command."""
    title = Text("πŸ“Š Dataset Info", style="bold blue")
    subtitle = Text("Get dataset metadata and statistics", style="dim")

    # Description panel
    desc = Text()
    desc.append("Retrieve detailed information about datasets from various sources including ", style="white")
    desc.append("Hugging Face Hub repositories and local datasets. Get metadata, statistics, and dataset structure.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Required arguments
    required_table = Table(show_header=True, header_style="bold red", box=None)
    required_table.add_column("Option", style="cyan", width=15)
    required_table.add_column("Description", style="white", width=45)
    required_table.add_column("Example", style="dim yellow")

    required_table.add_row(
        "--format",
        "Dataset format (lerobot)",
        "--format lerobot"
    )

    required_panel = Panel(
        required_table,
        title="[bold red]πŸ”΄ Required Options[/bold red]",
        border_style="red",
        padding=(1, 2)
    )

    # Optional arguments
    optional_table = Table(show_header=True, header_style="bold yellow", box=None)
    optional_table.add_column("Option", style="cyan", width=15)
    optional_table.add_column("Description", style="white", width=40)
    optional_table.add_column("Default", style="green", width=20)
    optional_table.add_column("Example", style="dim yellow", width=20)

    optional_table.add_row(
        "--repo-id",
        "Hugging Face repository ID",
        "None",
        "--repo-id user/dataset"
    )

    optional_table.add_row(
        "--dataset-path",
        "Path to local dataset directory",
        "None",
        "--dataset-path ./dataset"
    )

    optional_panel = Panel(
        optional_table,
        title="[bold yellow]🟑 Optional Options[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Get Hugging Face Hub dataset info:\n", style="bold white")
    examples.append("  dataphy dataset info --format lerobot --repo-id carpit680/giraffe_clean_desk2\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Get local dataset info:\n", style="bold white")
    examples.append("  dataphy dataset info --format lerobot --dataset-path ./my_dataset\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Get info for different Hugging Face repository:\n", style="bold white")
    examples.append("  dataphy dataset info --format lerobot --repo-id lerobot/svla_so100_sorting", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Usage Examples[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(required_panel)
    console.print()
    console.print(optional_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy dataset --help' to see all dataset commands[/dim]", justify="center")
    console.print()

show_load_help()

Display fancy help for dataset load command.

Source code in src/dataphy/cli.py
def show_load_help():
    """Display fancy help for dataset load command."""
    title = Text("πŸ“ Dataset Load", style="bold green")
    subtitle = Text("Load and inspect dataset contents", style="dim")

    # Description panel
    desc = Text()
    desc.append("Load and inspect datasets from local directories. View dataset information, ", style="white")
    desc.append("list episodes, and examine specific episodes or timesteps.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Required arguments
    required_table = Table(show_header=True, header_style="bold red", box=None)
    required_table.add_column("Option", style="cyan", width=15)
    required_table.add_column("Description", style="white", width=45)
    required_table.add_column("Example", style="dim yellow")

    required_table.add_row(
        "--dataset-path",
        "Path to dataset directory",
        "--dataset-path ./data"
    )

    required_panel = Panel(
        required_table,
        title="[bold red]πŸ”΄ Required Options[/bold red]",
        border_style="red",
        padding=(1, 2)
    )

    # Optional arguments
    optional_table = Table(show_header=True, header_style="bold yellow", box=None)
    optional_table.add_column("Option", style="cyan", width=15)
    optional_table.add_column("Description", style="white", width=35)
    optional_table.add_column("Default", style="green", width=15)
    optional_table.add_column("Example", style="dim yellow", width=20)

    optional_table.add_row(
        "--format",
        "Dataset format (auto-detected)",
        "auto",
        "--format lerobot"
    )
    optional_table.add_row(
        "--info",
        "Show dataset information",
        "False",
        "--info"
    )
    optional_table.add_row(
        "--list-episodes",
        "List available episodes",
        "False",
        "--list-episodes"
    )
    optional_table.add_row(
        "--episode",
        "Load specific episode (name or index)",
        "None",
        "--episode 0"
    )
    optional_table.add_row(
        "--episodes",
        "Load multiple episodes (comma-separated)",
        "None",
        "--episodes 0,1,2"
    )
    optional_table.add_row(
        "--timestep",
        "Load specific timestep (requires --episode)",
        "None",
        "--timestep 100"
    )

    optional_panel = Panel(
        optional_table,
        title="[bold yellow]🟑 Optional Options[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Show dataset information:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --info\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("List all episodes:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --list-episodes\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Load specific episode by index:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --episode 0\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Load multiple episodes:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --episodes 0,1,2\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Load episodes by name:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --episodes episode_000000,episode_000001\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Load specific timestep:\n", style="bold white")
    examples.append("  dataphy dataset load --dataset-path ./data --episode 5 --timestep 100", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Usage Examples[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(required_panel)
    console.print()
    console.print(optional_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy dataset --help' to see all dataset commands[/dim]", justify="center")
    console.print()

show_visualize_help()

Display fancy help for dataset visualize command.

Source code in src/dataphy/cli.py
def show_visualize_help():
    """Display fancy help for dataset visualize command."""
    title = Text("🎬 Dataset Visualize", style="bold magenta")
    subtitle = Text("Interactive 2D visualization with rerun.io", style="dim")

    # Description panel
    desc = Text()
    desc.append("Visualize robotics datasets using interactive 2D viewers powered by rerun.io. ", style="white")
    desc.append("View camera feeds, robot actions, and state data in real-time.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Required arguments
    required_table = Table(show_header=True, header_style="bold red", box=None)
    required_table.add_column("Option", style="cyan", width=15)
    required_table.add_column("Description", style="white", width=45)
    required_table.add_column("Example", style="dim yellow")

    required_table.add_row(
        "--format",
        "Dataset format (lerobot)",
        "--format lerobot"
    )

    required_panel = Panel(
        required_table,
        title="[bold red]πŸ”΄ Required Options[/bold red]",
        border_style="red",
        padding=(1, 2)
    )

    # Optional arguments
    optional_table = Table(show_header=True, header_style="bold yellow", box=None)
    optional_table.add_column("Option", style="cyan", width=15)
    optional_table.add_column("Description", style="white", width=35)
    optional_table.add_column("Default", style="green", width=15)
    optional_table.add_column("Example", style="dim yellow", width=20)

    optional_table.add_row(
        "--dataset-path",
        "Path to dataset directory",
        "None",
        "--dataset-path ./data"
    )
    optional_table.add_row(
        "--repo-id",
        "Hugging Face repository ID",
        "None",
        "--repo-id user/dataset"
    )
    optional_table.add_row(
        "--episode",
        "Specific episode ID to visualize",
        "None",
        "--episode episode_000000"
    )
    optional_table.add_row(
        "--episodes",
        "Multiple episodes to visualize (comma-separated)",
        "None",
        "--episodes 0,1,2"
    )
    optional_table.add_row(
        "--timestep-range",
        "Timestep range as 'start,end'",
        "all timesteps",
        "--timestep-range 0,100"
    )
    optional_table.add_row(
        "--camera",
        "Camera name to visualize",
        "all cameras",
        "--camera observation.images.laptop"
    )

    optional_panel = Panel(
        optional_table,
        title="[bold yellow]🟑 Optional Options[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Visualize local dataset:\n", style="bold white")
    examples.append("  dataphy dataset visualize --format lerobot --dataset-path ./data\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Visualize from repository:\n", style="bold white")
    examples.append("  dataphy dataset visualize --format lerobot --repo-id carpit680/giraffe_clean_desk2\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Visualize specific episode:\n", style="bold white")
    examples.append("  dataphy dataset visualize --format lerobot --dataset-path ./data --episode episode_000000\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Visualize multiple episodes:\n", style="bold white")
    examples.append("  dataphy dataset visualize --format lerobot --dataset-path ./data --episodes 0,1,2\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Visualize timestep range:\n", style="bold white")
    examples.append("  dataphy dataset visualize --format lerobot --dataset-path ./data --timestep-range 0,100", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Usage Examples[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(required_panel)
    console.print()
    console.print(optional_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy dataset --help' to see all dataset commands[/dim]", justify="center")
    console.print()

show_vision_help()

Display fancy help for augment vision command.

Source code in src/dataphy/cli.py
def show_vision_help():
    """Display fancy help for augment vision command."""
    title = Text("🎨 Vision Augmentation", style="bold yellow")
    subtitle = Text("Apply image augmentations to batches of images", style="dim")

    # Description panel
    desc = Text()
    desc.append("Apply VisionPack augmentation pipelines to images with GPU acceleration. ", style="white")
    desc.append("Generate preview grids or process entire image datasets with configurable transforms.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Required arguments
    required_table = Table(show_header=True, header_style="bold red", box=None)
    required_table.add_column("Option", style="cyan", width=15)
    required_table.add_column("Description", style="white", width=45)
    required_table.add_column("Example", style="dim yellow")

    required_table.add_row(
        "--config",
        "VisionPack YAML configuration file with augmentation pipeline",
        "--config examples/visionpack.yaml"
    )
    required_table.add_row(
        "--input",
        "Input source: directory, file",
        "--input ./images"
    )

    required_panel = Panel(
        required_table,
        title="[bold red]πŸ”΄ Required Options[/bold red]",
        border_style="red",
        padding=(1, 2)
    )

    # Optional arguments
    optional_table = Table(show_header=True, header_style="bold yellow", box=None)
    optional_table.add_column("Option", style="cyan", width=12)
    optional_table.add_column("Description", style="white", width=25)
    optional_table.add_column("Default", style="green", width=10)
    optional_table.add_column("Example", style="dim yellow", width=20)

    optional_table.add_row(
        "--output",
        "Output destination",
        "None",
        "--output ./augmented"
    )
    optional_table.add_row(
        "--preview",
        "Save preview grid image",
        "None",
        "--preview grid.jpg"
    )
    optional_table.add_row(
        "--seed",
        "Random seed",
        "1337",
        "--seed 42"
    )
    optional_table.add_row(
        "--device",
        "Processing device",
        "cuda",
        "--device cpu"
    )
    optional_table.add_row(
        "--batch-size",
        "Batch size for processing",
        "64",
        "--batch-size 32"
    )

    optional_panel = Panel(
        optional_table,
        title="[bold yellow]🟑 Optional Options[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Generate preview grid:\n", style="bold white")
    examples.append("  dataphy augment vision --config aug.yaml --input ./images --preview grid.jpg\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Process images to output directory:\n", style="bold white")
    examples.append("  dataphy augment vision --config aug.yaml --input ./images --output ./augmented\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Custom settings:\n", style="bold white")
    examples.append("  dataphy augment vision --config aug.yaml --input ./images --device cpu --seed 42", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Usage Examples[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(required_panel)
    console.print()
    console.print(optional_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy augment --help' to see all augment commands[/dim]", justify="center")
    console.print()

show_augment_dataset_help()

Display fancy help for augment dataset command.

Source code in src/dataphy/cli.py
def show_augment_dataset_help():
    """Display fancy help for augment dataset command."""
    title = Text("🏭 Dataset Augmentation", style="bold magenta")
    subtitle = Text("Augment episodes or entire datasets with VisionPack transforms", style="dim")

    # Description panel
    desc = Text()
    desc.append("Apply VisionPack augmentation pipelines to robotics datasets with two modes: ", style="white")
    desc.append("Episode Mode (in-place) and Full Dataset Mode (new dataset). ", style="white")
    desc.append("Supports LeRobot format with automatic metadata updates.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Modes table
    modes_table = Table(show_header=True, header_style="bold cyan", box=None)
    modes_table.add_column("Mode", style="cyan", width=15)
    modes_table.add_column("Trigger", style="yellow", width=20)
    modes_table.add_column("Description", style="white", width=50)

    modes_table.add_row(
        "Episode Mode",
        "--episode/--episodes",
        "Augment specific episodes in-place with backups"
    )
    modes_table.add_row(
        "Full Dataset Mode",
        "No episode selection",
        "Create new augmented dataset at --output-path"
    )

    modes_panel = Panel(
        modes_table,
        title="[bold cyan]🎯 Operation Modes[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    # Required arguments
    required_table = Table(show_header=True, header_style="bold red", box=None)
    required_table.add_column("Option", style="cyan", width=15)
    required_table.add_column("Description", style="white", width=45)
    required_table.add_column("Example", style="dim yellow")

    required_table.add_row(
        "--dataset-path",
        "Path to dataset directory",
        "--dataset-path ./my_dataset"
    )

    required_panel = Panel(
        required_table,
        title="[bold red]πŸ”΄ Required Options[/bold red]",
        border_style="red",
        padding=(1, 2)
    )

    # Episode selection options
    episode_table = Table(show_header=True, header_style="bold green", box=None)
    episode_table.add_column("Option", style="cyan", width=15)
    episode_table.add_column("Description", style="white", width=40)
    episode_table.add_column("Example", style="dim yellow")

    episode_table.add_row(
        "--episode",
        "Single episode (name or index)",
        "--episode 0 or --episode episode_000000"
    )
    episode_table.add_row(
        "--episodes",
        "Multiple episodes (comma-separated)",
        "--episodes 0,1,2 or --episodes episode_000000,episode_000001"
    )

    episode_panel = Panel(
        episode_table,
        title="[bold green]🎬 Episode Selection[/bold green]",
        border_style="green",
        padding=(1, 2)
    )

    # Output options
    output_table = Table(show_header=True, header_style="bold yellow", box=None)
    output_table.add_column("Option", style="cyan", width=15)
    output_table.add_column("Description", style="white", width=30)
    output_table.add_column("Default", style="green", width=15)
    output_table.add_column("Example", style="dim yellow")

    output_table.add_row(
        "--output-path",
        "Output directory (full dataset mode)",
        "./augmented",
        "--output-path ./new_dataset"
    )
    output_table.add_row(
        "--num-augmented",
        "Augmented versions per episode",
        "1",
        "--num-augmented 3"
    )
    output_table.add_row(
        "--preserve-original",
        "Include original episodes",
        "True",
        "--preserve-original"
    )

    output_panel = Panel(
        output_table,
        title="[bold yellow]πŸ“ Output Options[/bold yellow]",
        border_style="yellow",
        padding=(1, 2)
    )

    # Examples
    examples = Text()
    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Episode Mode - Augment single episode:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --episode 0 --config aug.yaml\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Episode Mode - Augment multiple episodes:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --episodes 0,1,2 --config aug.yaml\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("Full Dataset Mode - Create augmented dataset:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --config aug.yaml --num-augmented 3\n\n", style="yellow")

    examples.append("πŸ”Ή ", style="cyan")
    examples.append("With camera selection and sync:\n", style="bold white")
    examples.append("  dataphy augment dataset --dataset-path ./data --episode 0 --cameras observation.images.webcam --sync-views\n\n", style="yellow")

    examples_panel = Panel(
        examples,
        title="[bold cyan]πŸš€ Usage Examples[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(modes_panel)
    console.print()
    console.print(required_panel)
    console.print()
    console.print(episode_panel)
    console.print()
    console.print(output_panel)
    console.print()
    console.print(examples_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy augment dataset --help' for detailed command options[/dim]", justify="center")
    console.print()

show_list_formats_help()

Display fancy help for list-formats command.

Source code in src/dataphy/cli.py
def show_list_formats_help():
    """Display fancy help for list-formats command."""
    title = Text("πŸ—‚οΈ  Dataset Formats", style="bold blue")
    subtitle = Text("List all supported dataset formats and their descriptions", style="dim")

    # Description panel
    desc = Text()
    desc.append("Display all supported dataset formats with descriptions and example usage. ", style="white")
    desc.append("This helps you understand which formats are available for dataset operations.", style="white")

    desc_panel = Panel(
        desc,
        title="[bold blue]Description[/bold blue]",
        border_style="blue",
        padding=(1, 2)
    )

    # Usage
    usage = Text()
    usage.append("πŸ”Ή ", style="cyan")
    usage.append("List all formats:\n", style="bold white")
    usage.append("  dataphy dataset list-formats\n\n", style="yellow")

    usage.append("πŸ”Ή ", style="cyan")
    usage.append("Use with other commands:\n", style="bold white")
    usage.append("  dataphy dataset load --format lerobot --dataset-path ./data\n", style="yellow")
    usage.append("  dataphy dataset fetch --format lerobot --repo-id user/dataset\n", style="yellow")

    usage_panel = Panel(
        usage,
        title="[bold cyan]πŸš€ Usage[/bold cyan]",
        border_style="cyan",
        padding=(1, 2)
    )

    console.print()
    console.print(title, justify="center")
    console.print(subtitle, justify="center")
    console.print()
    console.print(desc_panel)
    console.print()
    console.print(usage_panel)
    console.print()
    console.print("[dim]πŸ’‘ Use 'dataphy dataset list-formats --help' for detailed command options[/dim]", justify="center")
    console.print()

augment_vision(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'), config: str = typer.Option(None, '--config', help='VisionPack YAML config'), input_path: str = typer.Option(None, '--input', help='Input source URI (dir, file)'), output_path: str = typer.Option(None, '--output', help='Output sink URI (dir)'), seed: int = typer.Option(1337, '--seed', help='Global seed'), batch_size: int = typer.Option(64, '--batch-size'), num_workers: int = typer.Option(8, '--num-workers'), device: str = typer.Option('cuda', '--device'), preview: str = typer.Option(None, '--preview', help='Write preview grid image to path'), limit: int = typer.Option(0, '--limit', help='Process first N batches only (0=all)'))

Source code in src/dataphy/cli.py
@augment_app.command("vision")
def augment_vision(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message"),
    config: str = typer.Option(None, "--config", help="VisionPack YAML config"),
    input_path: str = typer.Option(None, "--input", help="Input source URI (dir, file)"),
    output_path: str = typer.Option(None, "--output", help="Output sink URI (dir)"),
    seed: int = typer.Option(1337, "--seed", help="Global seed"),
    batch_size: int = typer.Option(64, "--batch-size"),
    num_workers: int = typer.Option(8, "--num-workers"),
    device: str = typer.Option("cuda", "--device"),
    preview: str = typer.Option(None, "--preview", help="Write preview grid image to path"),
    limit: int = typer.Option(0, "--limit", help="Process first N batches only (0=all)"),
):
    if help_flag:
        show_vision_help()
        raise typer.Exit()

    # Validate required parameters
    if not config:
        console.print("[red]❌ Error:[/red] --config is required")
        console.print("[dim]πŸ’‘ Use 'dataphy augment vision --help' for help[/dim]")
        raise typer.Exit(code=1)

    if not input_path:
        console.print("[red]❌ Error:[/red] --input is required")
        console.print("[dim]πŸ’‘ Use 'dataphy augment vision --help' for help[/dim]")
        raise typer.Exit(code=1)

    # Lazy imports for heavy dependencies
    from dataphy.visionpack.pipeline import build_pipeline
    from dataphy.io.loader import load_batches

    pipe = build_pipeline(config, device=device, seed=seed)
    batches = load_batches(input_path, batch_size=batch_size, num_workers=num_workers)

    if preview:
        # take one batch, run pipe, write grid
        import numpy as np
        from PIL import Image
        b = next(iter(batches))
        out = pipe(b)
        grid = _make_grid(out["images"])  # images: BxCxHxW [0..1]
        Image.fromarray((grid * 255).astype(np.uint8)).save(preview)

        # Fancy success message
        success_panel = Panel(
            f"[green]βœ… Preview saved successfully![/green]\n[white]Location:[/white] [cyan]{preview}[/cyan]",
            title="[bold green]🎨 Vision Augmentation Preview[/bold green]",
            border_style="green"
        )
        console.print(success_panel)
        raise typer.Exit()

    if not output_path:
        ok = Confirm.ask("No --output provided. Stream to /dev/null?", default=False)
        if not ok:
            raise typer.Exit(code=1)

    # Lazy import for write_batches
    from dataphy.io.writer import write_batches

    write_batches(batches, pipe, output_path, limit=limit)
    console.print("[green]βœ… Done[/green]")

dataset_fetch(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'), dataset_format: str = typer.Option(None, '--format', help='Dataset format (lerobot)'), repo_id: str = typer.Option('carpit680/giraffe_clean_desk2', '--repo-id', help='Repository ID (for lerobot format)'), split: str = typer.Option('train', '--split', help='Dataset split (train, val, test)'), output: str = typer.Option(None, '--output', help='Local output directory'), revision: str = typer.Option('main', '--revision', help='Git revision'))

Fetch datasets from various sources.

Source code in src/dataphy/cli.py
@dataset_app.command("fetch")
def dataset_fetch(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message"),
    dataset_format: str = typer.Option(None, "--format", help="Dataset format (lerobot)"),
    repo_id: str = typer.Option("carpit680/giraffe_clean_desk2", "--repo-id", help="Repository ID (for lerobot format)"),
    split: str = typer.Option("train", "--split", help="Dataset split (train, val, test)"),
    output: str = typer.Option(None, "--output", help="Local output directory"),
    revision: str = typer.Option("main", "--revision", help="Git revision"),
):
    """Fetch datasets from various sources."""
    if help_flag:
        show_fetch_help()
        raise typer.Exit()

    # Validate required parameters
    if not dataset_format:
        console.print("[red]Error:[/red] --format is required")
        console.print("[dim]Use 'dataphy dataset fetch --help' for help[/dim]")
        raise typer.Exit(code=1)

    if not output:
        console.print("[red]Error:[/red] --output is required")
        console.print("[dim]Use 'dataphy dataset fetch --help' for help[/dim]")
        raise typer.Exit(code=1)

    if dataset_format.lower() == "lerobot":
        try:
            from dataphy.sources.lerobot import fetch, RepositoryNotFoundError, RevisionNotFoundError, NetworkError
            source = {
                "repo_id": repo_id,
                "split": split,
                "revision": revision,
            }
            result = fetch(source=source, output_dir=output, runner="DirectRunner")
            console.print(f"[green]LeRobot dataset fetched to[/green] {result}")
        except ImportError:
            console.print("[red]Error:[/red] huggingface_hub not installed. Install with: pip install huggingface_hub")
            raise typer.Exit(code=1)
        except RepositoryNotFoundError as e:
            console.print(f"[red]Repository not found:[/red] {e}")
            console.print("[yellow]Tip:[/yellow] Check that the repository ID is correct and the repository exists on Hugging Face Hub")
            console.print(f"[dim]Repository ID:[/dim] {repo_id}")
            console.print("[yellow]Common solutions:[/yellow]")
            console.print("  β€’ Verify the repository ID format: 'username/repository-name'")
            console.print("  β€’ Check if the repository is public or you have access to it")
            console.print("  β€’ Try browsing to https://huggingface.co/{repo_id} in your browser")
            raise typer.Exit(code=1)
        except RevisionNotFoundError as e:
            console.print(f"[red]Revision not found:[/red] {e}")
            console.print("[yellow]Tip:[/yellow] Check that the revision exists in the repository")
            console.print(f"[dim]Repository ID:[/dim] {repo_id}")
            console.print(f"[dim]Revision:[/dim] {revision}")
            raise typer.Exit(code=1)
        except NetworkError as e:
            console.print(f"[red]Network error:[/red] {e}")
            console.print("[yellow]Tip:[/yellow] Check your internet connection and try again")
            raise typer.Exit(code=1)
        except PermissionError as e:
            console.print(f"[red]Permission denied:[/red] {e}")
            console.print("[yellow]Tip:[/yellow] The repository might be private. Check if you have access to it")
            raise typer.Exit(code=1)
        except Exception as e:
            console.print(f"[red]Unexpected error:[/red] {e}")
            console.print("[yellow]Tip:[/yellow] Check the repository ID and try again")
            raise typer.Exit(code=1)
    else:
        console.print(f"[red]Error:[/red] Unsupported format '{format}'. Supported formats: lerobot")
        raise typer.Exit(code=1)

dataset_info(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'), format: str = typer.Option(None, '--format', help='Dataset format (lerobot)'), dataset_path: str = typer.Option(None, '--dataset-path', help='Path to dataset directory'), repo_id: str = typer.Option(None, '--repo-id', help='Hugging Face repository ID (for lerobot format)'))

Get information about datasets from various sources.

Source code in src/dataphy/cli.py
@dataset_app.command("info")
def dataset_info(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message"),
    format: str = typer.Option(None, "--format", help="Dataset format (lerobot)"),
    dataset_path: str = typer.Option(None, "--dataset-path", help="Path to dataset directory"),
    repo_id: str = typer.Option(None, "--repo-id", help="Hugging Face repository ID (for lerobot format)"),
):
    """Get information about datasets from various sources."""
    if help_flag:
        show_info_help()
        raise typer.Exit()

    if not format:
        console.print("[red]Error:[/red] --format is required")
        console.print("[dim]Use 'dataphy dataset info --help' for help[/dim]")
        raise typer.Exit(code=1)

    if not dataset_path and not repo_id:
        console.print("[red]Error:[/red] Either --dataset-path or --repo-id is required")
        console.print("[dim]Use 'dataphy dataset info --help' for help[/dim]")
        raise typer.Exit(code=1)

    if dataset_path and repo_id:
        console.print("[red]Error:[/red] Cannot specify both --dataset-path and --repo-id")
        console.print("[dim]Use 'dataphy dataset info --help' for help[/dim]")
        raise typer.Exit(code=1)

    if format.lower() == "lerobot":
        if repo_id:
            # Handle Hugging Face Hub dataset
            try:
                from dataphy.sources.lerobot import get_dataset_info, RepositoryNotFoundError, RevisionNotFoundError, NetworkError
                # Extract dataset name from repo_id (e.g., "lerobot/lerobot-100k" -> "lerobot-100k")
                dataset_name = repo_id.split("/")[-1]
                info = get_dataset_info(repo_id, dataset_name)
                if info:
                    console.print(f"[green]Dataset info for {repo_id}:[/green]")
                    print(f"  Available splits: {', '.join(info['available_splits'])}")
                    print(f"  File types: {', '.join(info['file_types'])}")
                    print(f"  Total files: {info['total_files']}")

                    # Display detailed information if available
                    if info.get('detailed_info'):
                        detailed = info['detailed_info']
                        console.print("\n[blue]Detailed Information:[/blue]")
                        print(f"  Robot type: {detailed.get('robot_type', 'N/A')}")
                        print(f"  Total episodes: {detailed.get('total_episodes', 'N/A')}")
                        print(f"  Total frames: {detailed.get('total_frames', 'N/A')}")
                        print(f"  Total videos: {detailed.get('total_videos', 'N/A')}")
                        print(f"  FPS: {detailed.get('fps', 'N/A')}")
                        print(f"  Codebase version: {detailed.get('codebase_version', 'N/A')}")

                        # Show available features
                        if detailed.get('features'):
                            print(f"  Available features: {', '.join(detailed['features'].keys())}")
                else:
                    console.print(f"[red]Error:[/red] Could not retrieve dataset info for {repo_id}")
                    raise typer.Exit(code=1)
            except ImportError:
                console.print("[red]Error:[/red] huggingface_hub not installed. Install with: pip install huggingface_hub")
                raise typer.Exit(code=1)
            except RepositoryNotFoundError as e:
                console.print(f"[red]Repository not found:[/red] {e}")
                console.print("[yellow]Tip:[/yellow] Check that the repository ID is correct and the repository exists on Hugging Face Hub")
                console.print(f"[dim]Repository ID:[/dim] {repo_id}")
                console.print("[yellow]Common solutions:[/yellow]")
                console.print("  β€’ Verify the repository ID format: 'username/repository-name'")
                console.print("  β€’ Check if the repository is public or you have access to it")
                console.print("  β€’ Try browsing to https://huggingface.co/{repo_id} in your browser")
                raise typer.Exit(code=1)
            except NetworkError as e:
                console.print(f"[red]Network error:[/red] {e}")
                console.print("[yellow]Tip:[/yellow] Check your internet connection and try again")
                raise typer.Exit(code=1)
            except PermissionError as e:
                console.print(f"[red]Permission denied:[/red] {e}")
                console.print("[yellow]Tip:[/yellow] The repository might be private. Check if you have access to it")
                raise typer.Exit(code=1)
            except Exception as e:
                console.print(f"[red]Unexpected error:[/red] {e}")
                console.print("[yellow]Tip:[/yellow] Check the repository ID and try again")
                raise typer.Exit(code=1)
        else:
            # Handle local dataset
            try:
                from dataphy.dataset.registry import DatasetFormat, create_dataset_loader
                from pathlib import Path

                # Validate dataset path exists
                if not Path(dataset_path).exists():
                    console.print(f"[red]Error:[/red] Dataset path not found: {dataset_path}")
                    raise typer.Exit(code=1)

                # Create dataset loader
                loader = create_dataset_loader(dataset_path, format_type=DatasetFormat.LEROBOT)

                # Get dataset information
                dataset_info = loader.get_dataset_info()
                episodes = loader.get_episode_ids()

                console.print(f"[green]Dataset info for {dataset_path}:[/green]")
                print(f"  Format: {dataset_info.get('format', 'Unknown')}")
                print(f"  Total episodes: {len(episodes)}")
                print(f"  Total timesteps: {dataset_info.get('total_timesteps', 'Unknown')}")

                if dataset_info.get('features'):
                    print(f"  Features: {', '.join(dataset_info['features'])}")

                # Show sample episodes
                if episodes:
                    sample_episodes = episodes[:5]
                    print(f"  Sample episodes: {', '.join(sample_episodes)}")
                    if len(episodes) > 5:
                        print(f"  ... and {len(episodes) - 5} more episodes")

                # Show detailed information if available
                if dataset_info.get('metadata'):
                    console.print("\n[blue]Detailed Information:[/blue]")
                    metadata = dataset_info['metadata']
                    print(f"  Robot type: {metadata.get('robot_type', 'N/A')}")
                    print(f"  Codebase version: {metadata.get('codebase_version', 'N/A')}")
                    print(f"  FPS: {metadata.get('fps', 'N/A')}")

                    # Show available features
                    if metadata.get('features'):
                        print(f"  Available features: {', '.join(metadata['features'].keys())}")

            except Exception as e:
                console.print(f"[red]Error:[/red] Could not load dataset info: {e}")
                console.print("[yellow]Tip:[/yellow] Check that the dataset path is valid and contains a LeRobot dataset")
                raise typer.Exit(code=1)
    else:
        console.print(f"[red]❌ Error:[/red] Unsupported format '{format}'. Supported formats: lerobot")
        raise typer.Exit(code=1)

dataset_load(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'), dataset_path: str = typer.Option(None, '--dataset-path', help='Path to dataset directory'), format: str = typer.Option(None, '--format', help='Dataset format (auto-detected if not specified)'), info: bool = typer.Option(False, '--info', help='Show dataset information'), list_episodes: bool = typer.Option(False, '--list-episodes', help='List available episodes'), episode: str = typer.Option(None, '--episode', help="Load specific episode (name: 'episode_000000' or index: '0', '1', '2'...)"), episodes: str = typer.Option(None, '--episodes', help="Load multiple episodes (comma-separated, e.g., '0,1,2' or 'episode_000000,episode_000001')"), timestep: int = typer.Option(None, '--timestep', help='Load specific timestep (requires single --episode)'))

Load and inspect datasets.
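
For example, assuming a local LeRobot dataset at ./my_dataset (the path is illustrative) and the top-level Typer app importable as dataphy.cli.app, the command can be driven like this:

from typer.testing import CliRunner
from dataphy.cli import app  # assumed location of the top-level Typer app

runner = CliRunner()
base = ["dataset", "load", "--dataset-path", "./my_dataset"]

# Dataset-level summary and the list of episodes
print(runner.invoke(app, base + ["--info"]).output)
print(runner.invoke(app, base + ["--list-episodes"]).output)

# Load the first episode by index, then a single timestep from it
print(runner.invoke(app, base + ["--episode", "0"]).output)
print(runner.invoke(app, base + ["--episode", "0", "--timestep", "10"]).output)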

Source code in src/dataphy/cli.py
@dataset_app.command("load")
def dataset_load(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message"),
    dataset_path: str = typer.Option(None, "--dataset-path", help="Path to dataset directory"),
    format: str = typer.Option(None, "--format", help="Dataset format (auto-detected if not specified)"),
    info: bool = typer.Option(False, "--info", help="Show dataset information"),
    list_episodes: bool = typer.Option(False, "--list-episodes", help="List available episodes"),
    episode: str = typer.Option(None, "--episode", help="Load specific episode (name: 'episode_000000' or index: '0', '1', '2'...)"),
    episodes: str = typer.Option(None, "--episodes", help="Load multiple episodes (comma-separated, e.g., '0,1,2' or 'episode_000000,episode_000001')"),
    timestep: int = typer.Option(None, "--timestep", help="Load specific timestep (requires single --episode)"),
):
    """Load and inspect datasets."""
    if help_flag:
        show_load_help()
        raise typer.Exit()

    if not dataset_path:
        console.print("[red]Error:[/red] --dataset-path is required")
        console.print("[dim]Use 'dataphy dataset load --help' for help[/dim]")
        raise typer.Exit(code=1)
    try:
        from dataphy.dataset.registry import DatasetFormat, create_dataset_loader

        # Parse format if provided
        format_enum = None
        if format:
            try:
                format_enum = DatasetFormat(format.lower())
            except ValueError:
                console.print(f"[red]❌ Error:[/red] Unknown format '{format}'. Supported formats: {[f.value for f in DatasetFormat]}")
                raise typer.Exit(code=1)

        # Create dataset loader
        loader = create_dataset_loader(dataset_path, format_type=format_enum)

        if info:
            # Show dataset information
            dataset_info = loader.get_dataset_info()
            console.print("[green]πŸ“Š Dataset Information:[/green]")
            print(f"  Format: {dataset_info.get('format', 'Unknown')}")
            print(f"  Total episodes: {dataset_info.get('total_episodes', 'Unknown')}")
            print(f"  Total timesteps: {dataset_info.get('total_timesteps', 'Unknown')}")
            if dataset_info.get('features'):
                print(f"  Features: {', '.join(dataset_info['features'])}")

        elif list_episodes:
            # List available episodes
            episodes = loader.list_episodes()
            console.print("[green]πŸ“‹ Available episodes:[/green]")
            for ep_id in episodes:
                print(f"  - {ep_id}")

        elif episode or episodes:
            # Handle episode/episodes parameter - support single and multiple episodes
            if episode and episodes:
                console.print("[red]❌ Error:[/red] Cannot specify both --episode and --episodes")
                raise typer.Exit(code=1)

            if timestep is not None and episodes:
                console.print("[red]❌ Error:[/red] --timestep can only be used with single --episode")
                raise typer.Exit(code=1)

            # Get all episodes for index conversion
            all_episodes = loader.list_episodes()

            # Determine target episodes
            target_episodes = []
            if episode:
                target_episodes = [episode]
            elif episodes:
                target_episodes = [ep.strip() for ep in episodes.split(",")]

            # Convert episode indices to names and validate
            final_episodes = []
            for ep in target_episodes:
                episode_id = ep

                # Check if episode is a numeric index and convert to episode name
                try:
                    episode_index = int(ep)
                    if episode_index < 0 or episode_index >= len(all_episodes):
                        console.print(f"[red]❌ Error:[/red] Episode index {episode_index} out of range. Available episodes: 0-{len(all_episodes)-1}")
                        raise typer.Exit(code=1)
                    episode_id = all_episodes[episode_index]
                    console.print(f"[green]πŸ“‹ Using episode index {episode_index}:[/green] {episode_id}")
                except ValueError:
                    # ep is already a name, use as-is
                    pass

                # Validate episode exists
                if episode_id not in all_episodes:
                    console.print(f"[red]❌ Error:[/red] Episode '{episode_id}' not found")
                    console.print(f"[dim]Available episodes: {all_episodes[:5]}...[/dim]")
                    raise typer.Exit(code=1)

                final_episodes.append(episode_id)

            # Load episodes
            for i, episode_id in enumerate(final_episodes):
                if len(final_episodes) > 1:
                    console.print(f"\n[cyan]πŸ“ Episode {i+1}/{len(final_episodes)}:[/cyan] {episode_id}")

                if timestep is not None:
                    # Load specific timestep (only for single episode)
                    data = loader.get_timestep(episode_id, timestep)
                    console.print(f"[green]⏱️  Loaded timestep {timestep} from episode {episode_id}:[/green]")
                    print(f"  Keys: {list(data.keys())}")
                    for key, value in data.items():
                        if hasattr(value, 'shape'):
                            print(f"  {key}: shape {value.shape}, dtype {value.dtype}")
                        else:
                            print(f"  {key}: {type(value).__name__}")
                else:
                    # Load entire episode
                    episode_data = loader.get_episode(episode_id)
                    data = episode_data.data
                    console.print(f"[green]πŸ“ Loaded episode {episode_id}:[/green]")
                    console.print(f"  Episode length: {episode_data.length} timesteps")
                    console.print(f"  Available data keys: {list(data.keys())}")

                    # Show sample data for each key
                    for key, value in data.items():
                        if isinstance(value, list) and len(value) > 0:
                            sample_value = value[0]
                            if hasattr(sample_value, 'shape'):
                                console.print(f"  {key}: {len(value)} items, sample shape {sample_value.shape}, dtype {sample_value.dtype}")
                            else:
                                console.print(f"  {key}: {len(value)} items, sample type {type(sample_value).__name__}")
                        else:
                            console.print(f"  {key}: {type(value).__name__}")

            if len(final_episodes) > 1:
                console.print(f"\n[green]βœ… Loaded {len(final_episodes)} episodes successfully[/green]")
        else:
            console.print("[yellow]⚠️  No action specified. Use --info, --list-episodes, or --episode to interact with the dataset.[/yellow]")

    except Exception as e:
        console.print(f"[red]❌ Error loading dataset:[/red] {e}")
        raise typer.Exit(code=1)

dataset_list_formats(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'))

List supported dataset formats.
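
A quick way to check which formats the installed SDK registers (the app import path is assumed):

from typer.testing import CliRunner
from dataphy.cli import app  # assumed location of the top-level Typer app

runner = CliRunner()
result = runner.invoke(app, ["dataset", "list-formats"])
print(result.output)  # renders the formats table, e.g. the 'lerobot' row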

Source code in src/dataphy/cli.py
@dataset_app.command("list-formats")
def dataset_list_formats(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message")
):
    """List supported dataset formats."""
    if help_flag:
        show_list_formats_help()
        raise typer.Exit()

    try:
        from dataphy.dataset.registry import list_supported_formats
        formats = list_supported_formats()

        # Create a fancy table for formats
        formats_table = Table(title="πŸ—‚οΈ  Supported Dataset Formats", show_header=True, header_style="bold magenta")
        formats_table.add_column("Format", style="cyan", width=15)
        formats_table.add_column("Description", style="white")
        formats_table.add_column("Example Usage", style="dim yellow")

        format_descriptions = {
            "lerobot": ("LeRobot robotics datasets", "dataphy dataset load --format lerobot"),

        }

        for format_type in formats:
            desc, example = format_descriptions.get(format_type.value, ("General format", "dataphy dataset load"))
            formats_table.add_row(format_type.value, desc, example)

        console.print()
        console.print(formats_table)
        console.print()

    except Exception as e:
        console.print(f"[red]❌ Error listing formats:[/red] {e}")
        raise typer.Exit(code=1)

dataset_visualize(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'), format: str = typer.Option(None, '--format', help='Dataset format (lerobot)'), dataset_path: str = typer.Option(None, '--dataset-path', help='Path to dataset directory'), repo_id: str = typer.Option(None, '--repo-id', help='Hugging Face repository ID (for lerobot format)'), episode: str = typer.Option(None, '--episode', help="Specific episode ID to visualize (name: 'episode_000000' or index: '0', '1', '2'...)"), episodes: str = typer.Option(None, '--episodes', help="Multiple episodes to visualize (comma-separated, e.g., '0,1,2' or 'episode_000000,episode_000001')"), timestep_range: str = typer.Option(None, '--timestep-range', help="Timestep range as 'start,end' (default: all timesteps)"), camera: str = typer.Option(None, '--camera', help='Camera name to visualize (for multi-camera datasets)'))

Visualize datasets using format-specific viewers.
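
A sketch of both invocation styles; the dataset path, repository ID, and camera name are illustrative, rerun-sdk must be installed, and the app import path is assumed:

from typer.testing import CliRunner
from dataphy.cli import app  # assumed location of the top-level Typer app

runner = CliRunner()

# Visualize episode index 0 from a local dataset, restricted to one camera stream
runner.invoke(app, [
    "dataset", "visualize",
    "--format", "lerobot",
    "--dataset-path", "./my_dataset",
    "--episode", "0",
    "--camera", "observation.images.webcam",  # illustrative camera key
])

# Or fetch straight from the Hub first, then visualize all episodes
runner.invoke(app, [
    "dataset", "visualize",
    "--format", "lerobot",
    "--repo-id", "lerobot/pusht",  # illustrative repo ID
])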

Source code in src/dataphy/cli.py
@dataset_app.command("visualize")
def dataset_visualize(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message"),
    format: str = typer.Option(None, "--format", help="Dataset format (lerobot)"),
    dataset_path: str = typer.Option(None, "--dataset-path", help="Path to dataset directory"),
    repo_id: str = typer.Option(None, "--repo-id", help="Hugging Face repository ID (for lerobot format)"),
    episode: str = typer.Option(None, "--episode", help="Specific episode ID to visualize (name: 'episode_000000' or index: '0', '1', '2'...)"),
    episodes: str = typer.Option(None, "--episodes", help="Multiple episodes to visualize (comma-separated, e.g., '0,1,2' or 'episode_000000,episode_000001')"),
    timestep_range: str = typer.Option(None, "--timestep-range", help="Timestep range as 'start,end' (default: all timesteps)"),
    camera: str = typer.Option(None, "--camera", help="Camera name to visualize (for multi-camera datasets)"),
):
    """Visualize datasets using format-specific viewers."""
    if help_flag:
        show_visualize_help()
        raise typer.Exit()

    if not format:
        console.print("[red]Error:[/red] --format is required")
        console.print("[dim]Use 'dataphy dataset visualize --help' for help[/dim]")
        raise typer.Exit(code=1)
    if format.lower() == "lerobot":
        try:
            from dataphy.visualization.lerobot import visualize_lerobot_dataset
            from dataphy.dataset.registry import create_dataset_loader

            # Handle repo-id by fetching dataset first if needed
            actual_dataset_path = dataset_path
            if repo_id and not dataset_path:
                # Auto-fetch dataset to temporary location for visualization
                import tempfile
                import os
                from dataphy.sources.lerobot import fetch, RepositoryNotFoundError, RevisionNotFoundError, NetworkError

                console.print(f"[green]πŸ“₯ Fetching dataset from HuggingFace Hub:[/green] {repo_id}")
                temp_dir = tempfile.mkdtemp(prefix="dataphy_viz_")
                try:
                    source = {
                        "repo_id": repo_id,
                        "split": "train",
                        "revision": "main",
                    }
                    actual_dataset_path = fetch(source=source, output_dir=temp_dir, runner="DirectRunner")
                    console.print(f"[green]βœ… Dataset cached at:[/green] {actual_dataset_path}")
                except RepositoryNotFoundError as e:
                    console.print(f"[red]❌ Repository not found:[/red] {e}")
                    console.print(f"[yellow]Tip:[/yellow] Check that {repo_id} exists on Hugging Face Hub")
                    raise typer.Exit(code=1)
                except (NetworkError, Exception) as e:
                    console.print(f"[red]❌ Error fetching dataset:[/red] {e}")
                    raise typer.Exit(code=1)

            # Handle episode selection - support both single and multiple episodes
            target_episodes = []

            if episode or episodes:
                # Create loader to get episode list for index conversion
                loader = create_dataset_loader(actual_dataset_path)
                all_episodes = loader.get_episode_ids()

                if episode and episodes:
                    console.print("[red]❌ Error:[/red] Cannot specify both --episode and --episodes")
                    raise typer.Exit(code=1)

                if episode:
                    target_episodes = [episode]
                elif episodes:
                    target_episodes = [ep.strip() for ep in episodes.split(",")]

                # Convert episode indices to names
                final_episodes = []
                for ep in target_episodes:
                    episode_id = ep

                    # Check if episode is a numeric index and convert to episode name
                    try:
                        episode_index = int(ep)
                        if episode_index < 0 or episode_index >= len(all_episodes):
                            console.print(f"[red]❌ Error:[/red] Episode index {episode_index} out of range. Available episodes: 0-{len(all_episodes)-1}")
                            raise typer.Exit(code=1)
                        episode_id = all_episodes[episode_index]
                        console.print(f"[green]πŸ“‹ Using episode index {episode_index}:[/green] {episode_id}")
                    except ValueError:
                        # ep is already a name, use as-is
                        pass

                    # Validate episode exists
                    if episode_id not in all_episodes:
                        console.print(f"[red]❌ Error:[/red] Episode '{episode_id}' not found")
                        console.print(f"[dim]Available episodes: {all_episodes[:5]}...[/dim]")
                        raise typer.Exit(code=1)

                    final_episodes.append(episode_id)

                # Visualize episodes
                if len(final_episodes) == 1:
                    console.print(f"[green]πŸ” Visualizing episode:[/green] {final_episodes[0]}")
                    visualize_lerobot_dataset(
                        dataset_path=actual_dataset_path,
                        repo_id=None,  # Use local path now
                        episode_id=final_episodes[0],
                        timestep_range=timestep_range,
                        camera=camera
                    )
                else:
                    console.print(f"[green]πŸ” Visualizing {len(final_episodes)} episodes:[/green] {final_episodes}")
                    console.print("[dim]πŸ’‘ Tip: Each episode will open in a separate viewer[/dim]")
                    for i, ep_id in enumerate(final_episodes):
                        console.print(f"[cyan]Opening episode {i+1}/{len(final_episodes)}:[/cyan] {ep_id}")
                        visualize_lerobot_dataset(
                            dataset_path=actual_dataset_path,
                            repo_id=None,  # Use local path now
                            episode_id=ep_id,
                            timestep_range=timestep_range,
                            camera=camera
                        )
            else:
                # No specific episodes - visualize all
                visualize_lerobot_dataset(
                    dataset_path=actual_dataset_path,
                    repo_id=None if actual_dataset_path != dataset_path else repo_id,  # Use local path if we fetched it
                    episode_id=None,
                    timestep_range=timestep_range,
                    camera=camera
                )
        except ImportError:
            console.print("[red]❌ Error:[/red] Missing dependencies for LeRobot visualization. Install with: pip install rerun-sdk")
            raise typer.Exit(code=1)
        except Exception as e:
            console.print(f"[red]❌ Error visualizing LeRobot dataset:[/red] {e}")
            raise typer.Exit(code=1)
    else:
        console.print(f"[red]❌ Error:[/red] Unsupported format '{format}' for visualization. Supported formats: lerobot")
        raise typer.Exit(code=1)

augment_dataset(help_flag: bool = typer.Option(False, '--help', '-h', help='Show help message'), dataset_path: str = typer.Option(None, '--dataset-path', help='Path to dataset directory'), config: str = typer.Option(None, '--config', help='VisionPack pipeline YAML config'), episode: str = typer.Option(None, '--episode', help="Single episode to augment (name: 'episode_000000' or index: '0', '1', '2'...)"), episodes: str = typer.Option(None, '--episodes', help="Multiple episodes to augment (comma-separated, e.g., '0,1,2' or 'episode_000000,episode_000001')"), output_path: str = typer.Option('./augmented', '--output-path', help='Output path for new dataset (full dataset mode only, default: ./augmented)'), num_augmented: int = typer.Option(1, '--num-augmented', help='Number of augmented versions per episode (default: 1)'), cameras: str = typer.Option(None, '--cameras', help="Camera streams to augment (comma-separated, e.g., 'observation.images.webcam')"), preserve_original: bool = typer.Option(True, '--preserve-original/--no-preserve-original', help='Include original episodes in output dataset (full dataset mode)'), sync_views: bool = typer.Option(False, '--sync-views', help='Use synchronized augmentation across cameras'), random_seed: int = typer.Option(None, '--seed', help='Random seed for reproducible augmentation'), no_backup: bool = typer.Option(False, '--no-backup', help='Skip creating backup of original files (episode mode only)'), restore: str = typer.Option(None, '--restore', help='Restore episode from backup (provide episode ID)'), format: str = typer.Option(None, '--format', help='Dataset format (auto-detected if not specified)'), list_episodes: bool = typer.Option(False, '--list-episodes', help='List available episodes and cameras'), dry_run: bool = typer.Option(False, '--dry-run', help='Show what would be augmented without actually doing it'))

Augment episodes or entire datasets with VisionPack transforms.

🎯 MODES:

β€’ Episode Mode: specify --episode or --episodes to augment specific episodes in place
β€’ Full Dataset Mode: with no episode selection, a new augmented dataset is created

πŸ”§ EPISODE MODE (--episode or --episodes provided):

β€’ Modifies the original dataset, with backups
β€’ Camera stream selection and synchronized views
β€’ Automatic backup and restore functionality
β€’ In-place video repackaging in the original format

🏭 FULL DATASET MODE (no episode selection):

β€’ Creates an entirely new dataset at --output-path (default: ./augmented)
β€’ Multiple augmented versions per episode (--num-augmented)
β€’ Preserves original episodes alongside augmented ones
β€’ Automatic LeRobot version detection and metadata updates

The augmentation uses VisionPack transforms including spatial, photometric, texture, and occlusion variations optimized for robotics applications.
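
A sketch of both modes, assuming a VisionPack pipeline config at ./augment.yaml and a local dataset at ./my_dataset (both paths are illustrative), with the app import path assumed as in the earlier examples:

from typer.testing import CliRunner
from dataphy.cli import app  # assumed location of the top-level Typer app

runner = CliRunner()

# Episode mode: augment episode index 0 in place (a backup is kept by default)
runner.invoke(app, [
    "augment", "dataset",
    "--dataset-path", "./my_dataset",
    "--config", "./augment.yaml",
    "--episode", "0",
])

# Full dataset mode: write a new dataset with two augmented versions per episode
runner.invoke(app, [
    "augment", "dataset",
    "--dataset-path", "./my_dataset",
    "--config", "./augment.yaml",
    "--output-path", "./augmented",
    "--num-augmented", "2",
    "--seed", "42",
])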

Source code in src/dataphy/cli.py
@augment_app.command("dataset")
def augment_dataset(
    help_flag: bool = typer.Option(False, "--help", "-h", help="Show help message"),
    dataset_path: str = typer.Option(None, "--dataset-path", help="Path to dataset directory"),
    config: str = typer.Option(None, "--config", help="VisionPack pipeline YAML config"),

    # Episode selection (determines mode)
    episode: str = typer.Option(None, "--episode", help="Single episode to augment (name: 'episode_000000' or index: '0', '1', '2'...)"),
    episodes: str = typer.Option(None, "--episodes", help="Multiple episodes to augment (comma-separated, e.g., '0,1,2' or 'episode_000000,episode_000001')"),

    # Output control
    output_path: str = typer.Option("./augmented", "--output-path", help="Output path for new dataset (full dataset mode only, default: ./augmented)"),
    num_augmented: int = typer.Option(1, "--num-augmented", help="Number of augmented versions per episode (default: 1)"),

    # Camera and processing options
    cameras: str = typer.Option(None, "--cameras", help="Camera streams to augment (comma-separated, e.g., 'observation.images.webcam')"),
    preserve_original: bool = typer.Option(True, "--preserve-original/--no-preserve-original", help="Include original episodes in output dataset (full dataset mode)"),
    sync_views: bool = typer.Option(False, "--sync-views", help="Use synchronized augmentation across cameras"),
    random_seed: int = typer.Option(None, "--seed", help="Random seed for reproducible augmentation"),

    # Legacy episode-mode options
    no_backup: bool = typer.Option(False, "--no-backup", help="Skip creating backup of original files (episode mode only)"),
    restore: str = typer.Option(None, "--restore", help="Restore episode from backup (provide episode ID)"),

    # General options
    format: str = typer.Option(None, "--format", help="Dataset format (auto-detected if not specified)"),
    list_episodes: bool = typer.Option(False, "--list-episodes", help="List available episodes and cameras"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Show what would be augmented without actually doing it"),
):
    """
    Augment episodes or entire datasets with VisionPack transforms.

    🎯 MODES:
    β€’ Episode Mode: Specify --episode or --episodes to augment specific episodes in-place
    β€’ Full Dataset Mode: No episode selection creates a new augmented dataset

    πŸ”§ EPISODE MODE (--episode or --episodes provided):
    β€’ Modifies original dataset with backups
    β€’ Camera stream selection and synchronized views
    β€’ Automatic backup and restore functionality
    β€’ In-place video repackaging in original format

    🏭 FULL DATASET MODE (no episode selection):
    β€’ Creates entirely new dataset at --output-path (default: ./augmented)
    β€’ Multiple augmented versions per episode (--num-augmented)
    β€’ Preserves original episodes alongside augmented ones
    β€’ Automatic LeRobot version detection and metadata updates

    The augmentation uses VisionPack transforms including spatial, photometric,
    texture, and occlusion variations optimized for robotics applications.
    """
    if help_flag:
        show_augment_dataset_help()
        raise typer.Exit()

    # Validate required parameters
    if not dataset_path:
        console.print("[red]Error:[/red] --dataset-path is required")
        console.print("[dim]Use 'dataphy augment dataset --help' for help[/dim]")
        raise typer.Exit(code=1)

    try:
        from dataphy.dataset.episode_augmentor import EpisodeAugmentor
        from dataphy.dataset.augmentor import DatasetAugmentor, AugmentationConfig
        from dataphy.dataset.registry import DatasetFormat, create_dataset_loader

        # Parse format if provided
        format_enum = None
        if format:
            try:
                format_enum = DatasetFormat(format.lower())
            except ValueError:
                console.print(f"[red]❌ Error:[/red] Unknown format '{format}'. Supported formats: {[f.value for f in DatasetFormat]}")
                raise typer.Exit(code=1)

        # Create dataset loader
        loader = create_dataset_loader(dataset_path, format_type=format_enum)

        # Determine mode based on episode parameters
        episode_mode = episode is not None or episodes is not None
        full_dataset_mode = not episode_mode

        # Handle special operations first
        if restore:
            augmentor = EpisodeAugmentor(loader)
            all_episodes = augmentor.list_episodes()

            # Convert episode index to name if needed
            restore_episode = restore
            try:
                episode_index = int(restore)
                # Convert index to episode name
                if episode_index < 0 or episode_index >= len(all_episodes):
                    console.print(f"[red]❌ Error:[/red] Episode index {episode_index} out of range. Available episodes: 0-{len(all_episodes)-1}")
                    raise typer.Exit(code=1)
                restore_episode = all_episodes[episode_index]
                console.print(f"[green]πŸ“‹ Using episode index {episode_index}:[/green] {restore_episode}")
            except ValueError:
                # restore is already a name, use as-is
                pass

            console.print(f"[yellow]πŸ”„ Restoring episode:[/yellow] {restore_episode}")
            augmentor.restore_episode(restore_episode)
            console.print(f"[green]βœ… Episode {restore_episode} restored from backup[/green]")
            return

        if list_episodes:
            augmentor = EpisodeAugmentor(loader)
            all_episodes = augmentor.list_episodes()
            backups = augmentor.list_backups()

            console.print(f"[green]πŸ“Š Dataset Information:[/green]")
            print(f"  Total episodes: {len(all_episodes)}")
            print(f"  Episodes with backups: {len(backups)}")

            console.print(f"\n[green]πŸ“‹ Available Episodes:[/green]")
            for i, ep in enumerate(all_episodes[:10]):  # Show first 10
                backup_status = " [dim](backed up)[/dim]" if ep in backups else ""
                console.print(f"  {i:2d}. {ep}{backup_status}")

            if len(all_episodes) > 10:
                console.print(f"  ... and {len(all_episodes) - 10} more episodes")

            # Show cameras for first episode
            if all_episodes:
                first_episode = all_episodes[0]
                cameras_list = augmentor.get_available_cameras(first_episode)
                console.print(f"\n[green]πŸ“Ή Available Cameras ({first_episode}):[/green]")
                for cam in cameras_list:
                    console.print(f"  β€’ {cam}")

            return

        # Validate config is provided (required for both modes)
        if not config:
            console.print("[red]❌ Error:[/red] --config is required for augmentation")
            if episode_mode:
                console.print("[dim]Use --list-episodes to see available episodes[/dim]")
            raise typer.Exit(code=1)

        # Route to appropriate mode
        if episode_mode:
            # EPISODE MODE: Single or multiple specific episodes
            return _handle_episode_mode(
                loader, dataset_path, config, episode, episodes, cameras, 
                no_backup, sync_views, num_augmented, dry_run
            )
        else:
            # FULL DATASET MODE: Create new augmented dataset
            return _handle_full_dataset_mode(
                loader, dataset_path, output_path, config, num_augmented, 
                cameras, preserve_original, sync_views, random_seed, dry_run
            )

    except Exception as e:
        console.print(f"[red]❌ Error augmenting dataset:[/red] {e}")
        raise typer.Exit(code=1)