Skip to content

Kaggle

dataphy.sources.kaggle

Functions

fetch(source: Dict, output_dir: str, include: str, runner: str = 'DirectRunner')

Source code in src/dataphy/sources/kaggle.py
def fetch(source: Dict, output_dir: str, include: str, runner: str = "DirectRunner") -> str:
    """Download a Kaggle dataset or competition bundle into ``output_dir``.

    The download is delegated to the ``kaggle`` command-line tool, which must
    be available on ``PATH``.

    Args:
        source: Mapping that holds either a ``"dataset"`` entry (for example
            ``"user/dataset"``) or a ``"competition"`` entry. When both are
            present, ``"dataset"`` takes precedence.
        output_dir: Directory the files are downloaded into. It is created,
            including missing parents, if it does not exist yet.
        include: Not used by this function; kept so the signature stays
            unchanged for existing callers.
        runner: Not used by this function; kept so the signature stays
            unchanged for existing callers.

    Returns:
        The output directory as a string.

    Raises:
        ValueError: If ``source`` has neither a ``"dataset"`` nor a
            ``"competition"`` entry (or both are empty).
        subprocess.CalledProcessError: If the ``kaggle`` command exits with a
            non-zero status.
    """
    dataset = source.get("dataset")  # e.g., user/dataset
    competition = source.get("competition")

    # Kept ahead of validation on purpose: the directory is created even when
    # the source turns out to be invalid, exactly as before.
    outdir = pathlib.Path(output_dir)
    outdir.mkdir(parents=True, exist_ok=True)

    if dataset:
        # The long option is spelled ``--unzip``; the single-dash ``-unzip``
        # is not a recognised flag and makes the CLI abort before downloading.
        subprocess.check_call(
            ["kaggle", "datasets", "download", "-d", dataset, "-p", str(outdir), "--unzip"]
        )
    elif competition:
        # No unzip flag is passed here, so the competition files stay in
        # whatever form the CLI downloads them.
        subprocess.check_call(
            ["kaggle", "competitions", "download", "-c", competition, "-p", str(outdir)]
        )
    else:
        raise ValueError("Provide 'dataset' or 'competition' in source")
    return str(outdir)