Skip to content

GitHub

dataphy.sources.github

Functions

fetch(source: Dict, output_dir: str, include: str, runner: str = 'DirectRunner')

Source code in src/dataphy/sources/github.py
def fetch(source: Dict, output_dir: str, include: str, runner: str = "DirectRunner"):
    """Pull a shallow, optionally sparse, checkout of a Git repository.

    Args:
        source: Mapping describing what to fetch. Recognised keys:
            ``"repo"`` (required) -- remote URL, e.g.
            ``https://github.com/user/repo.git``;
            ``"ref"`` -- ref handed to ``git pull`` (default ``"main"``);
            ``"paths"`` -- sparse-checkout pattern(s), either a list of
            strings or a single string. Empty/missing means full checkout.
        output_dir: Directory to initialise and pull into. It is created
            (including parents) if it does not exist.
        include: Not used by this function.
        runner: Not used by this function.

    Returns:
        ``output_dir`` as a string.

    Raises:
        ValueError: If ``source`` has no ``"repo"`` value.
        subprocess.CalledProcessError: If any git command exits non-zero.
    """
    repo = source.get("repo")  # e.g., https://github.com/user/repo.git
    if not repo:
        # Fail before touching the filesystem instead of letting a None
        # argument blow up inside subprocess after `git init` already ran.
        raise ValueError("source must provide a non-empty 'repo' URL")
    ref = source.get("ref") or "main"

    sparse_paths = source.get("paths") or []
    if isinstance(sparse_paths, str):
        # A bare string would otherwise be joined character by character.
        sparse_paths = [sparse_paths]

    outdir = pathlib.Path(output_dir)
    outdir.mkdir(parents=True, exist_ok=True)
    cwd = str(outdir)

    subprocess.check_call(["git", "init"], cwd=cwd)

    # `git remote add` fails if "origin" already exists (e.g. when fetch is
    # re-run on the same directory), so update the URL in that case.
    has_origin = subprocess.call(
        ["git", "remote", "get-url", "origin"],
        cwd=cwd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ) == 0
    remote_cmd = "set-url" if has_origin else "add"
    subprocess.check_call(["git", "remote", remote_cmd, "origin", repo], cwd=cwd)

    # Keep the sparse-checkout switch in sync with the request so a stale
    # pattern file from an earlier run cannot restrict a full checkout.
    sparse_flag = "true" if sparse_paths else "false"
    subprocess.check_call(["git", "config", "core.sparseCheckout", sparse_flag], cwd=cwd)
    if sparse_paths:
        info_dir = outdir / ".git" / "info"
        # The info directory comes from git's template dir and may be absent.
        info_dir.mkdir(parents=True, exist_ok=True)
        (info_dir / "sparse-checkout").write_text(
            "\n".join(sparse_paths) + "\n", encoding="utf-8"
        )

    subprocess.check_call(["git", "pull", "--depth=1", "origin", ref], cwd=cwd)
    return cwd