Skip to content

Atlas

dataphy.catalog.atlas

Functions

get_dataset_record(name: str) -> Dict[str, Any]

Look up a dataset by its "username/dataset" name in MongoDB Atlas. Returns a plain dict with the keys provider, source, and filters.

Source code in src/dataphy/catalog/atlas.py
def get_dataset_record(name: str) -> Dict[str, Any]:
    """Look up a dataset record by its "username/dataset" name in MongoDB Atlas.

    Args:
        name: Dataset identifier of the form "username/dataset". Only the
            first "/" separates the owner from the dataset name.

    Returns:
        A plain dict with the keys ``provider``, ``source`` and ``filters``.
        ``provider`` is ``None`` and ``source``/``filters`` are ``{}`` when
        the stored document lacks them.

    Raises:
        RuntimeError: If ``MongoClient`` is ``None`` (pymongo not installed)
            or the ``MONGODB_URI`` environment variable is unset or empty.
        ValueError: If ``name`` does not contain a "/" separator.
        KeyError: If no document matches the owner and dataset name.
    """
    uri = os.getenv("MONGODB_URI")
    if MongoClient is None or not uri:
        raise RuntimeError("pymongo not installed or MONGODB_URI not set; install extra [mongo] and export MONGODB_URI")

    # Validate before touching the database so a malformed name fails with a
    # clear message instead of a tuple-unpacking error.
    owner, sep, dataset = name.partition("/")
    if not sep:
        raise ValueError(
            f"Invalid dataset name {name!r}; expected 'username/dataset'"
        )

    # The context manager closes the client (connection pool and monitor
    # threads) even when the lookup raises, so repeated calls do not leak.
    with MongoClient(uri, serverSelectionTimeoutMS=5000) as client:
        doc = client[_DB][_COLL].find_one({"owner": owner, "name": dataset})

    if not doc:
        raise KeyError(f"Dataset not found: {name}")

    # Normalize to the minimal shape callers rely on.
    return {
        "provider": doc.get("provider"),
        "source": doc.get("source", {}),
        "filters": doc.get("filters", {}),
    }