Source code for cas.cxg_utils

"""
cxg_utils.py

This module provides utility functions for working with AnnData datasets in the context of the CellxGene Census library.
"""

import logging
import os
from typing import Optional

import cellxgene_census


def download_dataset_with_id(dataset_id: str, file_path: Optional[str] = None) -> str:
    """
    Download an AnnData dataset with the specified ID.

    The target path is resolved to an absolute path first. If something other
    than a directory already exists at that path, the download is skipped and
    the existing path is returned unchanged.

    Args:
        dataset_id (str): The ID of the dataset to download.
        file_path (Optional[str], optional): The file path to save the downloaded
            AnnData. If not provided, the dataset will be saved in the current
            working directory with the dataset_id as the file name
            (``<dataset_id>.h5ad``). Supports both absolute and relative paths.

    Returns:
        str: The absolute path to the downloaded (or already present) AnnData
        dataset.

    Raises:
        IsADirectoryError: If the resolved target path is an existing directory
            rather than a file.

    Note:
        Errors raised by ``cellxgene_census.download_source_h5ad`` are not
        caught here and propagate to the caller.
    """
    target = f"{dataset_id}.h5ad" if file_path is None else file_path
    anndata_file_path = os.path.abspath(target)

    # A directory also satisfies os.path.exists(); without this guard the
    # function would report the directory itself as the "downloaded" dataset.
    if os.path.isdir(anndata_file_path):
        raise IsADirectoryError(
            f"'{anndata_file_path}' is a directory; file_path must point to a file."
        )

    # Reuse a previously downloaded file instead of fetching it again.
    if os.path.exists(anndata_file_path):
        logging.info("File '%s' already exists. Skipping download.", anndata_file_path)
        return anndata_file_path

    # Make sure the parent directory exists; exist_ok=True already tolerates
    # an existing directory, so no separate existence check is needed.
    directory = os.path.dirname(anndata_file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    logging.info("Downloading dataset with ID '%s'...", dataset_id)
    cellxgene_census.download_source_h5ad(dataset_id, to_path=anndata_file_path)
    logging.info("Download complete. File saved at '%s'.", anndata_file_path)

    return anndata_file_path