Source code for xdatasets.tutorial

import uuid
from functools import reduce
from html import escape

from IPython.core.display import HTML
from xarray.core.formatting_html import _icon, _mapping_section, _obj_repr


# URL of the root intake catalog (a YAML file) that the functions in this
# module open with ``intake.open_catalog``.
catalog_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml"



[docs]
def open_dataset(
    name: str,
    **kws,  # noqa: F841
):
    r"""
    Open a dataset from the online public repository (requires internet).

    Available datasets:
    * ``"era5_reanalysis_single_levels"``: ERA5 reanalysis subset (t2m and tp)
    * ``"cehq"``: CEHQ flow and water levels observations

    Parameters
    ----------
    name : str
        Name of the dataset entry to look up in the catalog,
        e.g. 'era5_reanalysis_single_levels'.
    \*\*kws : dict, optional
        Currently not used.

    Returns
    -------
    object
        The result of ``read()`` for entries whose driver is
        ``geopandasfile``, or of ``to_dask()`` for entries whose driver is
        ``zarr``.

    Raises
    ------
    ImportError
        If ``intake`` cannot be imported.
    NotImplementedError
        If no catalog entry is named ``name``, or if the matching entry uses
        a driver other than ``geopandasfile`` or ``zarr``.

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import intake
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on intake and intake-xarray to download and manage datasets."
            " To proceed please install intake and intake-xarray.",
        ) from e

    cat = intake.open_catalog(catalog_path)

    # The catalog is searched two levels deep (category -> dataset). Each hit
    # is kept as a (category, dataset_name) key path into the catalog.
    matches = [
        (category, dataset_name)
        for category in cat._entries.keys()
        for dataset_name in cat[category]._entries.keys()
        if dataset_name == name
    ]

    unavailable_msg = f"Dataset {name} is not available. Please request further datasets to our github issues pages"

    if not matches:
        # Fail explicitly: with no key path to follow there is no entry to
        # inspect, only the catalog itself.
        raise NotImplementedError(unavailable_msg)

    # Resolve exactly one key path. If the same dataset name appears under
    # several categories, the first one found is used.
    data = cat[matches[0]]

    # "driver" is indexed with [0], i.e. the description stores it as a sequence.
    driver = data.describe()["driver"][0]
    if driver == "geopandasfile":
        return data.read()
    if driver == "zarr":
        return data.to_dask()
    raise NotImplementedError(unavailable_msg)




[docs]
def summarize_coords(variables):
    """
    Render an iterable of names as an HTML unordered list.

    Parameters
    ----------
    variables : iterable
        Items to list; each one is passed to `summarize_variable` as the
        name, with ``is_index=False``.

    Returns
    -------
    str
        A ``<ul class='xr-var-list'>`` element containing one
        ``<li class='xr-var-item'>`` per item, in iteration order.
    """
    rendered_items = "".join(
        f"<li class='xr-var-item'>{summarize_variable(entry, is_index=False)}</li>"
        for entry in variables
    )
    return f"<ul class='xr-var-list'>{rendered_items}</ul>"




[docs]
def summarize_variable(name, is_index=False, dtype=None):  # noqa: F841
    """
    Build the HTML fragment shown for a single named entry.

    Parameters
    ----------
    name : object
        Value to display; it is converted with ``str`` and HTML-escaped.
    is_index : bool, optional
        When true, the name ``<span>`` gets the ``xr-has-index`` CSS class.
    dtype : object, optional
        Not used.

    Returns
    -------
    str
        Markup made of the name preview followed by two checkbox/label pairs
        (attributes toggle and data-repr toggle), each with its own icon.
    """
    safe_name = escape(str(name))
    span_class = " class='xr-has-index'" if is_index else ""

    # Fresh random ids tie each checkbox to its label so that sections can be
    # expanded/collapsed independently.
    attrs_toggle_id = f"attrs-{uuid.uuid4()}"
    data_toggle_id = f"data-{uuid.uuid4()}"

    attrs_toggle_icon = _icon("icon-file-text2")
    data_toggle_icon = _icon("icon-database")

    fragments = [
        f"<div class='xr-var-preview'><span{span_class}>{safe_name}</span></div>",
        f"<input id='{attrs_toggle_id}' class='xr-var-attrs-in' type='checkbox'>",
        f"<label for='{attrs_toggle_id}' title='Show/Hide attributes'>",
        f"{attrs_toggle_icon}</label>",
        f"<input id='{data_toggle_id}' class='xr-var-data-in' type='checkbox'>",
        f"<label for='{data_toggle_id}' title='Show/Hide data repr'>",
        f"{data_toggle_icon}</label>",
    ]
    return "".join(fragments)




[docs]
def list_available_datasets():
    """
    List the datasets available in the public online catalog (requires internet).

    The catalog is read two levels deep: every top-level entry becomes one
    section, and the names of the entries nested under it are listed in
    sorted order.

    Returns
    -------
    IPython.core.display.HTML
        HTML object wrapping the markup produced with xarray's HTML
        formatting helpers.

    Raises
    ------
    ImportError
        If ``intake`` cannot be imported.

    See Also
    --------
    open_dataset
    """
    try:
        import intake
    except ImportError as e:
        raise ImportError(
            "tutorial.list_available_datasets depends on intake and intake-xarray to download and manage datasets."
            " To proceed please install intake and intake-xarray.",
        ) from e

    cat = intake.open_catalog(catalog_path)

    # This will need refactor if the catalog has more than 2 levels
    datasets_catalog = {
        field: sorted(cat[field]._entries.keys())
        for field in sorted(cat._entries.keys())
    }

    # One collapsible section per top-level catalog entry; the section body
    # is rendered by ``summarize_coords`` from the list of dataset names.
    sections = [
        _mapping_section(
            datasets,
            name=field.capitalize(),
            details_func=summarize_coords,
            max_items_collapse=25,
            expand_option_name="display_expand_coords",
        )
        for field, datasets in datasets_catalog.items()
    ]

    catalog_html = _obj_repr(
        "",
        [f"<div class='xr-obj-type'>{escape('xdatasets.Catalog')}</div>"],
        sections,
    )

    return HTML(catalog_html)




[docs]
def load_dataset(*args, **kwargs):
    r"""
    Open a dataset from the online repository (requires internet).

    This forwards every argument unchanged to `open_dataset` and returns
    whatever that call returns.

    Parameters
    ----------
    \*args : sequence
        Positional arguments passed to `open_dataset`.
    \*\*kwargs : dict
        Keyword arguments passed to `open_dataset`.

    Returns
    -------
    object
        The value returned by `open_dataset`.

    See Also
    --------
    open_dataset
    """
    dataset = open_dataset(*args, **kwargs)
    return dataset