Source code for xdatasets.tutorial

import uuid
from functools import reduce
from html import escape

from IPython.core.display import HTML
from xarray.core.formatting_html import _icon, _mapping_section, _obj_repr


# URL of the root catalog file (main.yaml) that the functions in this module
# pass to ``intake.open_catalog``.
catalog_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml"


def open_dataset(
    name: str,
    **kws,  # noqa: F841
):
    r"""
    Open a dataset from the online public repository (requires internet).

    Available datasets:

    * ``"era5_reanalysis_single_levels"``: ERA5 reanalysis subset (t2m and tp)
    * ``"cehq"``: CEHQ flow and water levels observations

    Parameters
    ----------
    name : str
        Name of the catalog entry holding the dataset,
        e.g. 'era5_reanalysis_single_levels'. The name is searched among the
        entries of every top-level category of the catalog; if several
        categories contain it, the first one found is used.
    \*\*kws : dict, optional
        Currently not used.

    Returns
    -------
    object
        The result of ``read()`` for an entry whose driver is "geopandasfile",
        or of ``to_dask()`` for an entry whose driver is "zarr".

    Raises
    ------
    ImportError
        If intake cannot be imported.
    NotImplementedError
        If no catalog entry is named `name`, or if the entry uses a driver
        other than "geopandasfile" or "zarr".

    See Also
    --------
    xarray.open_dataset
    """
    try:
        import intake
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on intake and intake-xarray to download and manage datasets."
            " To proceed please install intake and intake-xarray.",
        ) from e

    cat = intake.open_catalog(catalog_path)

    # The catalog is assumed to be two levels deep (category -> dataset).
    # Stop at the first hit so the remaining categories are not traversed.
    match = next(
        (
            (category, dataset_name)
            for category in cat._entries.keys()
            for dataset_name in cat[category]._entries.keys()
            if dataset_name == name
        ),
        None,
    )

    unavailable = f"Dataset {name} is not available. Please request further datasets to our github issues pages"

    # Without this guard an unknown name would leave us holding the root
    # catalog itself and fail later with an unrelated error.
    if match is None:
        raise NotImplementedError(unavailable)

    # Index the catalog with the (category, dataset) pair, as the original
    # lookup did for a single match.
    data = cat[match]

    driver = data.describe()["driver"][0]
    if driver == "geopandasfile":
        return data.read()
    if driver == "zarr":
        return data.to_dask()
    raise NotImplementedError(unavailable)
def summarize_coords(variables):
    """
    Render an iterable of names as an HTML ``<ul>`` list.

    Parameters
    ----------
    variables : iterable
        Items to list. Each one is rendered with `summarize_variable`
        (with ``is_index=False``) and wrapped in an ``xr-var-item`` ``<li>``.

    Returns
    -------
    str
        An ``xr-var-list`` ``<ul>`` element containing one ``<li>`` per item,
        in iteration order. An empty iterable yields an empty list element.
    """
    rendered_items = "".join(
        f"<li class='xr-var-item'>{summarize_variable(entry, is_index=False)}</li>"
        for entry in variables
    )
    return f"<ul class='xr-var-list'>{rendered_items}</ul>"
def summarize_variable(name, is_index=False, dtype=None):  # noqa: F841
    """
    Build the HTML snippet for a single named entry.

    Parameters
    ----------
    name : object
        Entry name; converted with ``str`` and HTML-escaped before display.
    is_index : bool, optional
        When true, the name ``<span>`` gets the ``xr-has-index`` CSS class.
    dtype : object, optional
        Not used by this function.

    Returns
    -------
    str
        A preview ``<div>`` holding the name, followed by two checkbox/label
        pairs (attributes and data) carrying the corresponding icons.
    """
    # "Unique" ids are required so each checkbox/label pair can
    # expand/collapse its own subsection.
    attrs_id = f"attrs-{uuid.uuid4()}"
    data_id = f"data-{uuid.uuid4()}"

    if is_index:
        span_attr = " class='xr-has-index'"
    else:
        span_attr = ""
    label = escape(str(name))

    attrs_icon = _icon("icon-file-text2")
    data_icon = _icon("icon-database")

    pieces = [
        f"<div class='xr-var-preview'><span{span_attr}>{label}</span></div>",
        f"<input id='{attrs_id}' class='xr-var-attrs-in' type='checkbox'>",
        f"<label for='{attrs_id}' title='Show/Hide attributes'>",
        f"{attrs_icon}</label>",
        f"<input id='{data_id}' class='xr-var-data-in' type='checkbox'>",
        f"<label for='{data_id}' title='Show/Hide data repr'>",
        f"{data_icon}</label>",
    ]
    return "".join(pieces)
def list_available_datasets():
    """
    Display the datasets listed in the public online catalog (requires internet).

    The catalog at `catalog_path` is opened with intake. Its top-level entries,
    and the entries nested under each of them, are sorted by name and rendered
    with xarray's HTML repr helpers, one section per top-level entry.

    Returns
    -------
    IPython.core.display.HTML
        HTML object wrapping the rendered catalog listing.

    Raises
    ------
    ImportError
        If intake cannot be imported.

    See Also
    --------
    open_dataset
    """
    try:
        import intake
    except ImportError as e:
        raise ImportError(
            "tutorial.open_dataset depends on intake and intake-xarray to download and manage datasets."
            " To proceed please install intake and intake-xarray.",
        ) from e

    catalog = intake.open_catalog(catalog_path)

    # Only two catalog levels (category -> dataset) are walked here; this will
    # need a refactor if the catalog ever nests deeper.
    sections = []
    for category in sorted(catalog._entries.keys()):
        dataset_names = sorted(catalog[category]._entries.keys())
        sections.append(
            _mapping_section(
                dataset_names,
                name=category.capitalize(),
                details_func=summarize_coords,
                max_items_collapse=25,
                expand_option_name="display_expand_coords",
            )
        )

    header = [f"<div class='xr-obj-type'>{escape('xdatasets.Catalog')}</div>"]
    return HTML(_obj_repr("", header, sections))
def load_dataset(*args, **kwargs):
    r"""
    Open a dataset from the online repository (requires internet).

    This is a thin wrapper: every argument is forwarded to `open_dataset`
    and its result is returned unchanged.

    Parameters
    ----------
    \*args : sequence
        Positional arguments passed to `open_dataset`.
    \*\*kwargs : dict
        Keyword arguments passed to `open_dataset`.

    Returns
    -------
    object
        Whatever `open_dataset` returns for the given arguments.

    See Also
    --------
    open_dataset
    """
    dataset = open_dataset(*args, **kwargs)
    return dataset