{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Concepts\n",
    "\n",
    "## DataArray and Dataset meta data handling\n",
    "\n",
    "This section describes details about how coords (and masks) of datasets and data arrays behave when slicing, combining, or inserting."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipp as sc\n",
    "\n",
    "x = sc.Variable(dims=['x'], values=[1, 2, 3, 4])\n",
    "da = sc.DataArray(data=x,\n",
    "                  coords={'x': x},\n",
    "                  masks={'x': sc.less(x, 2 * sc.units.one)})\n",
    "ds = sc.Dataset(data={'a': da})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Consider a data array `da` and a dataset `ds` with an aligned coord and an aligned mask.\n",
    "The following conditions must hold:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert 'x' in da['x', 0:1].coords  # range slice preserves coord\n",
    "assert 'x' in da['x', 0:1].masks  # range slice preserves mask\n",
    "assert 'x' in da['x', 0].attrs  # point slice converts coord to attr\n",
    "assert 'x' not in da['x', 0].coords\n",
    "assert 'x' in da['x', 0].masks  # point slice preserves masks as aligned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert sc.identical(ds['a']['x', 0:1], ds['x', 0:1]['a'])\n",
    "assert sc.identical(ds['a']['x', 0], ds['x', 0]['a'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert 'x' in ds['a'].coords\n",
    "assert 'x' in ds['x', 0:1].coords\n",
    "assert 'x' not in ds['x', 0].coords  # cannot have attr (unaligned coord) in dataset\n",
    "assert 'x' in ds['x', 0:1]['a'].coords\n",
    "assert 'x' in ds['a']['x', 0].attrs\n",
    "assert 'x' in ds['x', 0]['a'].attrs\n",
    "\n",
    "assert 'x' in ds['a'].masks\n",
    "assert 'x' in ds['x', 0:1]['a'].masks\n",
    "assert 'x' in ds['a']['x', 0].masks\n",
    "assert 'x' in ds['x', 0]['a'].masks"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In operations, coords are compared:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    da['x', 0:1] + da['x', 1:2]\n",
    "except RuntimeError:  # same exception type as caught for the 2-D coord mismatch below\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Mismatching attrs (\"unaligned coords\") are dropped:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert sc.identical(da + da['x', 1], da + da['x', 1].data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Masks are ORed; there is no concept of \"unaligned masks\":"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert not sc.identical(da + da['x', 0], da + da['x', 0].data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A missing attr is interpreted as a mismatch to ensure that operations are associative:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = da['x', 0]\n",
    "b = da['x', 1]\n",
    "c = da['x', 2]\n",
    "assert sc.identical(a + (b + c), (a + b) + c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Insertion order does not matter for attrs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = da.copy()\n",
    "a.attrs['attr'] = 1.0 * sc.units.m\n",
    "b = da.copy()\n",
    "b.attrs['attr'] = 2.0 * sc.units.m\n",
    "ds1 = sc.Dataset()\n",
    "ds2 = sc.Dataset()\n",
    "ds1['a'] = a\n",
    "ds1['b'] = b\n",
    "ds2['b'] = b\n",
    "ds2['a'] = a\n",
    "assert sc.identical(ds1, ds2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inserting into a dataset with mismatching attrs drops the attr:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = sc.Dataset()\n",
    "ds.coords['x'] = x['x', 0]\n",
    "ds['a'] = da['x', 1]  # Drops 'x' from 'a'\n",
    "assert sc.identical(ds.coords['x'], ds['a'].coords['x'])  # shadowing is NOT supported"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Masks of dataset items are independent:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = sc.Dataset()\n",
    "masked1 = da.copy()\n",
    "masked1.masks['x'] = sc.less(x, 1 * sc.units.one)\n",
    "masked2 = da.copy()\n",
    "masked2.masks['x'] = sc.less(x, 2 * sc.units.one)\n",
    "assert not sc.identical(masked1, masked2)\n",
    "ds['a'] = masked1\n",
    "ds['b'] = masked2\n",
    "assert not sc.identical(ds['a'].masks['x'], ds['b'].masks['x'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the dataset has no coord with that name, the attr is preserved for all items.\n",
    "Adding a coord later makes the `meta` property invalid because of ambiguous name shadowing:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = sc.Dataset()\n",
    "ds['a'] = da['x', 0]\n",
    "ds['b'] = da['x', 1]\n",
    "assert 'x' not in ds.coords\n",
    "assert 'x' in ds['a'].attrs\n",
    "assert 'x' in ds['b'].attrs\n",
    "ds.coords['x'] = x['x', 0]  # introduce shadowing\n",
    "try:\n",
    "    ds['a'].meta  # raises because of shadowing\n",
    "except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must propagate\n",
    "    ok = True\n",
    "else:\n",
    "    ok = False\n",
    "assert ok\n",
    "del ds.coords['x']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the 'x' coord of `da` by one with 5 values, i.e., one more than\n",
    "# the 4 data values along 'x'. Note that this modifies `da` for all cells below.\n",
    "edges = sc.Variable(dims=['x'], values=[1, 2, 3, 4, 5])\n",
    "da.coords['x'] = edges\n",
    "assert sc.identical(sc.concatenate(da['x', :2], da['x', 2:], 'x'), da)\n",
    "assert sc.identical(sc.concatenate(da['x', 0], da['x', 1], 'x'), da['x', 0:2])\n",
    "assert sc.identical(sc.concatenate(da['x', :-1], da['x', -1], 'x'), da)\n",
    "da_yx = sc.concatenate(da['x', :2], da['x', 2:], 'y')  # create 2-D coord\n",
    "assert sc.identical(\n",
    "    da_yx.coords['x'],\n",
    "    sc.concatenate(da.coords['x']['x', :3], da.coords['x']['x', 2:], 'y'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2-D coords for a dimension prevent operations between slices that are not along that dimension:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "da_2d = sc.DataArray(\n",
    "    data=sc.zeros(dims=['y', 'x'], shape=[2, 2]),\n",
    "    coords={\n",
    "        'x': sc.Variable(dims=['y', 'x'], values=np.array([[1, 2], [3, 4]])),\n",
    "        'y': sc.Variable(dims=['y'], values=[3, 4])})\n",
    "\n",
    "da_2d['x', 0] + da_2d['x', 1]  # Same as with 1-D coord: x-coord differs but not aligned due to slice.\n",
    "try:\n",
    "    # 'y' sliced, so 'x' coord is aligned and yields different values from slices of 2-D coord.\n",
    "    da_2d['y', 0] + da_2d['y', 1]\n",
    "except RuntimeError:\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`coords` always refers to the (aligned) coords of the dataset. Coords cannot be added or erased via an item, since a new coord dict is created when getting a dataset item:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    ds['a'].coords['fail'] = 1.0 * sc.units.m\n",
    "except sc.DataArrayError:\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok\n",
    "assert 'fail' not in ds.coords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds.coords['xx'] = 1.0 * sc.units.m\n",
    "assert 'xx' in ds['a'].coords\n",
    "try:\n",
    "    del ds['a'].coords['xx']\n",
    "except sc.DataArrayError:\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok\n",
    "assert 'xx' in ds.coords"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The same mechanism applies for coords, masks, and attrs of slices:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    da['x', 0].coords['fail'] = 1.0 * sc.units.m\n",
    "except sc.DataArrayError:\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok\n",
    "assert 'fail' not in da.coords"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`meta` contains dataset coordinates as well as item attributes. Entries cannot be added or erased via `meta`, since it would be ambiguous which of the two is meant:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    ds['a'].meta['fail'] = 1.0 * sc.units.m\n",
    "except sc.DataArrayError:\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok\n",
    "assert 'fail' not in ds['a'].meta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds['a'].attrs['attr'] = 1.0 * sc.units.m\n",
    "assert 'attr' in ds['a'].meta\n",
    "try:\n",
    "    del ds['a'].meta['attr']\n",
    "except sc.DataArrayError:\n",
    "    ok = False\n",
    "else:\n",
    "    ok = True\n",
    "assert not ok\n",
    "assert 'attr' in ds['a'].meta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Attributes are independent for each item, and show up in `meta` of the items:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds['a'].attrs['attr'] = 1.0 * sc.units.m\n",
    "ds['b'].attrs['attr'] = 2.0 * sc.units.m\n",
    "assert 'attr' in ds['a'].meta\n",
    "assert 'attr' in ds['b'].meta\n",
    "assert 'attr' not in ds.meta\n",
    "assert not sc.identical(ds['a'].attrs['attr'], ds['b'].attrs['attr'])\n",
    "del ds['a'].attrs['attr']\n",
    "del ds['b'].attrs['attr']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}