Download this Jupyter notebook
Concepts¶
DataArray and Dataset meta data handling¶
This section describes details about how coords (and masks) of datasets and data arrays behave when slicing, combining, or inserting.
[1]:
import numpy as np
import scipp as sc
# Shared objects for the cells below: a 1-D variable `x`, a data array `da`
# that uses `x` as data and as coord (with a mask computed from `x`), and a
# dataset `ds` holding `da` as item 'a'.
x = sc.Variable(dims=['x'], values=[1, 2, 3, 4])
da = sc.DataArray(
    data=x,
    coords={'x': x},
    masks={'x': sc.less(x, 2 * sc.units.one)},
)
ds = sc.Dataset(data={'a': da})
Consider a data array `da` and a dataset `ds` with an aligned coord and an aligned mask. The following conditions must hold:
[2]:
# Range slice: both the coord and the mask are still present.
assert 'x' in da['x', 0:1].coords  # range slice preserves coord
assert 'x' in da['x', 0:1].masks  # range slice preserves mask
# Point slice: 'x' is no longer in coords but shows up in attrs instead,
# while the mask is still present.
assert 'x' not in da['x', 0].coords  # point slice converts coord to attr
assert 'x' in da['x', 0].attrs
assert 'x' in da['x', 0].masks  # point slice preserves masks as aligned
[3]:
# Slicing an item and taking an item of a slice give identical results,
# for a range slice as well as for a point slice.
for index in (slice(0, 1), 0):
    assert sc.identical(ds['a']['x', index], ds['x', index]['a'])
[4]:
# 'x' is a coord of the item, of a range slice of the dataset, and of the
# item of such a range slice.
for obj in (ds['a'], ds['x', 0:1], ds['x', 0:1]['a']):
    assert 'x' in obj.coords
assert 'x' not in ds['x', 0].coords  # cannot have attr (unaligned coord) in dataset
# For point slices 'x' is found in the attrs of the item, in either order
# of slicing and item access.
for obj in (ds['a']['x', 0], ds['x', 0]['a']):
    assert 'x' in obj.attrs
# The mask is present on the item for every kind of slice.
for obj in (ds['a'], ds['x', 0:1]['a'], ds['a']['x', 0], ds['x', 0]['a']):
    assert 'x' in obj.masks
In operations, coords are compared:
[5]:
# Adding two range slices with different 'x' coord values must fail.
# RuntimeError is the exception type this notebook catches for the same kind
# of coord mismatch in the 2-D coord example; a bare `except:` would also hide
# unrelated errors such as KeyboardInterrupt.
try:
    da['x', 0:1] + da['x', 1:2]
except RuntimeError:
    ok = False
else:
    # Keep `ok` a plain bool instead of the resulting data array, so the
    # assertion below does not depend on the truthiness of a DataArray.
    ok = True
assert not ok
Mismatching attrs (“unaligned coords”) are dropped:
[6]:
# Adding a point slice (whose 'x' is an attr) gives the same result as adding
# only the data of that slice.
point = da['x', 1]
with_attr = da + point
data_only = da + point.data
assert sc.identical(with_attr, data_only)
Masks are ORed; there is no concept of “unaligned masks”:
[7]:
# Adding a point slice (which carries its mask) must differ from adding only
# the data of that slice.
point = da['x', 0]
with_mask = da + point
data_only = da + point.data
assert not sc.identical(with_mask, data_only)
A missing attr is interpreted as a mismatch to ensure that operations are associative:
[8]:
# Three different point slices; the grouping of the additions must not
# affect the result.
a, b, c = (da['x', i] for i in range(3))
left_grouped = (a + b) + c
right_grouped = a + (b + c)
assert sc.identical(right_grouped, left_grouped)
Insertion order does not matter for attrs:
[9]:
# Two copies of `da` that differ only in the value of attr 'attr'.
a, b = da.copy(), da.copy()
a.attrs['attr'] = 1.0 * sc.units.m
b.attrs['attr'] = 2.0 * sc.units.m

# Insert the same items into two datasets in opposite order.
items = {'a': a, 'b': b}
ds1, ds2 = sc.Dataset(), sc.Dataset()
for name in ('a', 'b'):
    ds1[name] = items[name]
for name in ('b', 'a'):
    ds2[name] = items[name]
assert sc.identical(ds1, ds2)
Inserting into a dataset with mismatching attrs drops the attr:
[10]:
# Dataset with a scalar 'x' coord taken from the first element of `x`.
ds = sc.Dataset()
ds.coords['x'] = x['x', 0]
ds['a'] = da['x', 1]  # Drops 'x' from 'a'
# The item reports the dataset's coord for 'x'.
dataset_coord = ds.coords['x']
item_coord = ds['a'].coords['x']
assert sc.identical(dataset_coord, item_coord)  # shadowing is NOT supported
Masks of dataset items are independent:
[11]:
ds = sc.Dataset()
# Two copies of `da` whose 'x' masks use different thresholds.
masked1, masked2 = da.copy(), da.copy()
masked1.masks['x'] = sc.less(x, 1 * sc.units.one)
masked2.masks['x'] = sc.less(x, 2 * sc.units.one)
assert not sc.identical(masked1, masked2)
# After insertion each item still has its own, different mask.
for name, item in (('a', masked1), ('b', masked2)):
    ds[name] = item
mask_a = ds['a'].masks['x']
mask_b = ds['b'].masks['x']
assert not sc.identical(mask_a, mask_b)
If the dataset has no coord, the attr is preserved for all items. Adding a coord later makes the `meta`
property invalid because of ambiguous name shadowing:
[12]:
# Insert two point slices into an empty dataset: 'x' is not a dataset coord
# but is kept as an attr on each item.
ds = sc.Dataset()
ds['a'] = da['x', 0]
ds['b'] = da['x', 1]
assert 'x' not in ds.coords
assert 'x' in ds['a'].attrs
assert 'x' in ds['b'].attrs
ds.coords['x'] = x['x', 0]  # introduce shadowing
# The concrete exception type is not shown in this notebook, so catch
# Exception; unlike a bare `except:` this does not swallow
# KeyboardInterrupt or SystemExit.
try:
    ds['a'].meta  # raises because of shadowing
except Exception:
    ok = True
else:
    ok = False
assert ok
# Remove the shadowing coord again so `ds` can be reused by later cells.
del ds.coords['x']
[13]:
# Replace the 'x' coord of `da` by `edges`, which has 5 values for the
# 4 data values.
edges = sc.Variable(dims=['x'], values=[1, 2, 3, 4, 5])
da.coords['x'] = edges

# Concatenating adjacent slices along 'x' must reproduce the corresponding
# part of `da`, for range slices, point slices, and a mix of both.
cases = [
    (da['x', :2], da['x', 2:], da),
    (da['x', 0], da['x', 1], da['x', 0:2]),
    (da['x', :-1], da['x', -1], da),
]
for first, second, expected in cases:
    assert sc.identical(sc.concatenate(first, second, 'x'), expected)

da_yx = sc.concatenate(da['x', :2], da['x', 2:], 'y')  # create 2-D coord
# The resulting 'x' coord equals the two overlapping coord ranges
# concatenated along 'y'.
x_coord = da.coords['x']
expected_coord = sc.concatenate(x_coord['x', :3], x_coord['x', 2:], 'y')
assert sc.identical(da_yx.coords['x'], expected_coord)
2-D coords for a dimension prevent operations between slices that are not along that dimension:
[14]:
# Data array with a 2-D 'x' coord (dims y, x) and a 1-D 'y' coord.
coord_x = sc.Variable(dims=['y', 'x'], values=np.array([[1, 2], [3, 4]]))
coord_y = sc.Variable(dims=['y'], values=[3, 4])
da_2d = sc.DataArray(
    data=sc.zeros(dims=['y', 'x'], shape=[2, 2]),
    coords={'x': coord_x, 'y': coord_y},
)
# Same as with 1-D coord: x-coord differs but not aligned due to slice.
da_2d['x', 0] + da_2d['x', 1]
raised = False
try:
    # 'y' sliced, so 'x' coord is aligned and yields different values from slices of 2-D coord.
    da_2d['y', 0] + da_2d['y', 1]
except RuntimeError:
    raised = True
assert raised
`coords` always refers to (aligned) coords in the dataset. Coords cannot be added or erased via an item, since a new coord dict is created when getting a dataset item:
[15]:
# Adding a coord through a dataset item must raise and must leave the
# dataset's coords untouched.
raised = False
try:
    ds['a'].coords['fail'] = 1.0 * sc.units.m
except sc.DataArrayError:
    raised = True
assert raised
assert 'fail' not in ds.coords
[16]:
# A coord added on the dataset is visible through the item ...
ds.coords['xx'] = 1.0 * sc.units.m
assert 'xx' in ds['a'].coords
# ... but deleting it through the item must raise and leave it in place.
raised = False
try:
    del ds['a'].coords['xx']
except sc.DataArrayError:
    raised = True
assert raised
assert 'xx' in ds.coords
The same mechanism applies for coords, masks, and attrs of slices:
[17]:
# Adding a coord through a slice of a data array must raise and must leave
# the coords of the original data array untouched.
raised = False
try:
    da['x', 0].coords['fail'] = 1.0 * sc.units.m
except sc.DataArrayError:
    raised = True
assert raised
assert 'fail' not in da.coords
`meta` contains dataset coordinates as well as item attributes. Entries cannot be added or erased through it, since that would be ambiguous:
[18]:
# Adding an entry through `meta` of a dataset item must raise and must not
# create the entry.
raised = False
try:
    ds['a'].meta['fail'] = 1.0 * sc.units.m
except sc.DataArrayError:
    raised = True
assert raised
assert 'fail' not in ds['a'].meta
[19]:
# An attr set on the item shows up in the item's `meta` ...
ds['a'].attrs['attr'] = 1.0 * sc.units.m
assert 'attr' in ds['a'].meta
# ... but deleting it through `meta` must raise and leave it in place.
raised = False
try:
    del ds['a'].meta['attr']
except sc.DataArrayError:
    raised = True
assert raised
assert 'attr' in ds['a'].meta
Attributes are independent for each item, and show up in the `meta` of the items:
[20]:
# Give each item its own value for attr 'attr'.
for name, value in (('a', 1.0), ('b', 2.0)):
    ds[name].attrs['attr'] = value * sc.units.m
# The attr is part of each item's meta but not of the dataset's meta.
for name in ('a', 'b'):
    assert 'attr' in ds[name].meta
assert 'attr' not in ds.meta
# The two items keep their different values.
attr_a = ds['a'].attrs['attr']
attr_b = ds['b'].attrs['attr']
assert not sc.identical(attr_a, attr_b)
# Remove the attr from both items again.
for name in ('a', 'b'):
    del ds[name].attrs['attr']