Coverage for install/scipp/core/shape.py: 62%
40 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Matthew Andrew
4# ruff: noqa: E501
6from typing import Dict, List, Optional, Sequence, Tuple, Union
8import numpy as np
10from .._scipp import core as _cpp
11from ..typing import VariableLikeType
12from . import data_group
13from ._cpp_wrapper_util import call_func as _call_cpp_func
14from ._sizes import _parse_dims_shape_sizes
15from .concepts import transform_data
def broadcast(
    x: VariableLikeType,
    dims: Optional[Union[List[str], Tuple[str, ...]]] = None,
    shape: Optional[Sequence[int]] = None,
    sizes: Optional[Dict[str, int]] = None,
) -> VariableLikeType:
    """Broadcast a Variable or a DataArray to the requested dims and shape.

    For a DataArray input, coordinates and attributes are shallow-copied
    whereas masks are deep-copied.

    Scipp operations broadcast their operands automatically, so calling this
    function explicitly is rarely necessary.

    The target must be specified in exactly one of the following ways:

    - ``dims`` together with ``shape``
    - ``sizes``

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data to broadcast.
    dims:
        List of new dimensions.
    shape:
        New extents in each dimension.
    sizes:
        New dimension labels to sizes map.

    Returns
    -------
    : Same type as input
        New Variable or DataArray with requested dimension labels and shape.
    """
    # Normalize the two accepted call styles into a single dims/shape mapping.
    target = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)
    # transform_data applies the callable to the data of ``x``.
    return transform_data(
        x,
        lambda data: _call_cpp_func(
            _cpp.broadcast, data, target["dims"], target["shape"]
        ),
    )
def concat(x: Sequence[VariableLikeType], dim: str) -> VariableLikeType:
    """Join the input arrays along ``dim``.

    There are two modes of concatenation:

    - ``dim`` already exists in the inputs: the output extent of ``dim``
      is the sum of the inputs' extents.
    - ``dim`` is a new dimension not present in any of the inputs:
      the output gains one extra dimension.

    For data arrays and datasets, coords and masks are concatenated as well.
    Coords and masks that do not depend on ``dim`` must match between the
    inputs and are copied to the output unchanged.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Sequence of input variables, data arrays, or datasets.
    dim:
        Dimension along which to concatenate.

    Returns
    -------
    : Same type as input
        Concatenation of the inputs.

    Examples
    --------

      >>> a = sc.arange('x', 3)
      >>> b = 100 * sc.arange('x', 3)
      >>> c = sc.concat([a, b], dim='x')
      >>> c
      <scipp.Variable> (x: 6)      int64  [dimensionless]  [0, 1, ..., 100, 200]
      >>> c.values
      array([  0,   1,   2,   0, 100, 200])

      >>> d = sc.concat([a, b], dim='y')
      >>> d
      <scipp.Variable> (y: 2, x: 3)      int64  [dimensionless]  [0, 1, ..., 100, 200]
      >>> d.values
      array([[  0,   1,   2],
             [  0, 100, 200]])

      >>> x = sc.DataArray(sc.arange('x', 3), coords={'x': sc.arange('x', 3)})
      >>> y = sc.DataArray(100 * sc.arange('x', 3), coords={'x': 100 * sc.arange('x', 3)})
      >>> z = sc.concat([x, y], dim='x')
      >>> z
      <scipp.DataArray>
      Dimensions: Sizes[x:6, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0, 1, ..., 100, 200]
      Data:
                                    int64  [dimensionless]  (x)  [0, 1, ..., 100, 200]
      >>> z.values
      array([  0,   1,   2,   0, 100, 200])
    """
    # Empty input or non-DataGroup items go straight to the C++ implementation.
    if not x or not isinstance(x[0], data_group.DataGroup):
        return _call_cpp_func(_cpp.concat, x, dim)
    # The first item is a DataGroup: delegate to the DataGroup helper,
    # passing this function so it can be applied to the groups' items.
    return data_group._apply_to_items(concat, x, dim)
def fold(
    x: VariableLikeType,
    dim: str,
    sizes: Optional[Dict[str, int]] = None,
    dims: Optional[Union[List[str], Tuple[str, ...]]] = None,
    shape: Optional[Sequence[int]] = None,
) -> VariableLikeType:
    """Fold a single dimension of a variable or data array into multiple dims.

    One and only one of these sets of arguments must be given:

    - ``dims`` and ``shape``
    - ``sizes``

    At most one of the new sizes may be ``-1``; it is then computed from the
    size of ``dim`` and the remaining new sizes. The ``shape`` argument is
    never modified, and may be any sequence (e.g. a list or a tuple).

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Variable or DataArray to fold.
    dim:
        A single dim label that will be folded into more dims.
    sizes:
        A dict mapping new dims to new shapes.
    dims:
        A list of new dims labels.
    shape:
        A list of new dim shapes.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the volume of the old shape is not equal to the
        volume of the new shape, or if more than one ``-1`` is given.
    ValueError
        If a ``-1`` size is requested but the size of ``dim`` is not
        divisible by the product of the other new sizes.

    Examples
    --------

      >>> v = sc.arange('x', 6)
      >>> v
      <scipp.Variable> (x: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3}).values
      array([[0, 1, 2],
             [3, 4, 5]])

      >>> sc.fold(v, dim='x', dims=['y', 'z'], shape=[2, 3])
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': -1})
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> a = sc.DataArray(0.1 * sc.arange('x', 6), coords={'x': sc.arange('x', 6)})
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.DataArray>
      Dimensions: Sizes[y:2, z:3, ]
      Coordinates:
      * x                           int64  [dimensionless]  (y, z)  [0, 1, ..., 4, 5]
      Data:
                                  float64  [dimensionless]  (y, z)  [0, 0.1, ..., 0.4, 0.5]
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).data.values
      array([[0. , 0.1, 0.2],
             [0.3, 0.4, 0.5]])
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).coords['x'].values
      array([[0, 1, 2],
             [3, 4, 5]])
    """
    parsed = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)

    # Handle potential size of -1.
    # Note that we implement this here on the Python layer, because one cannot create
    # a C++ Dimensions object with negative sizes.
    #
    # Work on a private list copy: the parsed shape may be the very object the
    # caller passed as ``shape``. Writing the resolved -1 into it would mutate
    # the caller's list, and would fail outright for immutable sequences such
    # as tuples.
    new_shape = list(parsed["shape"])
    minus_one_count = new_shape.count(-1)
    if minus_one_count > 1:
        raise _cpp.DimensionError(
            "Can only have a single -1 in the new requested shape."
        )
    if minus_one_count == 1:
        ind = new_shape.index(-1)
        # Temporarily replace the placeholder by 1 so that the product below
        # is the volume of all *other* new dimensions.
        new_shape[ind] = 1
        new_volume = np.prod(new_shape)
        old_size = x.sizes[dim]
        dim_size = old_size // new_volume
        if old_size % new_volume != 0:
            raise ValueError(
                f"-1 in new shape was computed to be {dim_size}, but the original "
                f"shape {old_size} cannot be divided by {dim_size}."
            )
        new_shape[ind] = dim_size

    return _call_cpp_func(_cpp.fold, x, dim, parsed["dims"], new_shape)
def flatten(
    x: VariableLikeType,
    dims: Optional[Union[List[str], Tuple[str, ...]]] = None,
    to: Optional[str] = None,
) -> VariableLikeType:
    """Merge several dimensions of the input into one dimension.

    A bin-edge coordinate of the input that cannot be joined together is not
    included in the output.

    For a DataArray input, every coord, mask, and attr that contains at least one
    of the flattened dimensions is flattened too. Consequently, when all dims are
    flattened (``dims=None``), all coords, masks, and attrs that share
    *some or all* dimensions with the data are flattened.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Multi-dimensional input to flatten.
    dims:
        A list of dim labels that will be flattened.
        If ``None``, all dimensions will be flattened.
        If the list is empty, this will effectively add a new inner dimension of length
        1 to the data (meta data such as coords and masks are not touched in this case).
    to:
        A single dim label for the resulting flattened dim.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the input does not have a contiguous memory layout,
        i.e. flattening would require moving data around.
        This can be resolved by (deep-)copying the input.
    ValueError
        If ``to`` is not given.

    Examples
    --------

      >>> v = sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3))
      >>> v
      <scipp.Variable> (x: 2, y: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> v = sc.array(dims=['x', 'y', 'z'], values=np.arange(24).reshape(2, 3, 4))
      >>> v
      <scipp.Variable> (x: 2, y: 3, z: 4)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 24)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6, z: 4)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['y', 'z'], to='u')
      <scipp.Variable> (x: 2, u: 12)      int64  [dimensionless]  [0, 1, ..., 22, 23]

      >>> a = sc.DataArray(0.1 * sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3)),
      ...        coords={'x': sc.arange('x', 2),
      ...                'y': sc.arange('y', 3),
      ...                'xy': sc.array(dims=['x', 'y'],
      ...                               values=np.arange(6).reshape(2, 3))})
      >>> a
      <scipp.DataArray>
      Dimensions: Sizes[x:2, y:3, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0, 1]
      * xy                          int64  [dimensionless]  (x, y)  [0, 1, ..., 4, 5]
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                  float64  [dimensionless]  (x, y)  [0, 0.1, ..., 0.4, 0.5]
      >>> sc.flatten(a, to='u')
      <scipp.DataArray>
      Dimensions: Sizes[u:6, ]
      Coordinates:
      * x                           int64  [dimensionless]  (u)  [0, 0, ..., 1, 1]
      * xy                          int64  [dimensionless]  (u)  [0, 1, ..., 4, 5]
      * y                           int64  [dimensionless]  (u)  [0, 1, ..., 1, 2]
      Data:
                                  float64  [dimensionless]  (u)  [0, 0.1, ..., 0.4, 0.5]

    """
    if to is not None:
        return _call_cpp_func(_cpp.flatten, x, dims, to)
    # ``to`` only has a default because flatten should be callable without
    # kwargs, and in that case it reads more naturally for the dims to
    # flatten to come first in the argument list. The target label itself
    # is mandatory.
    raise ValueError("The final flattened dimension is required.")
def transpose(
    x: VariableLikeType, dims: Optional[Union[List[str], Tuple[str, ...]]] = None
) -> VariableLikeType:
    """Reorder the dimensions of the input.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to transpose.
    dims:
        List of dimensions in desired order.
        If ``None``, reverses existing order.

    Returns
    -------
    : Same type as input
        The transpose of the input.

    Raises
    ------
    scipp.DimensionError
        If ``dims`` are incompatible with the input data.
    """
    # The C++ function is always handed a sequence: ``None`` is passed on
    # as an empty list.
    if dims is None:
        dims = []
    return _call_cpp_func(_cpp.transpose, x, dims)
def squeeze(
    x: VariableLikeType, dim: Optional[Union[str, List[str], Tuple[str, ...]]] = None
) -> VariableLikeType:
    """Drop dimensions that have length 1.

    Squeezing is the same as indexing each squeezed dimension with index 0,
    i.e., ``squeeze(x, ['x', 'y'])`` is equivalent to ``x['x', 0]['y', 0]``.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to remove dimensions from.
    dim:
        If given, the dimension(s) to squeeze.
        If ``None``, all length-1 dimensions are squeezed.

    Returns
    -------
    : Same type as input
        Input with length-1 dimensions removed.

    Raises
    ------
    scipp.DimensionError
        If a dimension in ``dim`` does not have length 1.

    See Also
    --------
    scipp.Variable.squeeze, scipp.DataArray.squeeze,
    scipp.Dataset.squeeze, numpy.squeeze

    Examples
    --------

      >>> v = sc.arange('a', 3).fold('a', {'x': 1, 'y': 3, 'z': 1})
      >>> v
      <scipp.Variable> (x: 1, y: 3, z: 1)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v)
      <scipp.Variable> (y: 3)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v, 'z')
      <scipp.Variable> (x: 1, y: 3)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v, ['x', 'z'])
      <scipp.Variable> (y: 3)      int64  [dimensionless]  [0, 1, 2]

    Coordinates for squeezed dimensions become unaligned:

      >>> da = sc.DataArray(v, coords={'x': sc.arange('x', 1),
      ...                              'y': sc.arange('y', 3)})
      >>> da
      <scipp.DataArray>
      Dimensions: Sizes[x:1, y:3, z:1, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0]
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                    int64  [dimensionless]  (x, y, z)  [0, 1, 2]
      >>> sc.squeeze(da)
      <scipp.DataArray>
      Dimensions: Sizes[y:3, ]
      Coordinates:
        x                           int64  [dimensionless]  ()  0
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                    int64  [dimensionless]  (y)  [0, 1, 2]
    """
    # A single label is wrapped in a 1-tuple; ``None`` and sequences of
    # labels are forwarded unchanged.
    if isinstance(dim, str):
        dim = (dim,)
    return _call_cpp_func(_cpp.squeeze, x, dim)