Coverage for install/scipp/core/shape.py: 53%
55 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-17 01:51 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-17 01:51 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Matthew Andrew
4# ruff: noqa: E501
6from collections.abc import Sequence
7from typing import overload
9import numpy as np
11from .._scipp import core as _cpp
12from ..typing import VariableLikeType
13from . import data_group
14from ._cpp_wrapper_util import call_func as _call_cpp_func
15from ._sizes import _parse_dims_shape_sizes
16from .concepts import transform_data
17from .cpp_classes import Variable
@overload
def broadcast(
    x: VariableLikeType,
    *,
    dims: Sequence[str],
    shape: Sequence[int],
) -> VariableLikeType: ...


@overload
def broadcast(
    x: VariableLikeType,
    *,
    sizes: dict[str, int],
) -> VariableLikeType: ...


def broadcast(
    x: VariableLikeType,
    *,
    dims: Sequence[str] | None = None,
    shape: Sequence[int] | None = None,
    sizes: dict[str, int] | None = None,
) -> VariableLikeType:
    """Broadcast a Variable or a DataArray to the requested dims and shape.

    For a DataArray input, coordinates and attributes are shallow-copied
    while masks are deep-copied.

    Scipp operations broadcast their operands automatically, so calling
    this function explicitly is rarely necessary.

    The target must be specified in exactly one of two ways:

    - ``dims`` together with ``shape``
    - ``sizes``

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data to broadcast.
    dims:
        Labels of the new dimensions.
    shape:
        Extent of each new dimension.
    sizes:
        Mapping from new dimension labels to their sizes.

    Returns
    -------
    : Same type as input
        New Variable or DataArray with the requested dimension labels and shape.
    """
    # Normalize both calling conventions into a single dims/shape pair.
    target = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)
    target_dims, target_shape = target["dims"], target["shape"]

    def _broadcast_variable(var: Variable) -> Variable:
        # Broadcast one variable; ``transform_data`` decides what it is applied to.
        return _call_cpp_func(_cpp.broadcast, var, target_dims, target_shape)  # type: ignore[return-value]

    return transform_data(x, _broadcast_variable)
def concat(x: Sequence[VariableLikeType], dim: str) -> VariableLikeType:
    """Join the input arrays along ``dim``.

    There are two modes of concatenation:

    - ``dim`` already exists in the inputs: the output extent along ``dim``
      is the sum of the extents of the inputs.
    - ``dim`` is not a dimension of any input: the inputs are stacked along
      a new dimension, so the output has one extra dimension.

    For data arrays and datasets, coords and masks are concatenated as well.
    Coords and masks that do not depend on ``dim`` must match across all
    inputs and are copied to the output unchanged.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Sequence of input variables, data arrays, or datasets.
    dim:
        Dimension along which to concatenate.

    Returns
    -------
    : Same type as input
        Concatenation of the inputs.

    Examples
    --------

      >>> a = sc.arange('x', 3)
      >>> b = 100 * sc.arange('x', 3)
      >>> c = sc.concat([a, b], dim='x')
      >>> c
      <scipp.Variable> (x: 6)      int64  [dimensionless]  [0, 1, ..., 100, 200]
      >>> c.values
      array([  0,   1,   2,   0, 100, 200])

      >>> d = sc.concat([a, b], dim='y')
      >>> d
      <scipp.Variable> (y: 2, x: 3)      int64  [dimensionless]  [0, 1, ..., 100, 200]
      >>> d.values
      array([[  0,   1,   2],
             [  0, 100, 200]])

      >>> x = sc.DataArray(sc.arange('x', 3), coords={'x': sc.arange('x', 3)})
      >>> y = sc.DataArray(100 * sc.arange('x', 3), coords={'x': 100 * sc.arange('x', 3)})
      >>> z = sc.concat([x, y], dim='x')
      >>> z
      <scipp.DataArray>
      Dimensions: Sizes[x:6, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0, 1, ..., 100, 200]
      Data:
                                    int64  [dimensionless]  (x)  [0, 1, ..., 100, 200]
      >>> z.values
      array([  0,   1,   2,   0, 100, 200])
    """
    # Only the first element is inspected to detect data-group input;
    # an empty sequence goes straight to the C++ implementation.
    first_is_group = bool(x) and isinstance(x[0], data_group.DataGroup)
    if not first_is_group:
        return _call_cpp_func(_cpp.concat, x, dim)  # type: ignore[return-value]
    # Data groups are handled by applying ``concat`` to their items.
    return data_group.apply_to_items(concat, x, dim)
@overload
def fold(
    x: VariableLikeType,
    dim: str,
    *,
    dims: Sequence[str],
    shape: Sequence[int],
) -> VariableLikeType: ...


@overload
def fold(
    x: VariableLikeType,
    dim: str,
    *,
    sizes: dict[str, int],
) -> VariableLikeType: ...


def fold(
    x: VariableLikeType,
    dim: str,
    *,
    dims: Sequence[str] | None = None,
    shape: Sequence[int] | None = None,
    sizes: dict[str, int] | None = None,
) -> VariableLikeType:
    """Fold a single dimension of a variable or data array into multiple dims.

    One and only one of these sets of arguments must be given:

    - ``dims`` and ``shape``
    - ``sizes``

    At most one of the requested sizes may be ``-1``. Its value is then
    computed from the size of ``dim`` and the product of the other
    requested sizes.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Variable or DataArray to fold.
    dim:
        A single dim label that will be folded into more dims.
    sizes:
        A dict mapping new dims to new shapes.
    dims:
        A list of new dims labels.
    shape:
        A list of new dim shapes.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the volume of the old shape is not equal to the
        volume of the new shape, or if more than one ``-1`` is
        given in the new shape.
    ValueError
        If a size of ``-1`` is requested but ``dim`` has no consistent size
        in ``x``, or the size of ``dim`` is not divisible by the product of
        the other requested sizes.

    Examples
    --------

      >>> v = sc.arange('x', 6)
      >>> v
      <scipp.Variable> (x: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3}).values
      array([[0, 1, 2],
             [3, 4, 5]])

      >>> sc.fold(v, dim='x', dims=['y', 'z'], shape=[2, 3])
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': -1})
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> a = sc.DataArray(0.1 * sc.arange('x', 6), coords={'x': sc.arange('x', 6)})
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.DataArray>
      Dimensions: Sizes[y:2, z:3, ]
      Coordinates:
      * x                           int64  [dimensionless]  (y, z)  [0, 1, ..., 4, 5]
      Data:
                                  float64  [dimensionless]  (y, z)  [0, 0.1, ..., 0.4, 0.5]
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).data.values
      array([[0. , 0.1, 0.2],
             [0.3, 0.4, 0.5]])
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).coords['x'].values
      array([[0, 1, 2],
             [3, 4, 5]])
    """
    dims_and_shape = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)
    dims = dims_and_shape["dims"]
    # Copy into a list so that a -1 placeholder can be replaced in place.
    new_shape = list(dims_and_shape["shape"])

    # Handle potential size of -1.
    # Note that we implement this here on the Python layer, because one cannot create
    # a C++ Dimensions object with negative sizes.
    minus_one_count = new_shape.count(-1)
    if minus_one_count > 1:
        raise _cpp.DimensionError(
            "Can only have a single -1 in the new requested shape."
        )
    if minus_one_count == 1:
        if (size := x.sizes[dim]) is None:
            raise ValueError(
                f"Dim {dim} has inconsistent size, cannot compute final shape."
            )
        ind = new_shape.index(-1)
        # Temporarily replace the placeholder by 1 so that the product below
        # is the volume of all the *other* requested sizes.
        new_shape[ind] = 1
        new_volume = np.prod(new_shape)
        # NOTE(review): if another requested size is 0, new_volume is 0 and the
        # division below is left to NumPy's integer semantics — confirm that
        # this is the intended handling of that edge case.
        dim_size = int(size // new_volume)
        if size % new_volume != 0:
            # Report the actual divisor. The size may well be divisible by
            # the truncated quotient ``dim_size`` (e.g. 10 and [4, -1]), so
            # naming ``dim_size`` as the divisor would be misleading.
            raise ValueError(
                f"Cannot compute the size for -1 in the new shape: the original "
                f"size {size} of dim {dim} is not divisible by {new_volume}, "
                "the product of the other requested sizes."
            )
        new_shape[ind] = dim_size

    return _call_cpp_func(_cpp.fold, x, dim, dims, new_shape)  # type: ignore[return-value]
def flatten(
    x: VariableLikeType,
    dims: Sequence[str] | None = None,
    to: str | None = None,
) -> VariableLikeType:
    """Merge several dimensions of the input into one dimension.

    A bin-edge coordinate of the input that cannot be joined together is
    dropped from the output.

    For a DataArray input, every coord, mask, and attr that contains at least
    one of the flattened dimensions is flattened, too. Consequently, when all
    dims are flattened (``dims=None``), all coords, masks, and attrs sharing
    *some or all* dimensions with the data are flattened.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Multi-dimensional input to flatten.
    dims:
        A list of dim labels that will be flattened.
        If ``None``, all dimensions will be flattened.
        If the list is empty, this will effectively add a new inner dimension of length
        1 to the data (meta data such as coords and masks are not touched in this case).
    to:
        Label of the resulting flattened dim. Must be provided even though
        it has a default of ``None``.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    ValueError
        If ``to`` is ``None``.
    scipp.DimensionError
        If the input does not have a contiguous memory layout,
        i.e. flattening would require moving data around.
        This can be resolved by (deep-)copying the input.

    Examples
    --------

      >>> v = sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3))
      >>> v
      <scipp.Variable> (x: 2, y: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> v = sc.array(dims=['x', 'y', 'z'], values=np.arange(24).reshape(2, 3, 4))
      >>> v
      <scipp.Variable> (x: 2, y: 3, z: 4)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 24)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6, z: 4)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['y', 'z'], to='u')
      <scipp.Variable> (x: 2, u: 12)      int64  [dimensionless]  [0, 1, ..., 22, 23]

      >>> a = sc.DataArray(0.1 * sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3)),
      ...        coords={'x': sc.arange('x', 2),
      ...                'y': sc.arange('y', 3),
      ...                'xy': sc.array(dims=['x', 'y'],
      ...                               values=np.arange(6).reshape(2, 3))})
      >>> a
      <scipp.DataArray>
      Dimensions: Sizes[x:2, y:3, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0, 1]
      * xy                          int64  [dimensionless]  (x, y)  [0, 1, ..., 4, 5]
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                  float64  [dimensionless]  (x, y)  [0, 0.1, ..., 0.4, 0.5]
      >>> sc.flatten(a, to='u')
      <scipp.DataArray>
      Dimensions: Sizes[u:6, ]
      Coordinates:
      * x                           int64  [dimensionless]  (u)  [0, 0, ..., 1, 1]
      * xy                          int64  [dimensionless]  (u)  [0, 1, ..., 4, 5]
      * y                           int64  [dimensionless]  (u)  [0, 1, ..., 1, 2]
      Data:
                                  float64  [dimensionless]  (u)  [0, 0.1, ..., 0.4, 0.5]

    """
    if to is not None:
        return _call_cpp_func(_cpp.flatten, x, dims, to)  # type: ignore[return-value]
    # ``to`` only has a default because ``dims`` should come first in the
    # argument list: flatten is meant to be callable without kwargs, and
    # listing the dims to flatten before the target label reads more
    # naturally. The target label itself is mandatory.
    raise ValueError("The final flattened dimension is required.")
def transpose(
    x: VariableLikeType, dims: Sequence[str] | None = None
) -> VariableLikeType:
    """Reorder the dimensions of the input.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to transpose.
    dims:
        Dimension labels in the desired order.
        If ``None``, the existing order is reversed.

    Returns
    -------
    : Same type as input
        The transpose of the input.

    Raises
    ------
    scipp.DimensionError
        If ``dims`` are incompatible with the input data.
    """
    # ``None`` is forwarded to the C++ layer as an empty list of labels.
    requested_order: Sequence[str] = [] if dims is None else dims
    return _call_cpp_func(_cpp.transpose, x, requested_order)  # type: ignore[return-value]
def squeeze(
    x: VariableLikeType, dim: str | Sequence[str] | None = None
) -> VariableLikeType:
    """Drop dimensions of length 1 from the input.

    Squeezing is the same as indexing each squeezed dimension with index 0,
    i.e. ``squeeze(x, ['x', 'y'])`` is equivalent to ``x['x', 0]['y', 0]``.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to remove dimensions from.
    dim:
        If given, the dimension(s) to squeeze.
        If ``None``, all length-1 dimensions are squeezed.

    Returns
    -------
    : Same type as input
        Input with length-1 dimensions removed.

    Raises
    ------
    scipp.DimensionError
        If a dimension in ``dim`` does not have length 1.

    See Also
    --------
    scipp.Variable.squeeze, scipp.DataArray.squeeze,
    scipp.Dataset.squeeze, numpy.squeeze

    Examples
    --------

      >>> v = sc.arange('a', 3).fold('a', sizes={'x': 1, 'y': 3, 'z': 1})
      >>> v
      <scipp.Variable> (x: 1, y: 3, z: 1)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v)
      <scipp.Variable> (y: 3)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v, 'z')
      <scipp.Variable> (x: 1, y: 3)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v, ['x', 'z'])
      <scipp.Variable> (y: 3)      int64  [dimensionless]  [0, 1, 2]

    Coordinates for squeezed dimensions become unaligned:

      >>> da = sc.DataArray(v, coords={'x': sc.arange('x', 1),
      ...                              'y': sc.arange('y', 3)})
      >>> da
      <scipp.DataArray>
      Dimensions: Sizes[x:1, y:3, z:1, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0]
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                    int64  [dimensionless]  (x, y, z)  [0, 1, 2]
      >>> sc.squeeze(da)
      <scipp.DataArray>
      Dimensions: Sizes[y:3, ]
      Coordinates:
        x                           int64  [dimensionless]  ()  0
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                    int64  [dimensionless]  (y)  [0, 1, 2]
    """
    labels: Sequence[str] | None
    if isinstance(dim, str):
        # Wrap a lone label in a tuple; otherwise the string itself would be
        # passed on where a sequence of labels (or None) is expected.
        labels = (dim,)
    else:
        labels = dim
    return _call_cpp_func(_cpp.squeeze, x, labels)  # type: ignore[return-value]