Coverage for install/scipp/core/binning.py: 71%
224 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Simon Heybrock
4import itertools
5import warnings
6from numbers import Integral
7from typing import Dict, Optional, Sequence, Union, overload
9from .._scipp import core as _cpp
10from .bin_remapping import combine_bins
11from .cpp_classes import BinEdgeError, CoordError, DataArray, Dataset, DType, Variable
12from .data_group import DataGroup, data_group_overload
13from .math import round as round_
14from .shape import concat
15from .util import VisibleDeprecationWarning
16from .variable import arange, array, epoch, linspace, scalar
@overload
def make_histogrammed(
    x: Union[Variable, DataArray], *, edges: Variable
) -> DataArray: ...


@overload
def make_histogrammed(x: Dataset, *, edges: Variable) -> Dataset: ...


def make_histogrammed(x, *, edges):
    """Histogram data into the given bins, producing dense data.

    Binning dimensions of binned input are preserved, except that histogramming
    along a dimension that is already binned replaces that binning.

    In most cases :py:func:`scipp.hist` is the better choice.

    Parameters
    ----------
    x:
        Input data. A plain variable is treated as the coordinate to histogram
        by, with every element contributing one count.
    edges:
        Bin edges. For edges with more than one dimension the inner dimension
        is the one that is histogrammed.

    Returns
    -------
    :
        DataArray / Dataset holding, for each bin, the sum of the values that
        fall into it.

    Raises
    ------
    scipp.BinEdgeError
        If binned input has no event coordinate for the histogrammed dimension
        and its outer coordinate for that dimension is a bin-edge coordinate.

    See Also
    --------
    scipp.hist:
        Recommended interface for histogramming data.
    scipp.bin:
        For binning data.
    """
    if isinstance(x, Variable):
        # No weights available: every element of the variable counts once and the
        # variable itself serves as the coordinate.
        counts = scalar(1.0, unit='counts').broadcast(sizes=x.sizes)
        return _cpp.histogram(DataArray(counts, coords={edges.dim: x}), edges)

    is_binned_array = isinstance(x, DataArray) and x.bins is not None
    if is_binned_array:
        hist_dim = edges.dims[-1]
        if hist_dim not in x.bins.coords:
            # Without an event coord the only option is to sum the bin contents
            # and histogram by the outer coord, which must then not be bin edges.
            if x.coords.is_edges(hist_dim):
                raise BinEdgeError(
                    "Cannot histogram data with existing bin edges "
                    "unless event data coordinate for histogramming is available."
                )
            return make_histogrammed(x.bins.sum(), edges=edges)

    return _cpp.histogram(x, edges)
def make_binned(
    x: Union[Variable, DataArray],
    *,
    edges: Optional[Sequence[Variable]] = None,
    groups: Optional[Sequence[Variable]] = None,
    erase: Optional[Sequence[str]] = None,
) -> DataArray:
    """Bin and/or group the input along every dimension given by edges or groups.

    Prefer :py:func:`scipp.bin` or :py:func:`scipp.group` unless you need precise
    control over which dimensions are erased, or need to bin and group in a
    single call.

    The data is not histogrammed: every output bin holds a "list" of the input
    values that fall into it.

    At least one of ``edges`` and ``groups`` must be given.

    If the input is binned and some of its bins are masked, changing the binning
    applies those masks, i.e., masked bins are treated as empty.

    Warning
    -------

    When there is existing binning or grouping, the algorithm assumes that
    coordinates of the binned data are correct, i.e., compatible with the
    corresponding coordinate values in the individual bins. If this is not the
    case then the behavior is UNSPECIFIED. That is, the algorithm may or may not
    ignore the existing coordinates. If you encounter such a case, remove the
    conflicting coordinate, e.g., using :py:func:`scipp.DataArray.drop_coords`.

    Parameters
    ----------
    x:
        Input data.
    edges:
        Bin edges, one per dimension to bin in.
    groups:
        Keys to group input by one per dimension to group in.
    erase:
        Dimension labels to remove from output.

    Returns
    -------
    :
        Binned ``x``.

    Raises
    ------
    ValueError
        If ``x`` is a dense variable and ``edges`` and ``groups`` together do not
        contain exactly one entry.

    See Also
    --------
    scipp.hist:
        For histogramming data.
    scipp.bin:
        Recommended interface for binning data.
    scipp.group:
        Recommended interface for grouping data.
    scipp.bins:
        For creating binned data based on explicitly given index ranges.
    """
    erase = [] if erase is None else erase
    groups = [] if groups is None else groups
    edges = [] if edges is None else edges

    if isinstance(x, Variable):
        if x.bins is not None:
            # Already binned: only a data-array wrapper is needed.
            x = DataArray(x)
        else:
            targets = list(itertools.chain(edges, groups))
            if len(targets) != 1:
                raise ValueError(
                    "Edges for exactly one dimension must be specified when "
                    "binning or histogramming a variable."
                )
            # The variable becomes the coordinate; each element counts once.
            counts = scalar(1.0, unit='counts').broadcast(sizes=x.sizes).copy()
            x = DataArray(counts, coords={targets[0].dim: x})

    if not _can_operate_on_bins(x, edges, groups, erase):
        return _cpp.bin(x, edges, groups, erase)
    return combine_bins(x, edges=edges, groups=groups, dim=erase)
154def _can_operate_on_bins(x, edges, groups, erase) -> bool:
155 if x.bins is None:
156 return False
157 dims = []
158 for coord in itertools.chain(edges, groups):
159 if coord.ndim != 1:
160 return False
161 if coord.dim in x.bins.coords:
162 return False
163 if coord.dim not in x.coords:
164 return False
165 dims += x.coords[coord.dim].dims
166 return set(dims) <= set(erase)
169def _require_coord(name, coord):
170 if coord is None:
171 raise CoordError(f"Coordinate '{name}' not found.")
def _get_coord(x, name):
    """Return the coordinate of ``x`` used to derive automatic bin bounds.

    Parameters
    ----------
    x:
        Variable, DataArray or Dataset.
    name:
        Name of the coordinate to look up.

    Returns
    -------
    :
        * For a variable: the variable itself.
        * For a dataset: a two-element variable (along dim ``'dummy'``) holding
          the overall minimum and maximum of the coordinate over all items.
        * Otherwise: the outer coordinate ``name`` if present, else the event
          coordinate ``name`` of binned data.

    Raises
    ------
    scipp.CoordError
        If no coordinate called ``name`` is found.
    """
    if isinstance(x, Variable):
        return x
    if isinstance(x, Dataset):
        cmin = None
        cmax = None
        for da in x.values():
            c = _get_coord(da, name)
            cmin = c.min() if cmin is None else min(cmin, c.min())
            # Compare against the running maximum (not the running minimum),
            # otherwise maxima found in earlier items are lost.
            cmax = c.max() if cmax is None else max(cmax, c.max())
        coord = concat([cmin, cmax], dim='dummy')
    else:
        # Outer coords/attrs take precedence; fall back to the event coord.
        event_coord = x.bins.deprecated_meta.get(name) if x.bins is not None else None
        coord = x.deprecated_meta.get(name, event_coord)
    _require_coord(name, coord)
    return coord
def _upper_bound(x: Variable) -> Variable:
    """Return a scalar just above the maximum of ``x``.

    For ``int32``, ``int64`` and ``datetime64`` the maximum is incremented by
    one. For any other dtype the next representable value in the direction of
    ``max + 1`` is used (via :py:func:`numpy.nextafter`).
    """
    import numpy as np

    bound = x.max()
    if bound.dtype in ('int32', 'int64', 'datetime64'):
        bound.value += 1
        return bound
    # Only the direction matters for nextafter, so max + 1 is a sufficient target.
    toward = bound + scalar(1, unit=bound.unit, dtype=bound.dtype)
    bound.value = np.nextafter(bound.value, toward.value)
    return bound
def _parse_coords_arg(
    x: Union[Variable, DataArray, Dataset], name: str, arg: Union[int, Variable]
) -> Variable:
    """Turn a user-provided binning parameter into a bin-edge variable.

    Parameters
    ----------
    x:
        Data whose coordinate ``name`` defines the range for automatic edges.
    name:
        Dimension label of the edges to create.
    arg:
        One of: a variable that has dimension ``name`` (returned unchanged), an
        integer (number of bins), or a scalar variable (bin width).

    Returns
    -------
    :
        Bin edges along ``name``.

    Raises
    ------
    ValueError
        If the coordinate minimum exceeds the computed upper bound, or if a bin
        width of zero is given.
    """
    if isinstance(arg, Variable) and name in arg.dims:
        return arg

    coord = _get_coord(x, name)
    start = coord.min()
    has_bin_edges = (
        not isinstance(x, Variable)
        and (name in x.coords)
        and x.coords.is_edges(name, name)
    )
    # existing bin-edges, do not extend
    stop = coord.max() if has_bin_edges else _upper_bound(coord)
    if start > stop:
        raise ValueError(
            (
                'Empty data range, cannot automatically determine bounds. '
                'Must provide concrete bin edges.'
            )
        )

    if not isinstance(arg, Integral):
        # Interpret the argument as a bin width.
        step = arg.to(dtype=start.dtype, unit=start.unit)
        if step.value == 0:
            raise ValueError("Step size cannot be 0.")
        return arange(name, start, stop + step, step=step)

    n_edges = arg + 1
    if start.dtype == DType.datetime64:
        # linspace is evaluated on offsets from the epoch, which are then
        # rounded to integers and shifted back.
        base = epoch(unit=start.unit)
        offsets = linspace(name, start - base, stop - base, num=n_edges)
        return base + round_(offsets).to(dtype='int64')
    return linspace(name, start, stop, num=n_edges).to(dtype=start.dtype, copy=False)
def _make_edges(
    x: Union[Variable, DataArray, Dataset],
    arg_dict: Optional[Dict[str, Union[int, Variable]]],
    kwargs: Dict[str, Union[int, Variable]],
) -> Dict[str, Variable]:
    """Build a dict of bin edges from positional-dict and keyword parameters.

    ``arg_dict`` (if given) and ``kwargs`` are merged, with ``arg_dict`` entries
    first. Each entry is then converted to a bin-edge variable by
    ``_parse_coords_arg``.
    """
    params = kwargs if arg_dict is None else dict(**arg_dict, **kwargs)
    edges = {}
    for name, arg in params.items():
        edges[name] = _parse_coords_arg(x, name, arg)
    return edges
252def _find_replaced_dims(x, dims):
253 if x.bins is None:
254 return []
255 erase = set()
256 for dim in dims:
257 if (coord := x.coords.get(dim)) is not None:
258 if dim not in coord.dims:
259 erase = erase.union(coord.dims)
260 return [dim for dim in erase if dim not in dims]
@overload
def hist(
    x: Union[Variable, DataArray],
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> DataArray: ...


@overload
def hist(
    x: Dataset,
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> Dataset: ...


@overload
def hist(
    x: DataGroup,
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> DataGroup: ...


@data_group_overload
def hist(x, arg_dict=None, /, **kwargs):
    """Compute a histogram.

    There are three ways to specify the bin edges:

    1. An integer requests that many bins; a 'linspace' spanning the min and max
       of the corresponding coordinate is created.
    2. A scalar Scipp variable (a value with a unit) is taken as the target bin
       width; an 'arange' covering the min and max of the corresponding
       coordinate is created.
    3. A custom coordinate, given as a Scipp variable with compatible unit.
       Typically this should have a single dimension matching the target
       dimension.

    Histogramming a dimension that already has a dimension-coord changes the
    binning of that dimension, so input and output share the same dimension
    labels.

    Histogramming by non-dimension-coords yields new output dimensions named
    after these coordinates. They replace the dimensions the input coordinates
    depend on.

    Parameters
    ----------
    x:
        Input data.
    arg_dict:
        Dictionary mapping dimension labels to binning parameters.
    **kwargs:
        Mapping of dimension label to corresponding binning parameters.

    Returns
    -------
    :
        Histogrammed data.

    Raises
    ------
    ValueError
        If ``x`` is a variable and edges are not given for exactly one dimension.
    TypeError
        If no edges are given and ``x`` is not binned.

    See Also
    --------
    scipp.bin:
        Creating binned data by binning instead of summing all contributions.
    scipp.binning.make_histogrammed:
        Lower level function for histogramming that does not automatically
        replace/erase dimensions.

    Examples
    --------

    Histogram a table by one of its coord columns, specifying (1) number of bins, (2)
    bin width, or (3) actual binning:

      >>> from numpy.random import default_rng
      >>> rng = default_rng(seed=1234)
      >>> x = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> y = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> data = sc.ones(dims=['row'], unit='K', shape=[100])
      >>> table = sc.DataArray(data=data, coords={'x': x, 'y': y})
      >>> table.hist(x=2)
      <scipp.DataArray>
      Dimensions: Sizes[x:2, ]
      Coordinates:
      * x float64 [m] (x [bin-edge]) [0.00313229, 0.497696, 0.992259]
      Data:
       float64 [K] (x) [53, 47]

      >>> table.hist(x=sc.scalar(0.2, unit='m')).sizes
      {'x': 5}

      >>> table.hist(x=sc.linspace('x', 0.2, 0.8, num=10, unit='m')).sizes
      {'x': 9}

    Histogram a table by two of its coord columns:

      >>> table.hist(x=4, y=6).sizes
      {'x': 4, 'y': 6}

    Histogram binned data, using existing bins:

      >>> binned = table.bin(x=10)
      >>> binned.hist().sizes
      {'x': 10}

    Histogram binned data, using new bins along existing dimension:

      >>> binned = table.bin(x=10)
      >>> binned.hist(x=20).sizes
      {'x': 20}

    Histogram binned data along an additional dimension:

      >>> binned = table.bin(x=10)
      >>> binned.hist(y=5).sizes
      {'x': 10, 'y': 5}
    """  # noqa: E501
    edges = _make_edges(x, arg_dict, kwargs)
    erase = _find_replaced_dims(x, edges)
    n_edges = len(edges)
    if isinstance(x, Variable) and n_edges != 1:
        raise ValueError(
            "Edges for exactly one dimension must be specified when "
            "binning or histogramming a variable."
        )
    if n_edges == 0:
        # No edges requested: collapse the existing bins.
        if x.bins is None:
            raise TypeError("Data is not binned so bin edges must be provided.")
        return x.bins.sum()

    # All but the last edges are handled by binning, the last by histogramming.
    *outer_edges, last_edges = edges.values()
    if not outer_edges:
        # TODO Note that this may swap dims, is that ok?
        out = make_histogrammed(x, edges=last_edges)
    else:
        # If histogramming by the final edges needs to use a non-event coord then we
        # must not erase that dim, since it removes the coord required for histogramming
        if isinstance(x, DataArray) and x.bins is not None:
            hist_dim = last_edges.dims[-1]
            if hist_dim not in x.bins.coords:
                hist_coord_dim = x.coords[hist_dim].dims[-1]
                erase = [dim for dim in erase if dim != hist_coord_dim]
        binned = make_binned(x, edges=outer_edges, erase=erase)
        out = make_histogrammed(binned, edges=last_edges)

    # Sum over any replaced dims that survived the steps above.
    for dim in erase:
        if dim in out.dims:
            out = out.sum(dim)
    return out
@overload
def nanhist(
    x: Union[Variable, DataArray],
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> DataArray: ...


@overload
def nanhist(
    x: DataGroup,
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> DataGroup: ...


@data_group_overload
def nanhist(x, arg_dict=None, /, **kwargs):
    """Compute a histogram, skipping NaN values.

    Works like :py:func:`scipp.hist` except that NaN values are skipped. Refer
    to that function for details and examples.

    The input is first binned by the requested edges (if any) and the bin
    contents are then summed with ``nansum``.

    Parameters
    ----------
    x:
        Input data.
    arg_dict:
        Dictionary mapping dimension labels to binning parameters.
    **kwargs:
        Mapping of dimension label to corresponding binning parameters.

    Returns
    -------
    :
        Histogrammed data.

    Raises
    ------
    TypeError
        If no edges are given and ``x`` is not binned.
    """
    edges = _make_edges(x, arg_dict, kwargs)
    binned = x.bin(edges) if edges else x
    if binned.bins is None:
        raise TypeError("Data is not binned so bin edges must be provided.")
    return binned.bins.nansum()
@overload
def bin(
    x: Union[Variable, DataArray],
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> DataArray: ...


@overload
def bin(
    x: DataGroup,
    arg_dict: Optional[Dict[str, Union[int, Variable]]] = None,
    /,
    **kwargs: Union[int, Variable],
) -> DataGroup: ...


@data_group_overload
def bin(x, arg_dict=None, /, **kwargs):
    """Create binned data by binning input along all dimensions given by edges.

    There are three ways to specify the bin edges:

    1. An integer requests that many bins; a 'linspace' spanning the min and max
       of the corresponding coordinate is created.
    2. A scalar Scipp variable (a value with a unit) is taken as the target bin
       width; an 'arange' covering the min and max of the corresponding
       coordinate is created.
    3. A custom coordinate, given as a Scipp variable with compatible unit.
       Typically, this should have a single dimension matching the target
       dimension.

    Binning a dimension that already has a dimension-coord changes the binning
    of that dimension, so input and output share the same dimension labels.

    Binning by non-dimension-coords yields new output dimensions named after
    these coordinates. They replace the dimensions the input coordinates depend
    on.

    Warning
    -------

    When there is existing binning or grouping, the algorithm assumes that
    coordinates of the binned data are correct, i.e., compatible with the
    corresponding coordinate values in the individual bins. If this is not the
    case then the behavior is UNSPECIFIED. That is, the algorithm may or may not
    ignore the existing coordinates. If you encounter such a case, remove the
    conflicting coordinate, e.g., using :py:func:`scipp.DataArray.drop_coords`.

    Parameters
    ----------
    x:
        Input data.
    arg_dict:
        Dictionary mapping dimension labels to binning parameters.
    **kwargs:
        Mapping of dimension label to corresponding binning parameters.

    Returns
    -------
    :
        Binned data.

    See Also
    --------
    scipp.hist:
        For histogramming data.
    scipp.group:
        Creating binned data by grouping, instead of binning based on edges.
    scipp.binning.make_binned:
        Lower level function that can bin and group, and does not automatically
        replace/erase dimensions.

    Examples
    --------

    Bin a table by one of its coord columns, specifying (1) number of bins, (2)
    bin width, or (3) actual binning:

      >>> from numpy.random import default_rng
      >>> rng = default_rng(seed=1234)
      >>> x = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> y = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> data = sc.ones(dims=['row'], unit='K', shape=[100])
      >>> table = sc.DataArray(data=data, coords={'x': x, 'y': y})
      >>> table.bin(x=2).sizes
      {'x': 2}

      >>> table.bin(x=sc.scalar(0.2, unit='m')).sizes
      {'x': 5}

      >>> table.bin(x=sc.linspace('x', 0.2, 0.8, num=10, unit='m')).sizes
      {'x': 9}

    Bin a table by two of its coord columns:

      >>> table.bin(x=4, y=6).sizes
      {'x': 4, 'y': 6}

    Bin binned data, using new bins along existing dimension:

      >>> binned = table.bin(x=10)
      >>> binned.bin(x=20).sizes
      {'x': 20}

    Bin binned data along an additional dimension:

      >>> binned = table.bin(x=10)
      >>> binned.bin(y=5).sizes
      {'x': 10, 'y': 5}
    """
    if arg_dict is None:
        # Legacy interface: list-valued 'edges', 'groups' or 'erase' keywords are
        # forwarded unchanged to make_binned after emitting a warning.
        legacy_names = ('edges', 'groups', 'erase')
        uses_legacy_kwargs = any(
            name in legacy_names and isinstance(item, list)
            for name, item in kwargs.items()
        )
        if uses_legacy_kwargs:
            warnings.warn(
                "The 'edges', 'groups', and 'erase' keyword arguments "
                "are deprecated. Use, e.g., 'sc.bin(da, x=x_edges)' or "
                "'sc.group(da, groups)'. See the documentation for details.",
                UserWarning,
                stacklevel=2,
            )
            return make_binned(x, **kwargs)

    edges = _make_edges(x, arg_dict, kwargs)
    replaced = _find_replaced_dims(x, edges)
    return make_binned(x, edges=list(edges.values()), erase=replaced)
@overload
def rebin(
    x: DataArray,
    arg_dict: Optional[dict[str, Union[int, Variable]]] = None,
    deprecated=None,
    /,
    **kwargs: Union[int, Variable],
) -> DataArray: ...


@overload
def rebin(
    x: Dataset,
    arg_dict: Optional[dict[str, Union[int, Variable]]] = None,
    deprecated=None,
    /,
    **kwargs: Union[int, Variable],
) -> Dataset: ...


@overload
def rebin(
    x: DataGroup,
    arg_dict: Optional[dict[str, Union[int, Variable]]] = None,
    deprecated=None,
    /,
    **kwargs: Union[int, Variable],
) -> DataGroup: ...


@data_group_overload
def rebin(x, arg_dict=None, deprecated=None, /, **kwargs):
    """Rebin a data array or dataset.

    The coordinate of the input for the dimension to be rebinned must contain bin edges,
    i.e., the data must be histogrammed.

    If the input has masks that contain the dimension being rebinned then those
    masks are applied to the data before rebinning. That is, masked values are treated
    as zero.

    Parameters
    ----------
    x:
        Data to rebin.
    arg_dict:
        Dictionary mapping dimension labels to binning parameters. In the
        deprecated calling convention this is instead a dimension label given
        as a string.
    deprecated:
        Bin edges passed positionally after a dimension label. Only used by the
        deprecated calling convention; use keyword arguments such as
        ``sc.rebin(da, x=x_edges)`` instead.
    **kwargs:
        Mapping of dimension label to corresponding binning parameters.

    Returns
    -------
    :
        Data rebinned according to the new bin edges.

    See Also
    --------
    scipp.bin:
        For changing the binning of binned (as opposed to dense, histogrammed) data.
    scipp.hist:
        For histogramming data.

    Examples
    --------

    Rebin a data array along one of its dimensions, specifying (1) number of bins, (2)
    bin width, or (3) actual binning:

      >>> from numpy.random import default_rng
      >>> rng = default_rng(seed=1234)
      >>> x = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> y = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> data = sc.ones(dims=['row'], unit='K', shape=[100])
      >>> table = sc.DataArray(data=data, coords={'x': x, 'y': y})
      >>> da = table.hist(x=100, y=100)
      >>> da.rebin(x=2).sizes
      {'x': 2, 'y': 100}

      >>> da.rebin(x=sc.scalar(0.2, unit='m')).sizes
      {'x': 5, 'y': 100}

      >>> da.rebin(x=sc.linspace('x', 0.2, 0.8, num=10, unit='m')).sizes
      {'x': 9, 'y': 100}

    Rebin a data array along two of its dimensions:

      >>> da = table.hist(x=100, y=100)
      >>> da.rebin(x=4, y=6).sizes
      {'x': 4, 'y': 6}
    """
    # Deprecated calling convention: a dimension label as second positional
    # argument, with the edges given positionally or via the 'bins' keyword.
    if isinstance(arg_dict, str):
        if deprecated is not None or 'bins' in kwargs:
            warnings.warn(
                "The 'bins' keyword argument and positional syntax for setting bin "
                "edges is deprecated. Use, e.g., 'sc.rebin(da, x=x_edges)'. See the "
                "documentation for details.",
                UserWarning,
                stacklevel=2,
            )
            # An explicit 'bins' keyword overrides the positional value.
            bins = {'bins': deprecated, **kwargs}
            return _cpp.rebin(x, arg_dict, **bins)
    edges = _make_edges(x, arg_dict, kwargs)
    # Rebin one dimension at a time, feeding each result into the next step.
    out = x
    for dim, edge in edges.items():
        out = _cpp.rebin(out, dim, edge)
    return out
def _make_groups(x, arg):
    """Return the variable of group keys for grouping ``x`` by ``arg``.

    A variable is returned unchanged. A string is taken as a coordinate name;
    the event coordinate is preferred over the outer coordinate, and the unique
    values of that coordinate become the groups.

    Raises
    ------
    scipp.CoordError
        If ``arg`` is a name and no such coordinate is found.
    """
    import numpy as np

    if isinstance(arg, Variable):
        return arg

    coord = None
    if x.bins is not None:
        coord = x.bins.coords.get(arg)
    if coord is None:
        coord = x.coords.get(arg)
    _require_coord(arg, coord)
    if coord.bins is not None:
        # Work on a copy of the underlying event buffer.
        coord = coord.copy().bins.constituents['data']

    values = coord.values
    if values.size == 0:
        unique = values[0:0]
    # We are currently using np.unique to find all unique groups. This can be very slow
    # for large inputs. In many cases groups are in a bounded range of integers, and we
    # can sometimes bypass a full call to np.unique by checking a sub-range first
    elif coord.dtype in (DType.int32, DType.int64):
        lowest = coord.min().value
        highest = coord.max().value
        unique = values[0:0]
        for pivot in [1000, 100, 10, 1]:
            # Every integer in [lowest, highest] has been seen; nothing to add.
            if len(unique) == highest - lowest + 1:
                break
            unique = np.unique(values[: len(values) // pivot])
    else:
        unique = np.unique(values)
    return array(dims=[arg], values=unique, unit=coord.unit)
@overload
def group(x: DataArray, /, *args: Union[str, Variable]) -> DataArray: ...


@overload
def group(x: DataGroup, /, *args: Union[str, Variable]) -> DataGroup: ...


@data_group_overload
def group(x, /, *args: Union[str, Variable]):
    """Create binned data by grouping input by one or more coordinates.

    There are two ways to specify a grouping: (1) A string selects a coordinate
    whose unique values are used as groups. (2) A Scipp variable provides the
    groups directly through its values.

    Note that option (1) may be very slow if the input is very large.

    Grouping a dimension that already has a dimension-coord changes the binning
    of that dimension, so input and output share the same dimension labels.

    Grouping by non-dimension-coords yields new output dimensions named after
    these coordinates. They replace the dimensions the input coordinates depend
    on.

    Warning
    -------

    When there is existing binning or grouping, the algorithm assumes that
    coordinates of the binned data are correct, i.e., compatible with the
    corresponding coordinate values in the individual bins. If this is not the
    case then the behavior is UNSPECIFIED. That is, the algorithm may or may not
    ignore the existing coordinates. If you encounter such a case, remove the
    conflicting coordinate, e.g., using :py:func:`scipp.DataArray.drop_coords`.

    Parameters
    ----------
    x:
        Input data.
    *args:
        Dimension labels or grouping variables.

    Returns
    -------
    :
        Binned data.

    See Also
    --------
    scipp.bin:
        Creating binned data by binning based on edges, instead of grouping.
    scipp.binning.make_binned:
        Lower level function that can bin and group, and does not automatically
        replace/erase dimensions.

    Examples
    --------

    Group a table by one of its coord columns, specifying (1) a coord name or (2)
    an actual grouping:

      >>> from numpy.random import default_rng
      >>> rng = default_rng(seed=1234)
      >>> x = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> y = sc.array(dims=['row'], unit='m', values=rng.random(100))
      >>> data = sc.ones(dims=['row'], unit='K', shape=[100])
      >>> table = sc.DataArray(data=data, coords={'x': x, 'y': y})
      >>> table.coords['label'] = (table.coords['x'] * 10).to(dtype='int64')
      >>> table.group('label').sizes
      {'label': 10}

      >>> groups = sc.array(dims=['label'], values=[1, 3, 5], unit='m')
      >>> table.group(groups).sizes
      {'label': 3}

    Group a table by two of its coord columns:

      >>> table.coords['a'] = (table.coords['x'] * 10).to(dtype='int64')
      >>> table.coords['b'] = (table.coords['y'] * 10).to(dtype='int64')
      >>> table.group('a', 'b').sizes
      {'a': 10, 'b': 10}

      >>> groups = sc.array(dims=['a'], values=[1, 3, 5], unit='m')
      >>> table.group(groups, 'b').sizes
      {'a': 3, 'b': 10}

    Group binned data along an additional dimension:

      >>> table.coords['a'] = (table.coords['y'] * 10).to(dtype='int64')
      >>> binned = table.bin(x=10)
      >>> binned.group('a').sizes
      {'x': 10, 'a': 10}
    """
    groups = []
    group_dims = []
    for spec in args:
        keys = _make_groups(x, spec)
        groups.append(keys)
    for keys in groups:
        group_dims.append(keys.dim)
    replaced = _find_replaced_dims(x, group_dims)
    return make_binned(x, groups=groups, erase=replaced)
def histogram(
    x: Union[DataArray, Dataset], *, bins: Variable
) -> Union[DataArray, Dataset]:
    """Deprecated. See :py:func:`scipp.hist`.

    Emits both a ``UserWarning`` and a ``VisibleDeprecationWarning`` and then
    forwards to ``make_histogrammed`` with ``bins`` as the edges.
    """
    message = "'histogram' is deprecated. Use 'hist' instead."
    for category in (UserWarning, VisibleDeprecationWarning):
        warnings.warn(message, category, stacklevel=2)
    return make_histogrammed(x, edges=bins)