Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #include "scipp/variable/reduction.h"
6 : #include "scipp/core/dtype.h"
7 : #include "scipp/core/element/arithmetic.h"
8 : #include "scipp/core/element/comparison.h"
9 : #include "scipp/core/element/logical.h"
10 : #include "scipp/variable/accumulate.h"
11 : #include "scipp/variable/arithmetic.h"
12 : #include "scipp/variable/astype.h"
13 : #include "scipp/variable/bins.h"
14 : #include "scipp/variable/creation.h"
15 : #include "scipp/variable/special_values.h"
16 : #include "scipp/variable/util.h"
17 : #include "scipp/variable/variable_factory.h"
18 :
19 : #include "operations_common.h"
20 :
21 : using namespace scipp::core;
22 :
23 : namespace scipp::variable {
24 :
25 : namespace {
26 43991 : Variable reduce_to_dims(const Variable &var, const Dimensions &target_dims,
27 : void (*const op)(Variable &, const Variable &),
28 : const FillValue init) {
29 43991 : auto accum = dense_special_like(var, target_dims, init);
30 : // FillValue::ZeroNotBool is not allowed here because it produces a different
31 : // dtype from var (if var has dtype bool). ZeroNotBool and Default are
32 : // semantically equivalent apart from the dtype change. So it can be
33 : // substituted here.
34 43991 : op(accum,
35 131975 : variableFactory().apply_event_masks(
36 43991 : var, (init == FillValue::ZeroNotBool) ? FillValue::Default : init));
37 43989 : return accum;
38 2 : }
39 :
40 35533 : Variable reduce_dim(const Variable &var, const Dim dim,
41 : void (*const op)(Variable &, const Variable &),
42 : const FillValue init) {
43 35533 : auto dims = var.dims();
44 35533 : if (dim != Dim::Invalid)
45 35504 : dims.erase(dim);
46 71054 : return reduce_to_dims(var, dims, op, init);
47 35533 : }
48 :
49 8463 : Variable reduce_bins(const Variable &data,
50 : void (*const op)(Variable &, const Variable &),
51 : const FillValue init) {
52 8463 : return reduce_to_dims(data, data.dims(), op, init);
53 : }
54 : } // namespace
55 :
56 13502 : Variable sum(const Variable &var, const Dim dim) {
57 : // Bool DType is a bit special in that it cannot contain its sum.
58 : // Instead, the sum is stored in an int64_t Variable
59 13502 : return reduce_dim(var, dim, sum_into, FillValue::ZeroNotBool);
60 : }
61 :
62 279 : Variable nansum(const Variable &var, const Dim dim) {
63 : // Bool DType is a bit special in that it cannot contain its sum.
64 : // Instead, the sum is stored in an int64_t Variable
65 279 : return reduce_dim(var, dim, nansum_into, FillValue::ZeroNotBool);
66 : }
67 :
68 91 : Variable any(const Variable &var, const Dim dim) {
69 91 : return reduce_dim(var, dim, any_into, FillValue::False);
70 : }
71 :
72 471 : Variable all(const Variable &var, const Dim dim) {
73 471 : return reduce_dim(var, dim, all_into, FillValue::True);
74 : }
75 :
76 : /// Return the maximum along given dimension.
77 : ///
78 : /// Variances are not considered when determining the maximum. If present, the
79 : /// variance of the maximum element is returned.
80 5532 : Variable max(const Variable &var, const Dim dim) {
81 5532 : return reduce_dim(var, dim, max_into, FillValue::Lowest);
82 : }
83 :
84 : /// Return the maximum along given dimension ignoring NaN values.
85 : ///
86 : /// Variances are not considered when determining the maximum. If present, the
87 : /// variance of the maximum element is returned.
88 5072 : Variable nanmax(const Variable &var, const Dim dim) {
89 5072 : return reduce_dim(var, dim, nanmax_into, FillValue::Lowest);
90 : }
91 :
92 : /// Return the minimum along given dimension.
93 : ///
94 : /// Variances are not considered when determining the minimum. If present, the
95 : /// variance of the minimum element is returned.
96 5516 : Variable min(const Variable &var, const Dim dim) {
97 5516 : return reduce_dim(var, dim, min_into, FillValue::Max);
98 : }
99 :
100 : /// Return the minimum along given dimension ignoring NaN values.
101 : ///
102 : /// Variances are not considered when determining the minimum. If present, the
103 : /// variance of the minimum element is returned.
104 5070 : Variable nanmin(const Variable &var, const Dim dim) {
105 5070 : return reduce_dim(var, dim, nanmin_into, FillValue::Max);
106 : }
107 :
108 100 : Variable mean_impl(const Variable &var, const Dim dim, const Variable &count) {
109 200 : return normalize_impl(sum(var, dim), count);
110 : }
111 :
112 86 : Variable nanmean_impl(const Variable &var, const Dim dim,
113 : const Variable &count) {
114 172 : return normalize_impl(nansum(var, dim), count);
115 : }
116 :
117 : namespace {
118 31 : Variable unmasked_events(const Variable &data) {
119 62 : if (const auto mask_union = variableFactory().irreducible_event_mask(data);
120 31 : mask_union.is_valid()) {
121 : // Trick to get the sizes of bins if masks are present - bin the masks
122 : // using the same dimension & indices as the data, and then sum the
123 : // inverse of the mask to get the number of unmasked entries.
124 24 : return make_bins_no_validate(data.bin_indices(),
125 36 : variableFactory().elem_dim(data), ~mask_union);
126 31 : }
127 19 : return {};
128 : }
129 :
130 110 : template <class... Dim> Variable count(const Variable &var, Dim &&...dim) {
131 110 : if (!is_bins(var)) {
132 : if constexpr (sizeof...(dim) == 0)
133 24 : return var.dims().volume() * units::none;
134 : else
135 73 : return ((var.dims()[dim] * units::none) * ...);
136 : }
137 18 : if (const auto unmasked = unmasked_events(var); unmasked.is_valid()) {
138 5 : return sum(unmasked, dim...);
139 : }
140 16 : const auto [begin, end] = unzip(var.bin_indices());
141 8 : return sum(end - begin, dim...);
142 8 : }
143 :
144 18 : Variable bins_count(const Variable &data) {
145 18 : if (const auto unmasked = unmasked_events(data); unmasked.is_valid()) {
146 7 : return bins_sum(unmasked);
147 18 : }
148 11 : return bin_sizes(data);
149 : }
150 : } // namespace
151 :
152 78 : Variable mean(const Variable &var, const Dim dim) {
153 155 : return mean_impl(var, dim, count(var, dim));
154 : }
155 :
156 72 : Variable nanmean(const Variable &var, const Dim dim) {
157 143 : return nanmean_impl(var, dim, sum(isfinite(var), dim));
158 : }
159 :
160 : /// Return the sum along all dimensions.
161 156 : Variable sum(const Variable &var) {
162 353 : return reduce_all_dims(var, [](auto &&..._) { return sum(_...); });
163 : }
164 :
165 : /// Return the sum along all dimensions, nans treated as zero.
166 47 : Variable nansum(const Variable &var) {
167 131 : return reduce_all_dims(var, [](auto &&..._) { return nansum(_...); });
168 : }
169 :
170 : /// Return the maximum along all dimensions.
171 5374 : Variable max(const Variable &var) {
172 10850 : return reduce_all_dims(var, [](auto &&..._) { return max(_...); });
173 : }
174 :
175 : /// Return the maximum along all dimensions ignoring NaN values.
176 4913 : Variable nanmax(const Variable &var) {
177 9942 : return reduce_all_dims(var, [](auto &&..._) { return nanmax(_...); });
178 : }
179 :
180 : /// Return the minimum along all dimensions.
181 5459 : Variable min(const Variable &var) {
182 10921 : return reduce_all_dims(var, [](auto &&..._) { return min(_...); });
183 : }
184 :
185 : /// Return the minimum along all dimensions ignoring NaN values.
186 4905 : Variable nanmin(const Variable &var) {
187 9932 : return reduce_all_dims(var, [](auto &&..._) { return nanmin(_...); });
188 : }
189 :
190 : /// Return the logical AND along all dimensions.
191 21003 : Variable all(const Variable &var) {
192 21378 : return reduce_all_dims(var, [](auto &&..._) { return all(_...); });
193 : }
194 :
195 : /// Return the logical OR along all dimensions.
196 28 : Variable any(const Variable &var) {
197 70 : return reduce_all_dims(var, [](auto &&..._) { return any(_...); });
198 : }
199 :
200 : /// Return the mean along all dimensions.
201 32 : Variable mean(const Variable &var) {
202 64 : return normalize_impl(sum(var), count(var));
203 : }
204 :
205 : /// Return the mean along all dimensions. Ignoring NaN values.
206 25 : Variable nanmean(const Variable &var) {
207 50 : return normalize_impl(nansum(var), sum(isfinite(var)));
208 : }
209 :
210 : /// Return the sum of all events per bin.
211 8406 : Variable bins_sum(const Variable &data) {
212 8406 : return reduce_bins(data, variable::sum_into, FillValue::ZeroNotBool);
213 : }
214 :
215 : /// Return the sum of all events per bin. Ignoring NaN values.
216 32 : Variable bins_nansum(const Variable &data) {
217 32 : return reduce_bins(data, variable::nansum_into, FillValue::ZeroNotBool);
218 : }
219 :
220 : /// Return the maximum of all events per bin.
221 5 : Variable bins_max(const Variable &data) {
222 5 : return reduce_bins(data, variable::max_into, FillValue::Lowest);
223 : }
224 :
225 : /// Return the maximum of all events per bin. Ignoring NaN values.
226 4 : Variable bins_nanmax(const Variable &data) {
227 4 : return reduce_bins(data, variable::nanmax_into, FillValue::Lowest);
228 : }
229 :
230 : /// Return the minimum of all events per bin.
231 6 : Variable bins_min(const Variable &data) {
232 6 : return reduce_bins(data, variable::min_into, FillValue::Max);
233 : }
234 :
235 : /// Return the minimum of all events per bin. Ignoring NaN values.
236 4 : Variable bins_nanmin(const Variable &data) {
237 4 : return reduce_bins(data, variable::nanmin_into, FillValue::Max);
238 : }
239 :
240 : /// Return the logical AND of all events per bin.
241 3 : Variable bins_all(const Variable &data) {
242 3 : return reduce_bins(data, variable::all_into, FillValue::True);
243 : }
244 :
245 : /// Return the logical OR of all events per bin.
246 3 : Variable bins_any(const Variable &data) {
247 3 : return reduce_bins(data, variable::any_into, FillValue::False);
248 : }
249 :
250 : /// Return the mean of all events per bin.
251 18 : Variable bins_mean(const Variable &data) {
252 36 : return normalize_impl(bins_sum(data), bins_count(data));
253 : }
254 :
255 : /// Return the mean of all events per bin. Ignoring NaN values.
256 6 : Variable bins_nanmean(const Variable &data) {
257 12 : return normalize_impl(bins_nansum(data), bins_sum(isfinite(data)));
258 : }
259 :
260 22289 : void sum_into(Variable &accum, const Variable &var) {
261 22289 : if (accum.dtype() == dtype<float>) {
262 29 : auto x = astype(accum, dtype<double>);
263 29 : sum_into(x, var);
264 29 : copy(astype(x, dtype<float>), accum);
265 29 : } else {
266 22260 : accumulate_in_place(accum, var, element::add_equals, "sum");
267 : }
268 22289 : }
269 :
270 341 : void nansum_into(Variable &summed, const Variable &var) {
271 341 : if (summed.dtype() == dtype<float>) {
272 30 : auto accum = astype(summed, dtype<double>);
273 30 : nansum_into(accum, var);
274 30 : copy(astype(accum, dtype<float>), summed);
275 30 : } else {
276 311 : accumulate_in_place(summed, var, element::nan_add_equals, "nansum");
277 : }
278 341 : }
279 :
280 481 : void all_into(Variable &accum, const Variable &var) {
281 481 : accumulate_in_place(accum, var, core::element::logical_and_equals, "all");
282 480 : }
283 :
284 101 : void any_into(Variable &accum, const Variable &var) {
285 101 : accumulate_in_place(accum, var, core::element::logical_or_equals, "any");
286 100 : }
287 :
288 5559 : void max_into(Variable &accum, const Variable &var) {
289 5559 : accumulate_in_place(accum, var, core::element::max_equals, "max");
290 5559 : }
291 :
292 5076 : void nanmax_into(Variable &accum, const Variable &var) {
293 5076 : accumulate_in_place(accum, var, core::element::nanmax_equals, "max");
294 5076 : }
295 :
296 5531 : void min_into(Variable &accum, const Variable &var) {
297 5531 : accumulate_in_place(accum, var, core::element::min_equals, "min");
298 5531 : }
299 :
300 5074 : void nanmin_into(Variable &accum, const Variable &var) {
301 5074 : accumulate_in_place(accum, var, core::element::nanmin_equals, "min");
302 5074 : }
303 : } // namespace scipp::variable
|