Coverage for install/scipp/core/reduction.py: 64%

110 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-17 01:51 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Simon Heybrock 

4 

5from __future__ import annotations 

6 

7from collections.abc import Callable 

8from typing import Any, NoReturn, cast 

9 

10import numpy as np 

11import numpy.typing as npt 

12 

13from .._scipp import core as _cpp 

14from ..typing import Dims, VariableLike, VariableLikeType 

15from . import concepts 

16from ._cpp_wrapper_util import call_func as _call_cpp_func 

17from .cpp_classes import ( 

18 DataArray, 

19 Dataset, 

20 DimensionError, 

21 DTypeError, 

22 Unit, 

23 Variable, 

24 VariancesError, 

25) 

26from .data_group import DataGroup, data_group_nary 

27from .variable import array 

28 

29 

def mean(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    r"""Compute the arithmetic mean of the elements of the input.

    For inputs that carry variances, the variances of the output follow the
    "standard deviation of the mean", i.e.,
    :math:`\sigma_{mean} = \sigma / \sqrt{N}`.
    Here :math:`N` is the length of the input dimension and
    :math:`\sigma` is estimated as the average of the standard deviations of
    the input elements along that dimension.

    See :py:func:`scipp.sum` on how rounding errors for float32 inputs are handled.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension to average over. When omitted, the mean over all
        dimensions is calculated.

    Returns
    -------
    : Same type as x
        The mean of the input values.

    See Also
    --------
    scipp.var:
        Compute the variance.
    scipp.std:
        Compute the standard deviation.
    scipp.nanmean:
        Ignore NaN's when calculating the mean.
    """
    # Forward ``dim`` only when the caller actually supplied one.
    if dim is not None:
        return _call_cpp_func(_cpp.mean, x, dim=dim)  # type: ignore[return-value]
    return _call_cpp_func(_cpp.mean, x)  # type: ignore[return-value]

68 

69 

def nanmean(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    r"""Compute the arithmetic mean of the elements of the input, skipping NaN's.

    For inputs that carry variances, the variances of the output follow the
    "standard deviation of the mean", i.e.,
    :math:`\sigma_{mean} = \sigma / \sqrt{N}`.
    Here :math:`N` is the length of the input dimension and
    :math:`\sigma` is estimated as the average of the standard deviations of
    the input elements along that dimension.

    See :py:func:`scipp.sum` on how rounding errors for float32 inputs are handled.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension to average over. When omitted, the nanmean over all
        dimensions is calculated.

    Returns
    -------
    : Same type as x
        The mean of the input values which are not NaN.

    See Also
    --------
    scipp.nanvar:
        Compute the variance, ignoring NaN's.
    scipp.nanstd:
        Compute the standard deviation, ignoring NaN's.
    scipp.mean:
        Compute the mean without special handling of NaN.
    """
    # Forward ``dim`` only when the caller actually supplied one.
    if dim is not None:
        return _call_cpp_func(_cpp.nanmean, x, dim=dim)  # type: ignore[return-value]
    return _call_cpp_func(_cpp.nanmean, x)  # type: ignore[return-value]

108 

109 

def median(x: VariableLikeType, dim: Dims = None) -> VariableLikeType:
    """Return the median of the input values.

    The median is the middle element of a sorted copy of the input
    along each reduced dimension.
    For an array of ``N`` unmasked values this means

    - odd ``N``: ``x[(N-1)/2]``
    - even ``N``: ``(x[N/2-1] + x[N/2]) / 2``

    Note
    ----
    Masks are broadcast to the shape of ``x``.
    This can lead to a large temporary memory usage.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension(s) to reduce.
        When omitted, the median over a flattened version of the array is
        calculated.

    Returns
    -------
    : Same type as x
        The median of the input values.

    Raises
    ------
    scipp.VariancesError
        If the input has variances.
    scipp.DTypeError
        If the input is binned or does otherwise not support computing medians.

    See Also
    --------
    scipp.nanmedian:
        Ignore NaN's when calculating the median.

    Examples
    --------
    ``median`` is available as a method:

        >>> x = sc.array(dims=['x'], values=[2, 5, 1, 8, 4])
        >>> x.median()
        <scipp.Variable> ()    float64  [dimensionless]  4
        >>> x = sc.array(dims=['x'], values=[2, 5, 1, 8])
        >>> x.median()
        <scipp.Variable> ()    float64  [dimensionless]  3.5

    The median can be computed along a given dimension:

        >>> x = sc.array(dims=['x', 'y'], values=[[1, 3, 6], [2, 7, 4]])
        >>> x.median('y')
        <scipp.Variable> (x: 2)    float64  [dimensionless]  [3, 4]

    Masked elements are ignored:

        >>> x = sc.DataArray(
        ...     sc.array(dims=['x'], values=[5, 3, 4, 3]),
        ...     masks={'m': sc.array(dims=['x'], values=[False, True, False, False])}
        ... )
        >>> x.median()
        <scipp.DataArray>
        Dimensions: Sizes[]
        Data:
                                    float64  [dimensionless]  ()  4
    """
    # The cast detaches this function's type variables from the recursive
    # callback so that the return type is deduced correctly.
    recurse = cast(Callable[..., VariableLike], median)
    return _reduce_with_numpy(
        x,
        dim=dim,
        sc_func=recurse,
        np_func=np.median,
        np_ma_func=np.ma.median,
        unit_func=lambda unit: unit,  # the median has the unit of the input
        kwargs={},
    )

189 

190 

def nanmedian(x: VariableLikeType, dim: Dims = None) -> VariableLikeType:
    """Return the median of the input values, skipping NaN's.

    The median is the middle element of a sorted copy of the input
    along each reduced dimension.
    For an array of ``N`` unmasked, non-NaN values this means

    - odd ``N``: ``x[(N-1)/2]``
    - even ``N``: ``(x[N/2-1] + x[N/2]) / 2``

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension(s) to reduce.
        When omitted, the median over a flattened version of the array is
        calculated.

    Returns
    -------
    : Same type as x
        The median of the input values.

    Raises
    ------
    scipp.VariancesError
        If the input has variances.
    scipp.DTypeError
        If the input is binned or does otherwise not support computing medians.
    ValueError
        If the input has masks.
        Mask out NaN's and then use :func:`scipp.median` instead.

    See Also
    --------
    scipp.median:
        Compute the median without special handling of NaN's.

    Examples
    --------
    ``nanmedian`` is available as a method:

        >>> x = sc.array(dims=['x'], values=[2, 5, 1, np.nan, 8, 4])
        >>> x.nanmedian()
        <scipp.Variable> ()    float64  [dimensionless]  4
        >>> x = sc.array(dims=['x'], values=[2, np.nan, 5, 1, 8])
        >>> x.nanmedian()
        <scipp.Variable> ()    float64  [dimensionless]  3.5
    """

    def _reject_masked(*args: object, **kwargs: object) -> NoReturn:
        # There is no np.ma.nanmedian, so masked input cannot be reduced here.
        message = (
            'nanmedian does not support masked data arrays. '
            'Consider masking NaN values and calling scipp.median'
        )
        raise ValueError(message)

    # The cast detaches this function's type variables from the recursive
    # callback so that the return type is deduced correctly.
    recurse = cast(Callable[..., VariableLike], nanmedian)
    return _reduce_with_numpy(
        x,
        dim=dim,
        sc_func=recurse,
        np_func=np.nanmedian,
        np_ma_func=_reject_masked,
        unit_func=lambda unit: unit,  # the median has the unit of the input
        kwargs={},
    )

257 

258 

def var(x: VariableLikeType, dim: Dims = None, *, ddof: int) -> VariableLikeType:
    r"""Return the variance of the input values.

    The result is the variance computed from the values of the input.
    It is *not* the same as the ``x.variances`` property but is defined as

    .. math::

        \mathsf{var}(x) = \frac1{N - \mathsf{ddof}}
                          \sum_{i=1}^{N}\, {(x_i - \bar{x})}^2

    where :math:`x_i` are the unmasked ``values`` of the input and
    :math:`\bar{x}` is the mean, see :func:`scipp.mean`.
    See the ``ddof`` parameter description for what value to choose.

    Note
    ----
    Masks are broadcast to the shape of ``x``.
    This can lead to a large temporary memory usage.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension(s) to reduce.
        When omitted, the variance over a flattened version of the array is
        calculated.
    ddof:
        'Delta degrees of freedom'.
        For sample variances, set ``ddof=1`` to obtain an unbiased estimator.
        For normally distributed variables, set ``ddof=0`` to obtain a maximum
        likelihood estimate.
        See :func:`numpy.var` for more details.

        In contrast to NumPy, this is a required parameter in Scipp to
        avoid potentially hard-to-find mistakes based on implicit assumptions
        about what the input data represents.

    Returns
    -------
    : Same type as x
        The variance of the input values.

    Raises
    ------
    scipp.VariancesError
        If the input has variances.
    scipp.DTypeError
        If the input is binned or does otherwise not support computing variances.

    See Also
    --------
    scipp.variances:
        Extract the stored variances of a :class:`scipp.Variable`.
    scipp.mean:
        Compute the arithmetic mean.
    scipp.std:
        Compute the standard deviation.
    scipp.nanvar:
        Ignore NaN's when calculating the variance.

    Examples
    --------
    ``var`` is available as a method:

        >>> x = sc.array(dims=['x'], values=[3, 5, 2, 3])
        >>> x.var(ddof=0)
        <scipp.Variable> ()    float64  [dimensionless]  1.1875
        >>> x.var(ddof=1)
        <scipp.Variable> ()    float64  [dimensionless]  1.58333

    Select a dimension to reduce:

        >>> x = sc.array(dims=['x', 'y'], values=[[1, 3, 6], [2, 7, 4]])
        >>> x.var('y', ddof=0)
        <scipp.Variable> (x: 2)    float64  [dimensionless]  [4.22222, 4.22222]
        >>> x.var('x', ddof=0)
        <scipp.Variable> (y: 3)    float64  [dimensionless]  [0.25, 4, 1]
    """
    # The cast detaches this function's type variables from the recursive
    # callback so that the return type is deduced correctly.
    recurse = cast(Callable[..., VariableLike], var)
    numpy_options = {'ddof': ddof}
    return _reduce_with_numpy(
        x,
        dim=dim,
        sc_func=recurse,
        np_func=np.var,
        np_ma_func=np.ma.var,
        unit_func=lambda unit: unit**2,  # a variance carries the squared unit
        kwargs=numpy_options,
    )

347 

348 

def nanvar(x: VariableLikeType, dim: Dims = None, *, ddof: int) -> VariableLikeType:
    r"""Return the variance of the input values, skipping NaN's.

    The result is the variance computed from the values of the input.
    It is *not* the same as the ``x.variances`` property but is defined as

    .. math::

        \mathsf{nanvar}(x) = \frac1{N - \mathsf{ddof}}
                             \sum_{i=1}^{N}\, {(x_i - \bar{x})}^2

    where :math:`x_i` are the non-NaN ``values`` of the input and
    :math:`\bar{x}` is the mean, see :func:`scipp.nanmean`.
    See the ``ddof`` parameter description for what value to choose.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension(s) to reduce.
        When omitted, the variance over a flattened version of the array is
        calculated.
    ddof:
        'Delta degrees of freedom'.
        For sample variances, set ``ddof=1`` to obtain an unbiased estimator.
        For normally distributed variables, set ``ddof=0`` to obtain a maximum
        likelihood estimate.
        See :func:`numpy.nanvar` for more details.

        In contrast to NumPy, this is a required parameter in Scipp to
        avoid potentially hard-to-find mistakes based on implicit assumptions
        about what the input data represents.

    Returns
    -------
    : Same type as x
        The variance of the non-NaN input values.

    Raises
    ------
    scipp.VariancesError
        If the input has variances.
    scipp.DTypeError
        If the input is binned or does otherwise not support computing variances.
    ValueError
        If the input has masks.
        Mask out NaN's and then use :func:`scipp.var` instead.

    See Also
    --------
    scipp.nanmean:
        Compute the arithmetic mean ignoring NaN's.
    scipp.nanstd:
        Compute the standard deviation, ignoring NaN's.
    scipp.var:
        Compute the variance without special handling of NaN's.

    Examples
    --------
    ``nanvar`` is available as a method:

        >>> x = sc.array(dims=['x'], values=[np.nan, 5, 2, 3])
        >>> x.nanvar(ddof=0)
        <scipp.Variable> ()    float64  [dimensionless]  1.55556
        >>> x.nanvar(ddof=1)
        <scipp.Variable> ()    float64  [dimensionless]  2.33333
    """

    def _reject_masked(*args: object, **kwargs: object) -> NoReturn:
        # There is no np.ma.nanvar, so masked input cannot be reduced here.
        message = (
            'nanvar does not support masked data arrays. '
            'Consider masking NaN values and calling scipp.var'
        )
        raise ValueError(message)

    # The cast detaches this function's type variables from the recursive
    # callback so that the return type is deduced correctly.
    recurse = cast(Callable[..., VariableLike], nanvar)
    numpy_options = {'ddof': ddof}
    return _reduce_with_numpy(
        x,
        dim=dim,
        sc_func=recurse,
        np_func=np.nanvar,
        np_ma_func=_reject_masked,
        unit_func=lambda unit: unit**2,  # a variance carries the squared unit
        kwargs=numpy_options,
    )

433 

434 

def std(x: VariableLikeType, dim: Dims = None, *, ddof: int) -> VariableLikeType:
    r"""Return the standard deviation of the input values.

    The result is the standard deviation computed from the values of the
    input. It is *not* related to the ``x.variances`` property but is
    defined as

    .. math::

        \mathsf{std}(x)^2 = \frac1{N - \mathsf{ddof}}
                            \sum_{i=1}^{N}\, {(x_i - \bar{x})}^2

    where :math:`x_i` are the unmasked ``values`` of the input and
    :math:`\bar{x}` is the mean, see :func:`scipp.mean`.
    See the ``ddof`` parameter description for what value to choose.

    Note
    ----
    Masks are broadcast to the shape of ``x``.
    This can lead to a large temporary memory usage.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension(s) to reduce.
        When omitted, the standard deviation over a flattened version
        of the array is calculated.
    ddof:
        'Delta degrees of freedom'.
        For sample standard deviations, set ``ddof=1`` to obtain an unbiased estimator.
        For normally distributed variables, set ``ddof=0`` to obtain a maximum
        likelihood estimate.
        See :func:`numpy.std` for more details.

        In contrast to NumPy, this is a required parameter in Scipp to
        avoid potentially hard-to-find mistakes based on implicit assumptions
        about what the input data represents.

    Returns
    -------
    : Same type as x
        The standard deviation of the input values.

    Raises
    ------
    scipp.VariancesError
        If the input has variances.
    scipp.DTypeError
        If the input is binned or does
        otherwise not support computing standard deviations.

    See Also
    --------
    scipp.stddevs:
        Compute the standard deviations from the stored
        variances of a :class:`scipp.Variable`.
    scipp.mean:
        Compute the arithmetic mean.
    scipp.var:
        Compute the variance.
    scipp.nanstd:
        Ignore NaN's when calculating the standard deviation.

    Examples
    --------
    ``std`` is available as a method:

        >>> x = sc.array(dims=['x'], values=[3, 5, 2, 3])
        >>> x.std(ddof=0)
        <scipp.Variable> ()    float64  [dimensionless]  1.08972
        >>> x.std(ddof=1)
        <scipp.Variable> ()    float64  [dimensionless]  1.25831

    Select a dimension to reduce:

        >>> x = sc.array(dims=['x', 'y'], values=[[1, 3, 6], [2, 7, 4]])
        >>> x.std('y', ddof=0)
        <scipp.Variable> (x: 2)    float64  [dimensionless]  [2.0548, 2.0548]
        >>> x.std('x', ddof=0)
        <scipp.Variable> (y: 3)    float64  [dimensionless]  [0.5, 2, 1]
    """
    # The cast detaches this function's type variables from the recursive
    # callback so that the return type is deduced correctly.
    recurse = cast(Callable[..., VariableLike], std)
    numpy_options = {'ddof': ddof}
    return _reduce_with_numpy(
        x,
        dim=dim,
        sc_func=recurse,
        np_func=np.std,
        np_ma_func=np.ma.std,
        unit_func=lambda unit: unit,  # same unit as the input
        kwargs=numpy_options,
    )

526 

527 

def nanstd(x: VariableLikeType, dim: Dims = None, *, ddof: int) -> VariableLikeType:
    r"""Return the standard deviation of the input values, skipping NaN's.

    The result is the standard deviation computed from the values of the
    input. It is *not* related to the ``x.variances`` property but is
    defined as

    .. math::

        \mathsf{nanstd}(x)^2 = \frac1{N - \mathsf{ddof}}
                               \sum_{i=1}^{N}\, {(x_i - \bar{x})}^2

    where :math:`x_i` are the non-NaN ``values`` of the input and
    :math:`\bar{x}` is the mean, see :func:`scipp.nanmean`.
    See the ``ddof`` parameter description for what value to choose.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Dimension(s) to reduce.
        When omitted, the standard deviation over a flattened version
        of the array is calculated.
    ddof:
        'Delta degrees of freedom'.
        For sample standard deviations, set ``ddof=1`` to obtain an unbiased estimator.
        For normally distributed variables, set ``ddof=0`` to obtain a maximum
        likelihood estimate.
        See :func:`numpy.nanstd` for more details.

        In contrast to NumPy, this is a required parameter in Scipp to
        avoid potentially hard-to-find mistakes based on implicit assumptions
        about what the input data represents.

    Returns
    -------
    : Same type as x
        The standard deviation of the input values.

    Raises
    ------
    scipp.VariancesError
        If the input has variances.
    scipp.DTypeError
        If the input is binned or does
        otherwise not support computing standard deviations.
    ValueError
        If the input has masks.
        Mask out NaN's and then use :func:`scipp.std` instead.

    See Also
    --------
    scipp.nanmean:
        Compute the arithmetic mean ignoring NaN's.
    scipp.nanvar:
        Compute the variance, ignoring NaN's.
    scipp.std:
        Compute the standard deviation without special handling of NaN's.

    Examples
    --------
    ``nanstd`` is available as a method:

        >>> x = sc.array(dims=['x'], values=[np.nan, 5, 2, 3])
        >>> x.nanstd(ddof=0)
        <scipp.Variable> ()    float64  [dimensionless]  1.24722
        >>> x.nanstd(ddof=1)
        <scipp.Variable> ()    float64  [dimensionless]  1.52753
    """

    def _reject_masked(*args: object, **kwargs: object) -> NoReturn:
        # There is no np.ma.nanstd, so masked input cannot be reduced here.
        message = (
            'nanstd does not support masked data arrays. '
            'Consider masking NaN values and calling scipp.std'
        )
        raise ValueError(message)

    # The cast detaches this function's type variables from the recursive
    # callback so that the return type is deduced correctly.
    recurse = cast(Callable[..., VariableLike], nanstd)
    numpy_options = {'ddof': ddof}
    return _reduce_with_numpy(
        x,
        dim=dim,
        sc_func=recurse,
        np_func=np.nanstd,
        np_ma_func=_reject_masked,
        unit_func=lambda unit: unit,  # same unit as the input
        kwargs=numpy_options,
    )

614 

615 

def sum(x: VariableLikeType, dim: Dims = None) -> VariableLikeType:
    """Compute the sum of the elements of the input.

    If the input data is in single precision (dtype='float32') this internally uses
    double precision (dtype='float64') to reduce the effect of accumulated rounding
    errors. If multiple dimensions are reduced, the current implementation casts back
    to float32 after handling each dimension, i.e., the result is equivalent to what
    would be obtained from manually summing individual dimensions.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension(s) to sum over. A single name is reduced in one
        step; several names are reduced one after the other in the given
        order. When omitted, the sum over all dimensions is calculated.

    Returns
    -------
    : Same type as x
        The sum of the input values.

    See Also
    --------
    scipp.nansum:
        Ignore NaN's when calculating the sum.
    """
    if dim is None:
        return _call_cpp_func(_cpp.sum, x)  # type: ignore[return-value]
    if isinstance(dim, str):
        return _call_cpp_func(_cpp.sum, x, dim=dim)  # type: ignore[return-value]
    # Several dimensions: feed each partial result into the next reduction.
    partial = x
    for name in dim:
        partial = _call_cpp_func(_cpp.sum, partial, name)  # type: ignore[assignment]
    return partial

650 

651 

def nansum(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the sum of the elements of the input, skipping NaN's.

    See :py:func:`scipp.sum` on how rounding errors for float32 inputs are handled.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to sum over. When omitted, the sum over all
        dimensions is calculated.

    Returns
    -------
    : Same type as x
        The sum of the input values which are not NaN.

    See Also
    --------
    scipp.sum:
       Compute the sum without special handling of NaN.
    """
    # Forward ``dim`` only when the caller actually supplied one.
    if dim is not None:
        return _call_cpp_func(_cpp.nansum, x, dim=dim)  # type: ignore[return-value]
    return _call_cpp_func(_cpp.nansum, x)  # type: ignore[return-value]

679 

680 

def min(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the minimum of the elements of the input.

    Warning
    -------

    Scipp returns DBL_MAX or INT_MAX for empty inputs of float or int dtype,
    respectively, while NumPy raises. Note that in the case of :py:class:`DataArray`,
    inputs can also be "empty" if all elements contributing to an output element are
    masked.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to reduce. When omitted, the min over all
        dimensions is calculated.

    Returns
    -------
    :
        The minimum of the input values.

    See Also
    --------
    scipp.max:
        Maximum of the input values.
    scipp.nanmin:
        Same as min but ignoring NaN's.
    scipp.nanmax:
        Same as max but ignoring NaN's.
    """
    # ``dim`` is passed on as a keyword only when the caller supplied one.
    dim_kwargs = {} if dim is None else {'dim': dim}
    return _call_cpp_func(_cpp.min, x, **dim_kwargs)  # type: ignore[return-value]

718 

719 

def max(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the maximum of the elements of the input.

    Warning
    -------

    Scipp returns DBL_MIN or INT_MIN for empty inputs of float or int dtype,
    respectively, while NumPy raises. Note that in the case of :py:class:`DataArray`,
    inputs can also be "empty" if all elements contributing to an output element are
    masked.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to reduce. When omitted, the max over all
        dimensions is calculated.

    Returns
    -------
    :
        The maximum of the input values.

    See Also
    --------
    scipp.min:
        Minimum of the input values.
    scipp.nanmin:
        Same as min but ignoring NaN's.
    scipp.nanmax:
        Same as max but ignoring NaN's.
    """
    # ``dim`` is passed on as a keyword only when the caller supplied one.
    dim_kwargs = {} if dim is None else {'dim': dim}
    return _call_cpp_func(_cpp.max, x, **dim_kwargs)  # type: ignore[return-value]

757 

758 

def nanmin(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the minimum of the elements of the input, skipping NaN's.

    Warning
    -------

    Scipp returns DBL_MAX or INT_MAX for empty inputs of float or int dtype,
    respectively, while NumPy raises. Note that in the case of :py:class:`DataArray`,
    inputs can also be "empty" if all elements contributing to an output element are
    masked. The same applies if all elements are NaN (or masked).

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to reduce. When omitted, the min over all
        dimensions is calculated.

    Returns
    -------
    :
        The minimum of the input values.

    See Also
    --------
    scipp.min:
        Minimum without special handling for NaN.
    scipp.max:
        Maximum without special handling for NaN.
    scipp.nanmax:
        Same as max but ignoring NaN's.
    """
    # ``dim`` is passed on as a keyword only when the caller supplied one.
    dim_kwargs = {} if dim is None else {'dim': dim}
    return _call_cpp_func(_cpp.nanmin, x, **dim_kwargs)  # type: ignore[return-value]

796 

797 

def nanmax(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the maximum of the elements of the input, skipping NaN's.

    Warning
    -------

    Scipp returns DBL_MIN or INT_MIN for empty inputs of float or int dtype,
    respectively, while NumPy raises. Note that in the case of :py:class:`DataArray`,
    inputs can also be "empty" if all elements contributing to an output element are
    masked. The same applies if all elements are NaN (or masked).

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to reduce. When omitted, the max over all
        dimensions is calculated.

    Returns
    -------
    :
        The maximum of the input values.

    See Also
    --------
    scipp.max:
        Maximum without special handling for NaN.
    scipp.min:
        Minimum without special handling for NaN.
    scipp.nanmin:
        Same as min but ignoring NaN's.
    """
    # ``dim`` is passed on as a keyword only when the caller supplied one.
    dim_kwargs = {} if dim is None else {'dim': dim}
    return _call_cpp_func(_cpp.nanmax, x, **dim_kwargs)  # type: ignore[return-value]

835 

836 

def all(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the logical AND over the input values.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to reduce. When omitted, the AND over all
        dimensions is calculated.

    Returns
    -------
    :
        A variable containing ``True`` if all input values (along the given dimension)
        are ``True``.

    See Also
    --------
    scipp.any:
        Logical OR.
    """
    # Forward ``dim`` only when the caller actually supplied one.
    if dim is not None:
        return _call_cpp_func(_cpp.all, x, dim=dim)  # type: ignore[return-value]
    return _call_cpp_func(_cpp.all, x)  # type: ignore[return-value]

863 

864 

def any(x: VariableLikeType, dim: str | None = None) -> VariableLikeType:
    """Compute the logical OR over the input values.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data.
    dim:
        Optional dimension to reduce. When omitted, the OR over all
        dimensions is calculated.

    Returns
    -------
    :
        A variable containing ``True`` if any input values (along the given dimension)
        are ``True``.

    See Also
    --------
    scipp.all:
        Logical AND.
    """
    # Forward ``dim`` only when the caller actually supplied one.
    if dim is not None:
        return _call_cpp_func(_cpp.any, x, dim=dim)  # type: ignore[return-value]
    return _call_cpp_func(_cpp.any, x)  # type: ignore[return-value]

891 

892 

893# Note: When passing `sc_func`, make sure to disassociate type vars of that function 

894# from the calling function. E.g., in `median`, use 

895# sc_func=cast(Callable[..., VariableLike], median) 

896# This ensures that the return type of the `median` function is deduced correctly. 

def _reduce_with_numpy(
    x: VariableLikeType,
    *,
    dim: Dims = None,
    sc_func: Callable[..., VariableLike],
    np_func: Callable[..., npt.NDArray[Any]],
    np_ma_func: Callable[..., npt.NDArray[Any]],
    unit_func: Callable[[Unit], Unit],
    kwargs: dict[str, Any],
) -> VariableLikeType:
    """Reduce ``x`` over ``dim`` by delegating the computation to NumPy.

    Parameters
    ----------
    x:
        Input data. Datasets and data groups are handled by applying
        ``sc_func`` to every item; variables and data arrays are reduced here.
    dim:
        Dimension(s) to reduce, forwarded to the dimension helpers and to
        ``sc_func`` for nested containers.
    sc_func:
        The public Scipp function on whose behalf the reduction runs.
        It is called for the items of datasets and data groups, and its
        ``__name__`` is used in error messages.
    np_func:
        NumPy function applied to the plain values when no mask is relevant.
        It is called as ``np_func(values, axis=axis, **kwargs)``.
    np_ma_func:
        Function applied to a NumPy masked array when a data array has a
        mask relevant to the reduction. It is called with the same
        ``axis`` and ``kwargs`` as ``np_func``.
    unit_func:
        Maps the unit of the input to the unit of the result.
        It is not called when the input has no unit.
    kwargs:
        Extra keyword arguments for ``np_func``, ``np_ma_func`` and ``sc_func``.

    Returns
    -------
    :
        The reduced data, of the same kind of container as ``x``.

    Raises
    ------
    scipp.VariancesError
        If a variable or data array input has variances.
    scipp.DTypeError
        If a variable or data array input is binned.
    TypeError
        If ``x`` is none of the supported types.
    """
    if isinstance(x, Dataset):
        return Dataset({k: sc_func(v, dim=dim, **kwargs) for k, v in x.items()})  # type: ignore[arg-type]
    if isinstance(x, DataGroup):
        return data_group_nary(sc_func, x, dim=dim, **kwargs)

    _expect_no_variance(x, sc_func.__name__)
    _expect_not_binned(x, sc_func.__name__)
    # The names of the reduced dimensions are not needed here, only the
    # dimensions of the output and the matching NumPy axes.
    _, out_dims, axis = _split_dims(x, dim)
    if isinstance(x, Variable):
        return array(
            dims=out_dims,
            values=np_func(x.values, axis=axis, **kwargs),
            unit=unit_func(x.unit) if x.unit is not None else None,
        )
    if isinstance(x, DataArray):
        if (mask := concepts.irreducible_mask(x, dim)) is not None:
            # The mask may have fewer dims than the data, so it is broadcast
            # to the full shape before the masked array is built.
            masked = np.ma.masked_where(  # type: ignore[no-untyped-call]
                mask.broadcast(dims=x.dims, shape=x.shape).values, x.values
            )
            res = np_ma_func(masked, axis=axis, **kwargs)
        else:
            res = np_func(x.values, axis=axis, **kwargs)
        # Transform the unit exactly as in the Variable branch. Using the
        # input unit unchanged would give, e.g., a variance the unit of the
        # data instead of its square.
        out_unit = unit_func(x.unit) if x.unit is not None else None
        return concepts.rewrap_reduced_data(
            x, array(dims=out_dims, values=res, unit=out_unit), dim
        )
    raise TypeError(f'invalid argument of type {type(x)} to {sc_func}')

933 

934 

def _dims_to_axis(x: VariableLikeType, dim: tuple[str, ...]) -> tuple[int, ...]:
    """Return the positions in ``x.dims`` of the dimension names in ``dim``.

    The result has one entry per name in ``dim``, in the same order.
    A name that is not in ``x.dims`` makes ``_dim_index`` raise.
    """
    positions: list[int] = []
    for name in dim:
        positions.append(_dim_index(x.dims, name))
    return tuple(positions)

937 

938 

939def _dim_index(dims: tuple[str, ...], dim: str) -> int: 

940 try: 

941 return dims.index(dim) 

942 except ValueError: 

943 raise DimensionError(f'Expected dimension to be in {dims}, got {dim}') from None 

944 

945 

def _split_dims(
    x: VariableLikeType, dim: Dims
) -> tuple[tuple[str, ...], tuple[str, ...], tuple[int, ...]]:
    """Split the dimensions of ``x`` for a reduction over ``dim``.

    Returns
    -------
    :
        A 3-tuple of

        - the reduced dimensions as returned by ``concepts.concrete_dims``,
        - the dimensions of ``x`` that are not reduced, in their original order,
        - the positions of the reduced dimensions within ``x.dims``.
    """
    reduced = concepts.concrete_dims(x, dim)
    kept = tuple(name for name in x.dims if name not in reduced)
    axes = _dims_to_axis(x, reduced)
    return reduced, kept, axes

953 

954 

955def _expect_no_variance(x: VariableLikeType, op: str) -> None: 

956 if getattr(x, 'variances', None) is not None: 

957 raise VariancesError(f"'{op}' does not support variances") 

958 

959 

960def _expect_not_binned(x: VariableLikeType, op: str) -> None: 

961 if getattr(x, 'bins', None) is not None: 

962 raise DTypeError(f"'{op}' does not support binned data")