LCOV - code coverage report
Current view: top level - dataset - histogram.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 64 65 98.5 %
Date: 2024-12-01 01:56:34 Functions: 11 11 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : #include <algorithm>
       6             : 
       7             : #include "scipp/core/element/histogram.h"
       8             : #include "scipp/dataset/bins.h"
       9             : #include "scipp/dataset/dataset.h"
      10             : #include "scipp/dataset/except.h"
      11             : #include "scipp/dataset/groupby.h"
      12             : #include "scipp/dataset/histogram.h"
      13             : #include "scipp/variable/arithmetic.h"
      14             : #include "scipp/variable/astype.h"
      15             : #include "scipp/variable/reduction.h"
      16             : #include "scipp/variable/shape.h"
      17             : #include "scipp/variable/transform_subspan.h"
      18             : #include "scipp/variable/util.h"
      19             : 
      20             : #include "bins_util.h"
      21             : #include "dataset_operations_common.h"
      22             : 
      23             : using namespace scipp::core;
      24             : using namespace scipp::variable;
      25             : 
      26             : namespace scipp::dataset {
      27             : 
      28       47595 : DataArray histogram(const DataArray &events, const Variable &binEdges) {
      29             :   using namespace scipp::core;
      30       47595 :   auto dim = binEdges.dims().inner();
      31       47595 :   auto con_bin_edges = as_contiguous(binEdges, dim);
      32             : 
      33       47595 :   DataArray result;
      34       47595 :   if (events.dtype() == dtype<bucket<DataArray>>) {
      35             :     // TODO Histogram data from buckets. Is this the natural choice for the API,
      36             :     // i.e., does it make sense that histograming considers bucket contents?
      37             :     // Should we instead have a separate named function for this case?
      38         229 :     result = apply_and_drop_dim(
      39             :         events,
      40         115 :         [](const DataArray &events_, const Dim dim_,
      41             :            const Variable &binEdges_) {
      42             :           return buckets::histogram(
      43         231 :               hide_masked(events_.data(), events_.masks(),
      44             :                           scipp::span<const Dim>{&dim_, 1}),
      45         229 :               binEdges_);
      46             :         },
      47         114 :         dim, con_bin_edges);
      48       47480 :   } else if (!is_histogram(events, dim)) {
      49       47478 :     const auto event_dim = events.coords().dim_of(dim);
      50       94956 :     result = apply_and_drop_dim(
      51             :         events,
      52       47478 :         [dim](const DataArray &events_, const Dim event_dim_,
      53             :               const Variable &binEdges_) {
      54       47478 :           const auto data = masked_data(events_, event_dim_);
      55             :           // Warning: Don't try to move the `as_contiguous` into `subspan_view`
      56             :           // without special care: It may return a new variable which will go
      57             :           // out of scope, leading to subtle bugs. Here on the other hand the
      58             :           // returned temporary is kept alive until the end of the
      59             :           // full-expression.
      60       47478 :           const auto cont_data = as_contiguous(data, event_dim_);
      61             :           const auto cont_coord =
      62       47478 :               as_contiguous(events_.coords()[dim], event_dim_);
      63       47478 :           if (data.ndim() == 1 && data.dims().volume() > 100000) {
      64          40 :             const DataArray content(cont_data, {{dim, cont_coord}});
      65             :             const auto binned =
      66          10 :                 pretend_bins_for_threading(content, Dim::InternalHistogram);
      67             :             // This sums automatically over Dim::InternalHistogram
      68          10 :             return buckets::histogram(binned, binEdges_);
      69          10 :           }
      70             :           // Due to the combinatoric explosion of dtype combinations, we promote
      71             :           // either the bin edges or the coord in case their dtypes mismatch.
      72             :           // This is less efficient, but hopefully an edge case. If performance
      73             :           // matters, users should ensure that they use bin edges and coord with
      74             :           // the same dtype.
      75       47468 :           const auto dt = common_type(binEdges_, cont_coord);
      76             :           const auto promoted_coord =
      77       47468 :               astype(cont_coord, dt, CopyPolicy::TryAvoid);
      78             :           const auto promoted_edges =
      79       47468 :               astype(binEdges_, dt, CopyPolicy::TryAvoid);
      80             :           return transform_subspan(
      81       47468 :               events_.dtype(), dim, binEdges_.dims()[dim] - 1,
      82       94936 :               subspan_view(promoted_coord, event_dim_),
      83       94936 :               subspan_view(cont_data, event_dim_), promoted_edges,
      84       94936 :               element::histogram, "histogram");
      85       47478 :         },
      86       47478 :         event_dim, con_bin_edges);
      87             :   } else {
      88           2 :     throw except::BinEdgeError(
      89             :         "Data is already histogrammed. Expected event data or dense point "
      90           4 :         "data, got data with bin edges.");
      91             :   }
      92       47592 :   result.coords().set(dim, binEdges);
      93       95184 :   return result;
      94       47598 : }
      95             : 
      96           1 : Dataset histogram(const Dataset &dataset, const Variable &binEdges) {
      97             :   return apply_to_items(
      98             :       dataset,
      99           2 :       [](const auto &item, const Dim, const auto &binEdges_) {
     100           2 :         return histogram(item, binEdges_);
     101             :       },
     102           2 :       binEdges.dims().inner(), binEdges);
     103             : }
     104             : 
     105             : /// Return the dimensions of the given data array that have an "bin edge"
     106             : /// coordinate.
     107          18 : std::set<Dim> edge_dimensions(const DataArray &a) {
     108          18 :   std::set<Dim> dims;
     109          39 :   for (const auto &[d, coord] : a.coords())
     110          40 :     if (a.dims().contains(d) && coord.dims().contains(d) &&
     111          19 :         coord.dims()[d] == a.dims()[d] + 1)
     112          15 :       dims.insert(d);
     113          18 :   return dims;
     114           0 : }
     115             : 
     116             : /// Return the Dim of the given data array that has an "bin edge" coordinate.
     117             : ///
     118             : /// Throws if there is not exactly one such dimension.
     119          18 : Dim edge_dimension(const DataArray &a) {
     120          18 :   const auto &dims = edge_dimensions(a);
     121          18 :   if (dims.size() != 1)
     122           7 :     throw except::BinEdgeError("Expected bin edges in only one dimension.");
     123          22 :   return *dims.begin();
     124          18 : }
     125             : 
     126             : namespace {
     127       47491 : template <typename T> bool is_histogram_impl(const T &a, const Dim dim) {
     128       47491 :   const auto dims = a.dims();
     129       47491 :   const auto coords = a.coords();
     130       47691 :   return dims.contains(dim) && coords.contains(dim) &&
     131       47889 :          coords[dim].dims().contains(dim) &&
     132       47689 :          coords[dim].dims()[dim] == dims.at(dim) + 1;
     133       47491 : }
     134             : } // namespace
     135             : /// Return true if the data array represents a histogram for given dim.
     136       47489 : bool is_histogram(const DataArray &a, const Dim dim) {
     137       47489 :   return is_histogram_impl(a, dim);
     138             : }
     139             : 
     140             : /// Return true if the dataset represents a histogram for given dim.
     141           2 : bool is_histogram(const Dataset &a, const Dim dim) {
     142           2 :   return is_histogram_impl(a, dim);
     143             : }
     144             : 
     145             : } // namespace scipp::dataset

Generated by: LCOV version 1.14