LCOV - code coverage report
Current view: top level - dataset - variable_instantiate_bin_elements.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 76 99 76.8 %
Date: 2024-12-01 01:56:34 Functions: 11 20 55.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : #include "scipp/dataset/bins.h"
       6             : #include "scipp/dataset/dataset.h"
       7             : #include "scipp/dataset/string.h"
       8             : #include "scipp/variable/bin_array_variable.tcc"
       9             : #include "scipp/variable/bins.h"
      10             : #include "scipp/variable/string.h"
      11             : 
      12             : namespace scipp::variable {
      13             : 
      14             : namespace {
      15             : 
      16             : template <class Key, class Value>
      17         136 : std::string compact_dict_entry(const Key &key, const Value &var) {
      18         136 :   std::stringstream s;
      19         136 :   s << "'" << key << "':" << format_variable_compact(var);
      20         272 :   return s.str();
      21         136 : }
      22             : 
      23             : template <class Key, class Value>
      24             : std::string
      25          64 : dict_to_compact_string(const scipp::dataset::SizedDict<Key, Value> &dict,
      26             :                        const std::string &description,
      27             :                        const std::string &margin) {
      28          64 :   std::stringstream s;
      29          64 :   const scipp::index max_length = 70;
      30          64 :   const auto indent = margin.size() + description.size() + 2;
      31          64 :   s << margin << description << "={";
      32          64 :   bool first_iter = true;
      33          64 :   auto current_line_length = indent;
      34         200 :   for (const auto &[key, var] : dict) {
      35         136 :     if (current_line_length > max_length) {
      36          18 :       s << ",\n" << std::string(indent, ' ');
      37          18 :       current_line_length = indent;
      38          18 :       first_iter = true;
      39             :     }
      40         136 :     const auto append = compact_dict_entry(key, var);
      41         136 :     auto length = append.size();
      42         136 :     if (first_iter)
      43          82 :       first_iter = false;
      44             :     else {
      45          54 :       s << ", ";
      46          54 :       length += 2;
      47             :     }
      48         136 :     s << append;
      49         136 :     current_line_length += length;
      50             :   }
      51          64 :   s << "}";
      52         128 :   return s.str();
      53          64 : }
      54             : } // namespace
      55             : 
      56             : template <>
      57          46 : std::string Formatter<core::bin<DataArray>>::format(const Variable &var) const {
      58          46 :   const auto &[indices, dim, buffer] = var.constituents<DataArray>();
      59          46 :   std::string margin(10, ' ');
      60          46 :   std::stringstream s;
      61          46 :   s << "binned data: dim='" + to_string(dim) + "', content=DataArray(";
      62          46 :   s << "\n" << margin << "dims=" << to_string(buffer.dims()) << ',';
      63          46 :   s << "\n" << margin << "data=" << format_variable_compact(buffer.data());
      64          46 :   if (!buffer.coords().empty())
      65          46 :     s << ",\n" << dict_to_compact_string(buffer.coords(), "coords", margin);
      66          46 :   if (!buffer.masks().empty())
      67          18 :     s << ",\n" << dict_to_compact_string(buffer.masks(), "masks", margin);
      68          46 :   if (!buffer.attrs().empty())
      69           0 :     s << ",\n" << dict_to_compact_string(buffer.attrs(), "attrs", margin);
      70         138 :   return s.str() + ')';
      71          46 : }
      72             : 
// Instantiate the bin-array variable machinery for Dataset and DataArray
// content. NOTE(review): the macro is presumably provided by
// scipp/variable/bin_array_variable.tcc, included at the top of this file;
// confirm there what the first argument (the "View" name) is used for.
INSTANTIATE_BIN_ARRAY_VARIABLE(DatasetView, Dataset)
INSTANTIATE_BIN_ARRAY_VARIABLE(DataArrayView, DataArray)
      75             : 
      76             : } // namespace scipp::variable
      77             : 
      78             : namespace scipp::dataset {
      79             : 
      80             : namespace {
      81          30 : Variable apply_mask(const DataArray &buffer, const Variable &indices,
      82             :                     const Dim dim, const Variable &mask, const FillValue fill) {
      83             :   return make_bins(
      84             :       indices, dim,
      85          60 :       where(mask, special_like(Variable(buffer.data(), Dimensions{}), fill),
      86          60 :             buffer.data()));
      87             : }
      88             : } // namespace
      89             : 
      90             : class BinVariableMakerDataArray : public variable::BinVariableMaker<DataArray> {
      91             : private:
      92          49 :   Variable call_make_bins(const Variable &parent, const Variable &indices,
      93             :                           const Dim dim, const DType type,
      94             :                           const Dimensions &dims, const units::Unit &unit,
      95             :                           const bool variances) const override {
      96          49 :     const auto &source = buffer(parent);
      97          49 :     if (parent.dims() !=
      98             :         indices
      99             :             .dims()) // would need to select and copy slices from source coords
     100           0 :       throw std::runtime_error(
     101           0 :           "Shape changing operations with bucket<DataArray> not supported yet");
     102             :     // The only caller is BinVariableMaker::create, which should ensure that
     103             :     // indices and buffer size are valid and compatible.
     104             :     auto data_buffer =
     105          49 :         variable::variableFactory().create(type, dims, unit, variances);
     106             :     // If the buffer size is unchanged and input indices match output indices we
     107             :     // can use a cheap and simple copy of the buffer's coords and masks.
     108             :     // Otherwise we fall back to a copy via the binned views of the respective
     109             :     // content buffers.
     110         144 :     if (source.dims() == Dimensions{dim, dims.volume()} &&
     111          95 :         indices == parent.bin_indices()) {
     112          92 :       auto buffer = DataArray(std::move(data_buffer), copy(source.coords()),
     113         184 :                               copy(source.masks()), copy(source.attrs()));
     114          46 :       return make_bins_no_validate(indices, dim, std::move(buffer));
     115          46 :     } else {
     116           3 :       auto buffer = resize_default_init(source, dim, dims.volume());
     117           6 :       auto out = make_bins_no_validate(indices, dim, std::move(buffer));
     118             :       // Note the inefficiency here: The data is copied, even though it will be
     119             :       // replaced and overwritten. Since this branch is a special case it is not
     120             :       // worth the effort to avoid this.
     121           3 :       copy(parent, out);
     122           3 :       out.bin_buffer<DataArray>().setData(std::move(data_buffer));
     123           3 :       return out;
     124           3 :     }
     125          49 :   }
     126       17713 :   const Variable &data(const Variable &var) const override {
     127       17713 :     return buffer(var).data();
     128             :   }
     129         150 :   Variable data(Variable &var) const override { return buffer(var).data(); }
     130             : 
     131             :   [[nodiscard]] Variable
     132        8672 :   apply_event_masks(const Variable &var, const FillValue fill) const override {
     133       17344 :     if (const auto mask_union = irreducible_event_mask(var);
     134        8672 :         mask_union.is_valid()) {
     135          30 :       const auto &&[indices, dim, buffer] = var.constituents<DataArray>();
     136          30 :       return apply_mask(buffer, indices, dim, mask_union, fill);
     137        8702 :     }
     138        8642 :     return var;
     139             :   }
     140             : 
     141             :   [[nodiscard]] Variable
     142        8695 :   irreducible_event_mask(const Variable &var) const override {
     143        8695 :     const auto &&[indices, dim, buffer] = var.constituents<DataArray>();
     144       17390 :     return irreducible_mask(buffer.masks(), dim);
     145        8695 :   }
     146             : };
     147             : 
     148             : /// This is currently a dummy implemented just to make `is_bins` work.
     149             : class BinVariableMakerDataset
     150             :     : public variable::BinVariableMakerCommon<Dataset> {
     151           0 :   Variable create(const DType, const Dimensions &, const units::Unit &,
     152             :                   const bool, const parent_list &) const override {
     153           0 :     throw std::runtime_error("not implemented");
     154             :   }
     155           0 :   Dim elem_dim(const Variable &) const override {
     156           0 :     throw std::runtime_error("undefined");
     157             :   }
     158           0 :   DType elem_dtype(const Variable &) const override {
     159           0 :     throw std::runtime_error("undefined");
     160             :   }
     161           0 :   units::Unit elem_unit(const Variable &) const override {
     162           0 :     throw std::runtime_error("undefined");
     163             :   }
     164           0 :   void expect_can_set_elem_unit(const Variable &,
     165             :                                 const units::Unit &) const override {
     166           0 :     throw std::runtime_error("undefined");
     167             :   }
     168           0 :   void set_elem_unit(Variable &, const units::Unit &) const override {
     169           0 :     throw std::runtime_error("undefined");
     170             :   }
     171           0 :   bool has_variances(const Variable &) const override {
     172           0 :     throw std::runtime_error("undefined");
     173             :   }
     174           0 :   [[nodiscard]] Variable apply_event_masks(const Variable &,
     175             :                                            const FillValue) const override {
     176           0 :     throw except::NotImplementedError(
     177           0 :         "Event masks for bins containing datasets are not supported.");
     178             :   }
     179             :   [[nodiscard]] Variable
     180           0 :   irreducible_event_mask(const Variable &) const override {
     181           0 :     throw except::NotImplementedError(
     182           0 :         "Event masks for bins containing datasets are not supported.");
     183             :   }
     184             : };
     185             : 
     186             : REGISTER_FORMATTER(bin_DataArray, core::bin<DataArray>)
     187             : REGISTER_FORMATTER(bin_Dataset, core::bin<Dataset>)
     188             : 
     189             : namespace {
     190             : auto register_variable_maker_bucket_DataArray(
     191             :     (variable::variableFactory().emplace(
     192             :          dtype<bucket<DataArray>>,
     193             :          std::make_unique<BinVariableMakerDataArray>()),
     194             :      variable::variableFactory().emplace(
     195             :          dtype<bucket<Dataset>>, std::make_unique<BinVariableMakerDataset>()),
     196             :      0));
     197             : } // namespace
     198             : } // namespace scipp::dataset

Generated by: LCOV version 1.14