LCOV - code coverage report
Current view: top level - dataset - sized_dict.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 233 243 95.9 %
Date: 2024-11-24 01:48:31 Functions: 77 95 81.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : #include <algorithm>
       6             : #include <utility>
       7             : 
       8             : #include "scipp/dataset/except.h"
       9             : #include "scipp/dataset/sized_dict.h"
      10             : #include "scipp/variable/variable_factory.h"
      11             : 
      12             : namespace scipp::dataset {
      13             : 
      14             : namespace {
      15      452534 : template <class T> void expect_writable(const T &dict) {
      16      452534 :   if (dict.is_readonly())
      17          31 :     throw except::DataArrayError(
      18             :         "Read-only flag is set, cannot mutate metadata dict.");
      19      452503 : }
      20             : 
      21          71 : void merge_sizes_into(Sizes &target, const Dimensions &s) {
      22             :   using std::to_string;
      23             : 
      24         119 :   for (const auto &dim : s) {
      25          50 :     if (target.contains(dim)) {
      26          17 :       const auto a = target[dim];
      27          17 :       const auto b = s[dim];
      28          17 :       if (a == b + 1) // had bin-edges, replace by regular coord
      29           3 :         target.resize(dim, b);
      30          14 :       else if (a + 1 == b) { // had regular coord, got extra by bin-edges
      31             :         // keep current
      32          11 :       } else if (a != b)
      33           2 :         throw except::DimensionError(
      34           4 :             "Conflicting length in dimension " + to_string(dim) + ": " +
      35           8 :             to_string(target[dim]) + " vs " + to_string(s[dim]));
      36             :     } else {
      37          33 :       target.set(dim, s[dim]);
      38             :     }
      39             :   }
      40          69 : }
      41             : 
      42             : template <class Key, class Value>
      43          66 : auto make_from_items(typename SizedDict<Key, Value>::holder_type items,
      44             :                      const bool readonly) {
      45          66 :   Sizes sizes;
      46         135 :   for (auto &&[key, value] : items) {
      47          71 :     merge_sizes_into(sizes, value.dims());
      48             :   }
      49         130 :   return SizedDict<Key, Value>(std::move(sizes), std::move(items), readonly);
      50          66 : }
      51             : } // namespace
      52             : 
      53             : template <class Key, class Value>
      54        2239 : SizedDict<Key, Value>::SizedDict(
      55             :     const Sizes &sizes,
      56             :     std::initializer_list<std::pair<const Key, Value>> items,
      57             :     const bool readonly)
      58        2239 :     : SizedDict(sizes, holder_type(items), readonly) {}
      59             : 
      60             : template <class Key, class Value>
      61           0 : SizedDict<Key, Value>::SizedDict(
      62             :     AutoSizeTag tag, std::initializer_list<std::pair<const Key, Value>> items,
      63             :     const bool readonly)
      64           0 :     : SizedDict(tag, holder_type(items), readonly) {}
      65             : 
      66             : template <class Key, class Value>
      67      470804 : SizedDict<Key, Value>::SizedDict(Sizes sizes, holder_type items,
      68             :                                  const bool readonly)
      69      470804 :     : m_sizes(std::move(sizes)) {
      70      649424 :   for (auto &&[key, value] : items)
      71      178636 :     set(key, std::move(value));
      72             :   // `set` requires Dict to be writable, set readonly flag at the end.
      73      470796 :   m_readonly = readonly; // NOLINT(cppcoreguidelines-prefer-member-initializer)
      74      470812 : }
      75             : 
      76             : template <class Key, class Value>
      77          66 : SizedDict<Key, Value>::SizedDict(AutoSizeTag, holder_type items,
      78             :                                  const bool readonly)
      79          66 :     : SizedDict(make_from_items<Key, Value>(std::move(items), readonly)) {}
      80             : 
      81             : template <class Key, class Value>
      82      880779 : SizedDict<Key, Value>::SizedDict(const SizedDict &other)
      83      880779 :     : m_sizes(other.m_sizes), m_items(other.m_items), m_readonly(false) {}
      84             : 
      85             : template <class Key, class Value>
      86       65275 : SizedDict<Key, Value>::SizedDict(SizedDict &&other) noexcept
      87       65275 :     : m_sizes(std::move(other.m_sizes)), m_items(std::move(other.m_items)),
      88       65275 :       m_readonly(other.m_readonly) {}
      89             : 
      90             : template <class Key, class Value>
      91             : SizedDict<Key, Value> &
      92        6524 : SizedDict<Key, Value>::operator=(const SizedDict &other) = default;
      93             : 
      94             : template <class Key, class Value>
      95             : SizedDict<Key, Value> &
      96        4119 : SizedDict<Key, Value>::operator=(SizedDict &&other) noexcept = default;
      97             : 
      98             : namespace {
      99             : template <class Item, class Key, class Value, class Compare>
     100       22436 : bool item_in_other(const Item &item, const SizedDict<Key, Value> &other,
     101             :                    Compare &&compare_data) {
     102       22436 :   const auto &[name, data] = item;
     103       22436 :   if (!other.contains(name))
     104          72 :     return false;
     105       22364 :   const auto &other_data = other[name];
     106       44275 :   return compare_data(data, other_data) &&
     107       44275 :          data.is_aligned() == other_data.is_aligned();
     108             : }
     109             : } // namespace
     110             : 
     111             : template <class Key, class Value>
     112       19040 : bool SizedDict<Key, Value>::operator==(const SizedDict &other) const {
     113       19040 :   if (size() != other.size())
     114         113 :     return false;
     115       59576 :   return std::all_of(this->begin(), this->end(), [&other](const auto &item) {
     116       43444 :     return item_in_other(item, other,
     117       65118 :                          [](const auto &x, const auto &y) { return x == y; });
     118       18927 :   });
     119             : }
     120             : 
     121             : template <class Key, class Value>
     122        1209 : bool equals_nan(const SizedDict<Key, Value> &a,
     123             :                 const SizedDict<Key, Value> &b) {
     124        1209 :   if (a.size() != b.size())
     125          30 :     return false;
     126        3072 :   return std::all_of(a.begin(), a.end(), [&b](const auto &item) {
     127        1428 :     return item_in_other(
     128        2118 :         item, b, [](const auto &x, const auto &y) { return equals_nan(x, y); });
     129        1179 :   });
     130             : }
     131             : 
     132             : template <class Key, class Value>
     133       18625 : bool SizedDict<Key, Value>::operator!=(const SizedDict &other) const {
     134       18625 :   return !operator==(other);
     135             : }
     136             : 
     137             : /// Returns whether a given key is present in the view.
     138             : template <class Key, class Value>
     139      942541 : bool SizedDict<Key, Value>::contains(const Key &k) const {
     140      942541 :   return m_items.contains(k);
     141             : }
     142             : 
     143             : /// Returns 1 or 0, depending on whether key is present in the view or not.
     144             : template <class Key, class Value>
     145       23853 : scipp::index SizedDict<Key, Value>::count(const Key &k) const {
     146       23853 :   return static_cast<scipp::index>(contains(k));
     147             : }
     148             : 
     149             : /// Const reference to the coordinate for given dimension.
     150             : template <class Key, class Value>
     151      129797 : const Value &SizedDict<Key, Value>::operator[](const Key &key) const {
     152      129797 :   return at(key);
     153             : }
     154             : 
     155             : /// Const reference to the coordinate for given dimension.
     156             : template <class Key, class Value>
     157      233152 : const Value &SizedDict<Key, Value>::at(const Key &key) const {
     158      233152 :   scipp::expect::contains(*this, key);
     159      228704 :   return m_items.at(key);
     160             : }
     161             : 
     162             : /// The coordinate for given dimension.
     163             : template <class Key, class Value>
     164       42634 : Value SizedDict<Key, Value>::operator[](const Key &key) {
     165       42634 :   return std::as_const(*this).at(key);
     166             : }
     167             : 
     168             : /// The coordinate for given dimension.
     169             : template <class Key, class Value>
     170        2297 : Value SizedDict<Key, Value>::at(const Key &key) {
     171        2297 :   return std::as_const(*this).at(key);
     172             : }
     173             : 
     174             : /// Return the dimension for given coord.
     175             : /// @param key Key of the coordinate in a coord dict
     176             : ///
     177             : /// Return the dimension of the coord for 1-D coords or Dim::Invalid for 0-D
     178             : /// coords. In the special case of multi-dimension coords the following applies,
     179             : /// in this order:
     180             : /// - For bin-edge coords return the dimension in which the coord dimension
     181             : ///   exceeds the data dimensions.
     182             : /// - Else, for dimension coords (key matching a dimension), return the key.
     183             : /// - Else, return Dim::Invalid.
     184             : template <class Key, class Value>
     185       51166 : Dim SizedDict<Key, Value>::dim_of(const Key &key) const {
     186       51166 :   const auto &var = at(key);
     187       51166 :   if (var.dims().ndim() == 0)
     188         196 :     return Dim::Invalid;
     189       50970 :   if (var.dims().ndim() == 1)
     190       50642 :     return var.dims().inner();
     191             :   if constexpr (std::is_same_v<Key, Dim>) {
     192         986 :     for (const auto &dim : var.dims())
     193         671 :       if (core::is_edges(sizes(), var.dims(), dim))
     194          13 :         return dim;
     195         315 :     if (var.dims().contains(key))
     196          14 :       return key; // dimension coord
     197             :   }
     198         301 :   return Dim::Invalid;
     199             : }
     200             : 
     201             : template <class Key, class Value>
     202       58985 : void SizedDict<Key, Value>::setSizes(const Sizes &sizes) {
     203       58985 :   scipp::expect::includes(sizes, m_sizes);
     204       58985 :   m_sizes = sizes;
     205       58985 : }
     206             : 
     207             : namespace {
     208             : template <class Key>
     209      432648 : void expect_valid_coord_dims(const Key &key, const Dimensions &coord_dims,
     210             :                              const Sizes &da_sizes) {
     211             :   using core::to_string;
     212      432648 :   if (!da_sizes.includes(coord_dims))
     213          32 :     throw except::DimensionError(
     214           0 :         "Cannot add coord '" + to_string(key) + "' of dims " +
     215             :         to_string(coord_dims) + " to DataArray with dims " +
     216          32 :         to_string(Dimensions{da_sizes.labels(), da_sizes.sizes()}));
     217      432632 : }
     218             : } // namespace
     219             : 
     220             : template <class Key, class Value>
     221      434045 : void SizedDict<Key, Value>::set(const key_type &key, mapped_type coord) {
     222      434045 :   if (contains(key) && at(key).is_same(coord))
     223        1373 :     return;
     224      432672 :   expect_writable(*this);
     225             :   using core::to_string;
     226      432655 :   if (is_bins(coord))
     227          13 :     throw except::VariableError(
     228          10 :         std::string("Cannot set binned variable as coord or mask.\n") +
     229             :         "When working with binned data, binned coords or masks are typically "
     230             :         "set via the `bins` property.\nInstead of\n"
     231             :         "    da.coords[" +
     232           2 :         to_string(key) + "] = binned_var`\n" +
     233             :         "use\n"
     234             :         "    da.bins.coords[" +
     235           2 :         to_string(key) + "] = binned_var`");
     236      432650 :   auto dims = coord.dims();
     237             :   // Is a good definition for things that are allowed: "would be possible to
     238             :   // concat along existing dim or extra dim"?
     239      798710 :   for (const auto &dim : coord.dims()) {
     240      446073 :     if (!sizes().contains(dim) && dims[dim] == 2) { // bin edge along extra dim
     241         556 :       dims.erase(dim);
     242         556 :       break;
     243      445517 :     } else if (dims[dim] == sizes()[dim] + 1) {
     244       79455 :       dims.resize(dim, sizes()[dim]);
     245       79455 :       break;
     246             :     }
     247             :   }
     248      432648 :   expect_valid_coord_dims(key, dims, m_sizes);
     249      432632 :   m_items.insert_or_assign(key, std::move(coord));
     250      432650 : }
     251             : 
     252             : template <class Key, class Value>
     253        7947 : void SizedDict<Key, Value>::erase(const key_type &key) {
     254        7947 :   static_cast<void>(extract(key));
     255        7933 : }
     256             : 
     257             : template <class Key, class Value>
     258        8226 : Value SizedDict<Key, Value>::extract(const key_type &key) {
     259        8226 :   expect_writable(*this);
     260        8213 :   return m_items.extract(key);
     261             : }
     262             : 
     263             : template <class Key, class Value>
     264           0 : Value SizedDict<Key, Value>::extract(const key_type &key,
     265             :                                      const mapped_type &default_value) {
     266           0 :   if (contains(key)) {
     267           0 :     return extract(key);
     268             :   }
     269           0 :   return default_value;
     270             : }
     271             : 
     272             : template <class Key, class Value>
     273       28577 : SizedDict<Key, Value> SizedDict<Key, Value>::slice(const Slice &params) const {
     274       28577 :   const bool readonly = true;
     275       28577 :   return {m_sizes.slice(params), slice_map(m_sizes, m_items, params), readonly};
     276             : }
     277             : 
     278             : namespace {
     279       26901 : constexpr auto unaligned_by_dim_slice = [](const auto &coords, const auto &key,
     280             :                                            const auto &var,
     281             :                                            const Slice &params) {
     282       26901 :   if (params == Slice{} || params.end() != -1)
     283       20994 :     return false;
     284        5907 :   const Dim dim = params.dim();
     285        5907 :   return var.dims().contains(dim) && coords.dim_of(key) == dim;
     286             : };
     287             : } // namespace
     288             : 
     289             : template <class Key, class Value>
     290             : SizedDict<Key, Value>
     291       10387 : SizedDict<Key, Value>::slice_coords(const Slice &params) const {
     292       10387 :   auto coords = slice(params);
     293       10385 :   coords.m_readonly = false;
     294       37286 :   for (const auto &[key, var] : *this)
     295       26901 :     if (unaligned_by_dim_slice(*this, key, var, params))
     296        1699 :       coords.set_aligned(key, false);
     297       10385 :   coords.m_readonly = true;
     298       10385 :   return coords;
     299           0 : }
     300             : 
     301             : template <class Key, class Value>
     302          58 : void SizedDict<Key, Value>::validateSlice(const Slice &s,
     303             :                                           const SizedDict &dict) const {
     304             :   using core::to_string;
     305             :   using units::to_string;
     306          68 :   for (const auto &[key, item] : dict) {
     307          17 :     const auto it = find(key);
     308          17 :     if (it == end()) {
     309           4 :       throw except::NotFoundError("Cannot insert new meta data '" +
     310           2 :                                   to_string(key) + "' via a slice.");
     311          15 :     } else if (const auto &var = it->second;
     312          21 :                (var.is_readonly() || !var.dims().contains(s.dim())) &&
     313          21 :                (var.dims().contains(s.dim()) ? var.slice(s) : var) != item) {
     314          10 :       throw except::DimensionError("Cannot update meta data '" +
     315           5 :                                    to_string(key) +
     316             :                                    "' via slice since it is implicitly "
     317             :                                    "broadcast along the slice dimension '" +
     318             :                                    to_string(s.dim()) + "'.");
     319             :     }
     320             :   }
     321          51 : }
     322             : 
     323             : template <class Key, class Value>
     324          40 : SizedDict<Key, Value> &SizedDict<Key, Value>::setSlice(const Slice &s,
     325             :                                                        const SizedDict &dict) {
     326          40 :   validateSlice(s, dict);
     327          43 :   for (const auto &[key, item] : dict) {
     328           7 :     const auto it = find(key);
     329          14 :     if (it != end() && !it->second.is_readonly() &&
     330          14 :         it->second.dims().contains(s.dim()))
     331           6 :       it->second.setSlice(s, item);
     332             :   }
     333          36 :   return *this;
     334             : }
     335             : 
     336             : template <class Key, class Value>
     337         908 : SizedDict<Key, Value> SizedDict<Key, Value>::rename_dims(
     338             :     const std::vector<std::pair<Dim, Dim>> &names,
     339             :     const bool fail_on_unknown) const {
     340         908 :   auto out(*this);
     341         908 :   out.m_sizes = out.m_sizes.rename_dims(names, fail_on_unknown);
     342        1601 :   for (auto &&item : out.m_items) {
     343             :     // DataArray coords/attrs support the special case of length-2 items with a
     344             :     // dim that is not contained in the data array dims. This occurs, e.g., when
     345             :     // slicing along a dim that has a bin edge coord. We must prevent renaming
     346             :     // to such dims. This is the reason for calling with `names` that may
     347             :     // contain unknown dims (and the `fail_on_unknown` arg). Otherwise the
     348             :     // caller would need to perform this check.
     349        1714 :     for (const auto &rename : names)
     350        1447 :       if (!m_sizes.contains(rename.second) &&
     351         427 :           item.second.dims().contains(rename.second))
     352           5 :         throw except::DimensionError("Duplicate dimension " +
     353             :                                      units::to_string(rename.second) + ".");
     354         694 :     item.second = item.second.rename_dims(names, false);
     355             :   }
     356         902 :   return out;
     357           6 : }
     358             : 
     359             : /// Mark the dict as readonly. Does not imply that items are readonly.
     360             : template <class Key, class Value>
     361       48483 : void SizedDict<Key, Value>::set_readonly() noexcept {
     362       48483 :   m_readonly = true;
     363       48483 : }
     364             : 
     365             : /// Return true if the dict is readonly. Does not imply that items are readonly.
     366             : template <class Key, class Value>
     367      452548 : bool SizedDict<Key, Value>::is_readonly() const noexcept {
     368      452548 :   return m_readonly;
     369             : }
     370             : 
     371             : template <class Key, class Value>
     372           6 : SizedDict<Key, Value> SizedDict<Key, Value>::as_const() const {
     373           6 :   holder_type items;
     374           6 :   items.reserve(m_items.size());
     375           9 :   for (const auto &[key, val] : m_items)
     376           3 :     items.insert_or_assign(key, val.as_const());
     377           6 :   const bool readonly = true;
     378          12 :   return {sizes(), std::move(items), readonly};
     379           6 : }
     380             : 
     381             : template <class Key, class Value>
     382             : SizedDict<Key, Value>
     383       50808 : SizedDict<Key, Value>::merge_from(const SizedDict &other) const {
     384             :   using core::to_string;
     385             :   using units::to_string;
     386             : 
     387       50808 :   auto out(*this);
     388       50808 :   out.m_readonly = false;
     389      225966 :   for (const auto &[key, value] : other) {
     390      175159 :     if (out.contains(key))
     391           1 :       throw except::DataArrayError(
     392           0 :           "Coord '" + to_string(key) +
     393             :           "' shadows attr of the same name. Remove the attr if you are slicing "
     394             :           "an array or use the `coords` and `attrs` properties instead of "
     395             :           "`meta`.");
     396      175158 :     out.set(key, value);
     397             :   }
     398       50807 :   out.m_readonly = m_readonly;
     399       50807 :   return out;
     400           1 : }
     401             : 
     402             : template <class Key, class Value>
     403       20895 : bool SizedDict<Key, Value>::item_applies_to(const Key &key,
     404             :                                             const Dimensions &dims) const {
     405       20895 :   const auto &val = m_items.at(key);
     406       41790 :   return std::all_of(val.dims().begin(), val.dims().end(),
     407       61783 :                      [&dims](const Dim dim) { return dims.contains(dim); });
     408             : }
     409             : 
     410             : template <class Key, class Value>
     411        4968 : bool SizedDict<Key, Value>::is_edges(const Key &key,
     412             :                                      const std::optional<Dim> dim) const {
     413        4968 :   const auto &val = this->at(key);
     414        9933 :   return core::is_edges(m_sizes, val.dims(),
     415       14900 :                         dim.has_value() ? *dim : val.dim());
     416             : }
     417             : 
     418             : template <class Key, class Value>
     419       11636 : void SizedDict<Key, Value>::set_aligned(const Key &key, const bool aligned) {
     420       11636 :   expect_writable(*this);
     421       11635 :   m_items.at(key).set_aligned(aligned);
     422       11635 : }
     423             : 
     424             : template <class Key, class Value>
     425         269 : core::Dict<Key, Value> union_(const SizedDict<Key, Value> &a,
     426             :                               const SizedDict<Key, Value> &b,
     427             :                               std::string_view opname) {
     428         269 :   core::Dict<Key, Value> out;
     429         269 :   out.reserve(a.size() + b.size());
     430         565 :   for (const auto &[key, val_a] : a)
     431         296 :     if (val_a.is_aligned())
     432         290 :       out.insert_or_assign(key, val_a);
     433             : 
     434         538 :   for (const auto &[key, val_b] : b) {
     435         288 :     if (const auto it = a.find(key); it != a.end()) {
     436         282 :       auto &&val_a = it->second;
     437         282 :       if (val_a.is_aligned() && val_b.is_aligned())
     438         273 :         expect::matching_coord(key, val_a, val_b, opname);
     439           9 :       else if (val_b.is_aligned())
     440             :         // aligned b takes precedence over unaligned a
     441           2 :         out.insert_or_assign(key, val_b);
     442           7 :       else if (!val_a.is_aligned()) {
     443             :         // neither is aligned
     444           3 :         if (equals_nan(val_a, val_b))
     445           1 :           out.insert_or_assign(key, val_b);
     446             :         // else: mismatching unaligned coords => do not include in out
     447             :       }
     448             :       // else: aligned a takes precedence over unaligned b
     449             :     } else {
     450           6 :       if (val_b.is_aligned())
     451           5 :         out.insert_or_assign(key, val_b);
     452             :     }
     453             :   }
     454             : 
     455         250 :   return out;
     456          19 : }
     457             : 
     458             : template <class Key, class Value>
     459         244 : core::Dict<Key, Value> intersection(const SizedDict<Key, Value> &a,
     460             :                                     const SizedDict<Key, Value> &b) {
     461         244 :   core::Dict<Key, Value> out;
     462         326 :   for (const auto &[key, item] : a)
     463          82 :     if (const auto it = b.find(key);
     464          82 :         it != b.end() && equals_nan(it->second, item))
     465          71 :       out.insert_or_assign(key, item);
     466         244 :   return out;
     467           0 : }
     468             : 
     469             : template class SCIPP_DATASET_EXPORT SizedDict<Dim, Variable>;
     470             : template class SCIPP_DATASET_EXPORT SizedDict<std::string, Variable>;
     471             : template SCIPP_DATASET_EXPORT bool equals_nan(const Coords &a, const Coords &b);
     472             : template SCIPP_DATASET_EXPORT bool equals_nan(const Masks &a, const Masks &b);
     473             : template SCIPP_DATASET_EXPORT typename Coords::holder_type
     474             : union_(const Coords &, const Coords &, std::string_view opname);
     475             : template SCIPP_DATASET_EXPORT typename Coords::holder_type
     476             : intersection(const Coords &, const Coords &);
     477             : } // namespace scipp::dataset

Generated by: LCOV version 1.14