LCOV - code coverage report
Current view: top level - dataset - dataset.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 215 228 94.3 %
Date: 2024-12-01 01:56:34 Functions: 40 41 97.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : #include <sstream>
       6             : 
       7             : #include "scipp/core/except.h"
       8             : #include "scipp/dataset/dataset.h"
       9             : #include "scipp/dataset/dataset_util.h"
      10             : #include "scipp/dataset/except.h"
      11             : #include "scipp/units/unit.h"
      12             : 
      13             : namespace scipp::dataset {
      14             : 
      15             : namespace {
      16       11105 : template <class T> void expect_writable(const T &dict) {
      17       11105 :   if (dict.is_readonly())
      18           3 :     throw except::DatasetError(
      19             :         "Read-only flag is set, cannot insert new or erase existing items.");
      20       11102 : }
      21             : 
      22       11064 : void expect_valid(const Dataset &ds) {
      23       11064 :   if (!ds.is_valid())
      24           2 :     throw except::DatasetError(
      25             :         "Dataset is not valid. This is an internal error stemming from an "
      26           4 :         "improperly initialized dataset.");
      27       11062 : }
      28             : 
      29             : template <class T>
      30        4322 : void expect_matching_item_dims(const Dataset &dset, const std::string_view key,
      31             :                                const T &to_insert) {
      32        4322 :   if (dset.sizes() != to_insert.dims()) {
      33          12 :     std::ostringstream msg;
      34          12 :     msg << "Cannot add item '" << key << "' with dims " << to_insert.dims()
      35          12 :         << " to dataset with dims " << to_string(dset.sizes()) << ".";
      36          12 :     throw except::DimensionError(msg.str());
      37          12 :   }
      38        4310 : }
      39             : } // namespace
      40             : 
      41             : /// Make an invalid dataset.
      42             : ///
      43             : /// Such a dataset is intended to be filled using setDataInit and must
      44             : /// never be exposed to Python!
      45       41083 : Dataset::Dataset() : m_valid{false} {}
      46             : 
      47        1612 : Dataset::Dataset(const Dataset &other)
      48        1612 :     : m_coords(other.m_coords), m_data(other.m_data), m_readonly(false),
      49        1612 :       m_valid{other.m_valid} {}
      50             : 
      51          23 : Dataset::Dataset(const DataArray &data) {
      52          23 :   m_coords.setSizes(data.dims());
      53          23 :   setData(data.name(), data);
      54          23 : }
      55             : 
      56          28 : Dataset &Dataset::operator=(const Dataset &other) {
      57          28 :   return *this = Dataset(other);
      58             : }
      59             : 
      60         188 : Dataset &Dataset::operator=(Dataset &&other) {
      61         188 :   if (this == &other) {
      62           0 :     return *this;
      63             :   }
      64         188 :   check_nested_in_assign(*this, other);
      65         186 :   m_coords = std::move(other.m_coords);
      66         186 :   m_data = std::move(other.m_data);
      67         186 :   m_readonly = other.m_readonly;
      68         186 :   m_valid = other.m_valid;
      69         186 :   return *this;
      70             : }
      71             : 
      72             : /// Removes all data items from the Dataset.
      73             : ///
      74             : /// Coordinates are not modified.
      75           3 : void Dataset::clear() {
      76           3 :   expect_writable(*this);
      77           3 :   m_data.clear();
      78           3 : }
      79             : 
      80           5 : void Dataset::setCoords(Coords other) {
      81           5 :   expect_valid(*this);
      82           5 :   scipp::expect::includes(other.sizes(), m_coords.sizes());
      83           1 :   m_coords = std::move(other);
      84           1 : }
      85             : /// Return a const view to all coordinates of the dataset.
      86       16937 : const Coords &Dataset::coords() const noexcept { return m_coords; }
      87             : 
      88             : /// Return a view to all coordinates of the dataset.
      89        2356 : Coords &Dataset::coords() noexcept { return m_coords; }
      90             : 
      91             : /// Return a Dataset without the given coordinate names.
      92           8 : Dataset Dataset::drop_coords(const scipp::span<const Dim> coord_names) const {
      93           8 :   Dataset result = *this;
      94          19 :   for (const auto &name : coord_names)
      95          11 :     result.coords().erase(name);
      96           8 :   return result;
      97           0 : }
      98             : 
      99             : /// Alias for coords().
     100          41 : const Coords &Dataset::meta() const noexcept { return coords(); }
     101             : /// Alias for coords().
     102           0 : Coords &Dataset::meta() noexcept { return coords(); }
     103             : 
     104        9471 : bool Dataset::contains(const std::string &name) const noexcept {
     105        9471 :   return m_data.contains(name);
     106             : }
     107             : 
     108             : /// Removes a data item from the Dataset
     109             : ///
     110             : /// Coordinates are not modified.
     111          41 : void Dataset::erase(const std::string &name) {
     112          41 :   expect_writable(*this);
     113          40 :   scipp::expect::contains(*this, name);
     114          38 :   m_data.erase(std::string(name));
     115          38 : }
     116             : 
     117             : /// Extract a data item from the Dataset, returning a DataArray
     118             : ///
     119             : /// Coordinates are not modified.
     120          13 : DataArray Dataset::extract(const std::string &name) {
     121          13 :   auto extracted = operator[](name);
     122          12 :   erase(name);
     123          12 :   return extracted;
     124           0 : }
     125             : 
     126             : /// Return a data item with coordinates with given name.
     127        3681 : DataArray Dataset::operator[](const std::string &name) const {
     128        3681 :   scipp::expect::contains(*this, name);
     129        7336 :   return *find(name);
     130             : }
     131             : 
     132             : /// Set (insert or replace) the coordinate for the given dimension.
     133        6736 : void Dataset::setCoord(const Dim dim, Variable coord) {
     134        6736 :   expect_writable(*this);
     135        6736 :   expect_valid(*this);
     136        6735 :   m_coords.set(dim, std::move(coord));
     137        6728 : }
     138             : 
     139             : /// Set (insert or replace) data (values, optional variances) with given name.
     140             : ///
     141             : /// Throws if the provided values bring the dataset into an inconsistent state
     142             : /// (mismatching dimensions). The default is to drop the existing masks and
     143             : /// attributes of a replaced item, unless AttrPolicy::Keep is specified.
     144        4325 : void Dataset::setData(const std::string &name, Variable data,
     145             :                       const AttrPolicy attrPolicy) {
     146        4325 :   expect_writable(*this);
     147        4323 :   expect_valid(*this);
     148        4322 :   expect_matching_item_dims(*this, name, data);
     149        4310 :   const auto replace = contains(name);
     150        4310 :   if (replace && attrPolicy == AttrPolicy::Keep)
     151          73 :     m_data.insert_or_assign(
     152         219 :         name, DataArray(std::move(data), {}, m_data[name].masks().items(),
     153          73 :                         m_data[name].attrs().items(), name));
     154             :   else
     155        4237 :     m_data.insert_or_assign(name, DataArray(std::move(data)));
     156        4310 : }
     157             : 
     158             : // See docs of overload for data arrays.
     159         486 : void Dataset::setDataInit(const std::string &name, Variable data,
     160             :                           const AttrPolicy attrPolicy) {
     161         486 :   if (!is_valid()) {
     162         418 :     m_coords.setSizes(data.dims());
     163         418 :     m_valid = true;
     164             :   }
     165         486 :   setData(name, std::move(data), attrPolicy);
     166         486 : }
     167             : 
     168             : namespace {
     169        3248 : auto coords_to_skip(const Dataset &dst, const DataArray &src) {
     170        3248 :   std::vector<Dim> to_skip;
     171        6017 :   for (auto &&[dim, coord] : src.coords())
     172        2770 :     if (const auto it = dst.coords().find(dim); it != dst.coords().end()) {
     173        1080 :       if (it->second.is_aligned() == coord.is_aligned())
     174        1080 :         expect::matching_coord(dim, coord, it->second, "set coord");
     175           0 :       else if (it->second.is_aligned())
     176             :         // Aligned coords overwrite unaligned.
     177           0 :         to_skip.push_back(dim);
     178             :     }
     179        3247 :   return to_skip;
     180           1 : }
     181             : } // namespace
     182             : 
     183             : /// Set (insert or replace) data from a DataArray with a given name.
     184             : ///
     185             : /// Coordinates, masks, and attributes of the data array are added to the
     186             : /// dataset. Throws if there are existing but mismatching coords, masks, or
     187             : /// attributes. Throws if the provided data brings the dataset into an
     188             : /// inconsistent state (mismatching dimensions).
     189        3255 : void Dataset::setData(const std::string &name, const DataArray &data) {
     190             :   // Return early on self assign to avoid exceptions from Python inplace ops
     191        3255 :   if (const auto it = find(name); it != end()) {
     192          76 :     if (it->data().is_same(data.data()) && it->masks() == data.masks() &&
     193          76 :         it->attrs() == data.attrs() && it->coords() == data.coords())
     194           7 :       return;
     195             :   }
     196        3248 :   const auto to_skip = coords_to_skip(*this, data);
     197             : 
     198        3250 :   setData(name, data.data());
     199        3244 :   auto &item = m_data[name];
     200        6012 :   for (auto &&[dim, coord] : data.coords())
     201        4458 :     if (m_coords.find(dim) == m_coords.end() &&
     202        4458 :         std::find(to_skip.begin(), to_skip.end(), dim) == to_skip.end())
     203        1690 :       setCoord(dim, coord);
     204             :   // Attrs might be shadowed by a coord, but this cannot be prevented in
     205             :   // general, so instead of failing here we proceed (and may fail later if
     206             :   // meta() is called).
     207        3244 :   item.attrs() = data.attrs();
     208        3244 :   item.masks() = data.masks();
     209        3247 : }
     210             : 
     211             : /// Like setData but allow inserting into a default-initialized dataset.
     212             : ///
     213             : /// A default-constructed dataset cannot be filled using setData or setCoord
     214             : /// as the dataset's dimensions are unknown and the input cannot be validated.
     215             : /// setDataInit sets the sizes when called with a default-initialized dataset.
     216             : /// It can be used for creating a new dataset and filling it step by step.
     217             : ///
     218             : /// When using this, always make sure to ultimately produce a valid dataset.
     219             : /// setDataInit is often called in a loop.
     220             : /// Keep in mind that the loop might not run when the input dataset is empty!
     221         834 : void Dataset::setDataInit(const std::string &name, const DataArray &data) {
     222         834 :   if (!is_valid()) {
     223         670 :     m_coords.setSizes(data.dims());
     224         670 :     m_valid = true;
     225             :   }
     226         834 :   setData(name, data);
     227         834 : }
     228             : 
     229             : /// Return slice of the dataset along given dimension with given extents.
     230        1294 : Dataset Dataset::slice(const Slice &s) const {
     231        2588 :   Dataset out(slice_map(m_coords.sizes(), m_data, s));
     232        1291 :   out.m_coords = m_coords.slice_coords(s);
     233        1290 :   out.m_readonly = true;
     234        1290 :   return out;
     235           1 : }
     236             : 
     237          12 : Dataset &Dataset::setSlice(const Slice &s, const Dataset &data) {
     238             :   // Validate slice on all items as a dry-run
     239          12 :   expect::coords_are_superset(slice(s).coords(), data.coords(), "");
     240          27 :   for (const auto &[name, item] : m_data)
     241          17 :     item.validateSlice(s, data.m_data.at(name));
     242             :   // Only if all items checked for dry-run does modification go-ahead
     243          24 :   for (auto &&[name, item] : m_data)
     244          14 :     item.setSlice(s, data.m_data.at(name));
     245          10 :   return *this;
     246             : }
     247             : 
     248           1 : Dataset &Dataset::setSlice(const Slice &s, const DataArray &data) {
     249             :   // Validate slice on all items as a dry-run
     250           1 :   expect::coords_are_superset(slice(s).coords(), data.coords(), "");
     251           1 :   for (const auto &item : m_data)
     252           1 :     item.second.validateSlice(s, data);
     253             :   // Only if all items checked for dry-run does modification go-ahead
     254           0 :   for (auto &&[_, val] : m_data)
     255           0 :     val.setSlice(s, data);
     256           0 :   return *this;
     257             : }
     258             : 
     259           2 : Dataset &Dataset::setSlice(const Slice &s, const Variable &data) {
     260           5 :   for (auto &&item : *this)
     261           3 :     item.setSlice(s, data);
     262           2 :   return *this;
     263             : }
     264             : 
     265             : /// Rename dimensions according to the given (from, to) pairs.
     266             : Dataset
     267          53 : Dataset::rename_dims(const std::vector<std::pair<Dim, Dim>> &names) const {
     268         104 :   Dataset out({}, m_coords.rename_dims(names));
     269         114 :   for (const auto &[name, da] : m_data)
     270          64 :     out.setData(name, da.rename_dims(names, false));
     271          50 :   return out;
     272           1 : }
     273             : 
     274             : /// Return true if the datasets have identical content.
     275        1199 : bool Dataset::operator==(const Dataset &other) const {
     276        1199 :   if (size() != other.size())
     277          80 :     return false;
     278        1119 :   if (coords() != other.coords())
     279         240 :     return false;
     280        1559 :   for (const auto &data : *this)
     281         930 :     if (!other.contains(data.name()) || data != other[data.name()])
     282         930 :       return false;
     283         629 :   return true;
     284             : }
     285             : 
     286             : /// Return true if the datasets have mismatching content.
     287         354 : bool Dataset::operator!=(const Dataset &other) const {
     288         354 :   return !operator==(other);
     289             : }
     290             : 
     291         356 : bool equals_nan(const Dataset &a, const Dataset &b) {
     292         356 :   if (a.size() != b.size())
     293          40 :     return false;
     294         316 :   if (!equals_nan(a.coords(), b.coords()))
     295         112 :     return false;
     296         296 :   for (const auto &data : a)
     297         204 :     if (!b.contains(data.name()) || !equals_nan(data, b[data.name()]))
     298         204 :       return false;
     299          92 :   return true;
     300             : }
     301             : 
     302        5294 : const Sizes &Dataset::sizes() const { return m_coords.sizes(); }
     303         453 : const Sizes &Dataset::dims() const { return sizes(); }
     304          14 : Dim Dataset::dim() const {
     305          14 :   core::expect::ndim_is(sizes(), 1);
     306          12 :   return *sizes().begin();
     307             : }
     308         238 : scipp::index Dataset::ndim() const { return scipp::size(m_coords.sizes()); }
     309             : 
     310       18624 : bool Dataset::is_readonly() const noexcept { return m_readonly; }
     311             : 
     312       14340 : bool Dataset::is_valid() const noexcept { return m_valid; }
     313             : 
     314         247 : typename Masks::holder_type union_or(const Masks &currentMasks,
     315             :                                      const Masks &otherMasks) {
     316         247 :   typename Masks::holder_type out;
     317             : 
     318         337 :   for (const auto &[key, item] : currentMasks)
     319          90 :     out.insert_or_assign(key, copy(item));
     320         341 :   for (const auto &[key, item] : otherMasks) {
     321          97 :     if (!currentMasks.contains(key)) {
     322          19 :       out.insert_or_assign(key, copy(item));
     323         153 :     } else if (item.dtype() != core::dtype<bool> ||
     324         153 :                out[key].dtype() != core::dtype<bool>) {
     325           3 :       throw except::TypeError(" Cannot combine non-boolean mask '" + key +
     326           6 :                               "' in operation");
     327         150 :     } else if (item.unit() != scipp::units::none ||
     328          75 :                out[key].unit() != scipp::units::none) {
     329           0 :       throw except::UnitError(" Cannot combine a unit-specified mask '" + key +
     330           0 :                               "' in operation");
     331          75 :     } else if (out[key].dims().includes(item.dims())) {
     332          67 :       out[key] |= item;
     333             :     } else {
     334           8 :       out[key] = out[key] | item;
     335             :     }
     336             :   }
     337         244 :   return out;
     338           3 : }
     339             : 
     340         291 : void union_or_in_place(Masks &currentMasks, const Masks &otherMasks) {
     341             :   using core::to_string;
     342             :   using units::to_string;
     343             : 
     344         449 :   for (const auto &[key, item] : otherMasks) {
     345         163 :     const auto it = currentMasks.find(key);
     346         163 :     if (it == currentMasks.end() && currentMasks.is_readonly()) {
     347           1 :       throw except::NotFoundError("Cannot insert new mask '" + to_string(key) +
     348           2 :                                   "' via a slice.");
     349         164 :     } else if (it != currentMasks.end() && it->second.is_readonly() &&
     350         164 :                it->second != (it->second | item)) {
     351           1 :       throw except::DimensionError("Cannot update mask '" + to_string(key) +
     352             :                                    "' via slice since the mask is implicitly "
     353           2 :                                    "broadcast along the slice dimension.");
     354         464 :     } else if (it != currentMasks.end() &&
     355         303 :                (item.dtype() != core::dtype<bool> ||
     356         311 :                 currentMasks[key].dtype() != core::dtype<bool>)) {
     357           3 :       throw except::TypeError(" Cannot combine non-boolean mask '" + key +
     358           6 :                               "' in operation");
     359         308 :     } else if (it != currentMasks.end() &&
     360         150 :                (item.unit() != scipp::units::none ||
     361         308 :                 currentMasks[key].unit() != scipp::units::none)) {
     362           0 :       throw except::UnitError(" Cannot combine a unit-specified mask '" + key +
     363           0 :                               "' in operation");
     364             :     }
     365             :   }
     366             : 
     367         442 :   for (const auto &[key, item] : otherMasks) {
     368         156 :     const auto it = currentMasks.find(key);
     369         156 :     if (it == currentMasks.end()) {
     370           8 :       currentMasks.set(key, copy(item));
     371         148 :     } else if (!it->second.is_readonly()) {
     372         147 :       it->second |= item;
     373             :     }
     374             :   }
     375         286 : }
     376             : 
     377             : } // namespace scipp::dataset

Generated by: LCOV version 1.14