Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #include <algorithm>
6 : #include <utility>
7 :
8 : #include "scipp/dataset/except.h"
9 : #include "scipp/dataset/sized_dict.h"
10 : #include "scipp/variable/variable_factory.h"
11 :
12 : namespace scipp::dataset {
13 :
14 : namespace {
15 452534 : template <class T> void expect_writable(const T &dict) {
16 452534 : if (dict.is_readonly())
17 31 : throw except::DataArrayError(
18 : "Read-only flag is set, cannot mutate metadata dict.");
19 452503 : }
20 :
21 71 : void merge_sizes_into(Sizes &target, const Dimensions &s) {
22 : using std::to_string;
23 :
24 119 : for (const auto &dim : s) {
25 50 : if (target.contains(dim)) {
26 17 : const auto a = target[dim];
27 17 : const auto b = s[dim];
28 17 : if (a == b + 1) // had bin-edges, replace by regular coord
29 3 : target.resize(dim, b);
30 14 : else if (a + 1 == b) { // had regular coord, got extra by bin-edges
31 : // keep current
32 11 : } else if (a != b)
33 2 : throw except::DimensionError(
34 4 : "Conflicting length in dimension " + to_string(dim) + ": " +
35 8 : to_string(target[dim]) + " vs " + to_string(s[dim]));
36 : } else {
37 33 : target.set(dim, s[dim]);
38 : }
39 : }
40 69 : }
41 :
42 : template <class Key, class Value>
43 66 : auto make_from_items(typename SizedDict<Key, Value>::holder_type items,
44 : const bool readonly) {
45 66 : Sizes sizes;
46 135 : for (auto &&[key, value] : items) {
47 71 : merge_sizes_into(sizes, value.dims());
48 : }
49 130 : return SizedDict<Key, Value>(std::move(sizes), std::move(items), readonly);
50 66 : }
51 : } // namespace
52 :
/// Construct from explicit sizes and a braced list of key/value pairs.
///
/// Converts `items` into a `holder_type` and delegates to the
/// (Sizes, holder_type, bool) constructor.
53 : template <class Key, class Value>
54 2239 : SizedDict<Key, Value>::SizedDict(
55 : const Sizes &sizes,
56 : std::initializer_list<std::pair<const Key, Value>> items,
57 : const bool readonly)
58 2239 : : SizedDict(sizes, holder_type(items), readonly) {}
59 :
/// Construct from a braced list of key/value pairs without explicit sizes.
///
/// Converts `items` into a `holder_type` and delegates to the
/// (AutoSizeTag, holder_type, bool) constructor.
60 : template <class Key, class Value>
61 0 : SizedDict<Key, Value>::SizedDict(
62 : AutoSizeTag tag, std::initializer_list<std::pair<const Key, Value>> items,
63 : const bool readonly)
64 0 : : SizedDict(tag, holder_type(items), readonly) {}
65 :
66 : template <class Key, class Value>
67 470804 : SizedDict<Key, Value>::SizedDict(Sizes sizes, holder_type items,
68 : const bool readonly)
69 470804 : : m_sizes(std::move(sizes)) {
70 649424 : for (auto &&[key, value] : items)
71 178636 : set(key, std::move(value));
72 : // `set` requires Dict to be writable, set readonly flag at the end.
73 470796 : m_readonly = readonly; // NOLINT(cppcoreguidelines-prefer-member-initializer)
74 470812 : }
75 :
/// Construct from a holder of items, deriving the sizes from the items.
///
/// Delegates to `make_from_items`, which merges the dims of all values into
/// the sizes of the new dict.
76 : template <class Key, class Value>
77 66 : SizedDict<Key, Value>::SizedDict(AutoSizeTag, holder_type items,
78 : const bool readonly)
79 66 : : SizedDict(make_from_items<Key, Value>(std::move(items), readonly)) {}
80 :
/// Copy constructor.
///
/// Copies sizes and items. The read-only flag is NOT copied: the new dict
/// always starts out writable.
81 : template <class Key, class Value>
82 880779 : SizedDict<Key, Value>::SizedDict(const SizedDict &other)
83 880779 : : m_sizes(other.m_sizes), m_items(other.m_items), m_readonly(false) {}
84 :
/// Move constructor.
///
/// Moves sizes and items out of `other`. In contrast to the copy constructor
/// the read-only flag of `other` is carried over.
85 : template <class Key, class Value>
86 65275 : SizedDict<Key, Value>::SizedDict(SizedDict &&other) noexcept
87 65275 : : m_sizes(std::move(other.m_sizes)), m_items(std::move(other.m_items)),
88 65275 : m_readonly(other.m_readonly) {}
89 :
/// Copy assignment, compiler-generated (memberwise).
///
/// Being memberwise, this also assigns the read-only flag, unlike the copy
/// constructor which resets it.
90 : template <class Key, class Value>
91 : SizedDict<Key, Value> &
92 6524 : SizedDict<Key, Value>::operator=(const SizedDict &other) = default;
93 :
/// Move assignment, compiler-generated (memberwise) and declared noexcept.
94 : template <class Key, class Value>
95 : SizedDict<Key, Value> &
96 4119 : SizedDict<Key, Value>::operator=(SizedDict &&other) noexcept = default;
97 :
98 : namespace {
99 : template <class Item, class Key, class Value, class Compare>
100 22436 : bool item_in_other(const Item &item, const SizedDict<Key, Value> &other,
101 : Compare &&compare_data) {
102 22436 : const auto &[name, data] = item;
103 22436 : if (!other.contains(name))
104 72 : return false;
105 22364 : const auto &other_data = other[name];
106 44275 : return compare_data(data, other_data) &&
107 44275 : data.is_aligned() == other_data.is_aligned();
108 : }
109 : } // namespace
110 :
111 : template <class Key, class Value>
112 19040 : bool SizedDict<Key, Value>::operator==(const SizedDict &other) const {
113 19040 : if (size() != other.size())
114 113 : return false;
115 59576 : return std::all_of(this->begin(), this->end(), [&other](const auto &item) {
116 43444 : return item_in_other(item, other,
117 65118 : [](const auto &x, const auto &y) { return x == y; });
118 18927 : });
119 : }
120 :
121 : template <class Key, class Value>
122 1209 : bool equals_nan(const SizedDict<Key, Value> &a,
123 : const SizedDict<Key, Value> &b) {
124 1209 : if (a.size() != b.size())
125 30 : return false;
126 3072 : return std::all_of(a.begin(), a.end(), [&b](const auto &item) {
127 1428 : return item_in_other(
128 2118 : item, b, [](const auto &x, const auto &y) { return equals_nan(x, y); });
129 1179 : });
130 : }
131 :
132 : template <class Key, class Value>
133 18625 : bool SizedDict<Key, Value>::operator!=(const SizedDict &other) const {
134 18625 : return !operator==(other);
135 : }
136 :
137 : /// Returns whether a given key is present in the view.
138 : template <class Key, class Value>
139 942541 : bool SizedDict<Key, Value>::contains(const Key &k) const {
140 942541 : return m_items.contains(k);
141 : }
142 :
143 : /// Returns 1 or 0, depending on whether key is present in the view or not.
144 : template <class Key, class Value>
145 23853 : scipp::index SizedDict<Key, Value>::count(const Key &k) const {
146 23853 : return static_cast<scipp::index>(contains(k));
147 : }
148 :
149 : /// Const reference to the coordinate for given dimension.
150 : template <class Key, class Value>
151 129797 : const Value &SizedDict<Key, Value>::operator[](const Key &key) const {
152 129797 : return at(key);
153 : }
154 :
155 : /// Const reference to the coordinate for given dimension.
156 : template <class Key, class Value>
157 233152 : const Value &SizedDict<Key, Value>::at(const Key &key) const {
158 233152 : scipp::expect::contains(*this, key);
159 228704 : return m_items.at(key);
160 : }
161 :
162 : /// The coordinate for given dimension.
163 : template <class Key, class Value>
164 42634 : Value SizedDict<Key, Value>::operator[](const Key &key) {
165 42634 : return std::as_const(*this).at(key);
166 : }
167 :
168 : /// The coordinate for given dimension.
169 : template <class Key, class Value>
170 2297 : Value SizedDict<Key, Value>::at(const Key &key) {
171 2297 : return std::as_const(*this).at(key);
172 : }
173 :
174 : /// Return the dimension for given coord.
175 : /// @param key Key of the coordinate in a coord dict
176 : ///
177 : /// Return the dimension of the coord for 1-D coords or Dim::Invalid for 0-D
178 : /// coords. In the special case of multi-dimension coords the following applies,
179 : /// in this order:
180 : /// - For bin-edge coords return the dimension in which the coord dimension
181 : /// exceeds the data dimensions.
182 : /// - Else, for dimension coords (key matching a dimension), return the key.
183 : /// - Else, return Dim::Invalid.
184 : template <class Key, class Value>
185 51166 : Dim SizedDict<Key, Value>::dim_of(const Key &key) const {
186 51166 : const auto &var = at(key);
187 51166 : if (var.dims().ndim() == 0)
188 196 : return Dim::Invalid;
189 50970 : if (var.dims().ndim() == 1)
190 50642 : return var.dims().inner();
191 : if constexpr (std::is_same_v<Key, Dim>) {
192 986 : for (const auto &dim : var.dims())
193 671 : if (core::is_edges(sizes(), var.dims(), dim))
194 13 : return dim;
195 315 : if (var.dims().contains(key))
196 14 : return key; // dimension coord
197 : }
198 301 : return Dim::Invalid;
199 : }
200 :
201 : template <class Key, class Value>
202 58985 : void SizedDict<Key, Value>::setSizes(const Sizes &sizes) {
203 58985 : scipp::expect::includes(sizes, m_sizes);
204 58985 : m_sizes = sizes;
205 58985 : }
206 :
207 : namespace {
208 : template <class Key>
209 432648 : void expect_valid_coord_dims(const Key &key, const Dimensions &coord_dims,
210 : const Sizes &da_sizes) {
211 : using core::to_string;
212 432648 : if (!da_sizes.includes(coord_dims))
213 32 : throw except::DimensionError(
214 0 : "Cannot add coord '" + to_string(key) + "' of dims " +
215 : to_string(coord_dims) + " to DataArray with dims " +
216 32 : to_string(Dimensions{da_sizes.labels(), da_sizes.sizes()}));
217 432632 : }
218 : } // namespace
219 :
220 : template <class Key, class Value>
221 434045 : void SizedDict<Key, Value>::set(const key_type &key, mapped_type coord) {
222 434045 : if (contains(key) && at(key).is_same(coord))
223 1373 : return;
224 432672 : expect_writable(*this);
225 : using core::to_string;
226 432655 : if (is_bins(coord))
227 13 : throw except::VariableError(
228 10 : std::string("Cannot set binned variable as coord or mask.\n") +
229 : "When working with binned data, binned coords or masks are typically "
230 : "set via the `bins` property.\nInstead of\n"
231 : " da.coords[" +
232 2 : to_string(key) + "] = binned_var`\n" +
233 : "use\n"
234 : " da.bins.coords[" +
235 2 : to_string(key) + "] = binned_var`");
236 432650 : auto dims = coord.dims();
237 : // Is a good definition for things that are allowed: "would be possible to
238 : // concat along existing dim or extra dim"?
239 798710 : for (const auto &dim : coord.dims()) {
240 446073 : if (!sizes().contains(dim) && dims[dim] == 2) { // bin edge along extra dim
241 556 : dims.erase(dim);
242 556 : break;
243 445517 : } else if (dims[dim] == sizes()[dim] + 1) {
244 79455 : dims.resize(dim, sizes()[dim]);
245 79455 : break;
246 : }
247 : }
248 432648 : expect_valid_coord_dims(key, dims, m_sizes);
249 432632 : m_items.insert_or_assign(key, std::move(coord));
250 432650 : }
251 :
252 : template <class Key, class Value>
253 7947 : void SizedDict<Key, Value>::erase(const key_type &key) {
254 7947 : static_cast<void>(extract(key));
255 7933 : }
256 :
257 : template <class Key, class Value>
258 8226 : Value SizedDict<Key, Value>::extract(const key_type &key) {
259 8226 : expect_writable(*this);
260 8213 : return m_items.extract(key);
261 : }
262 :
263 : template <class Key, class Value>
264 0 : Value SizedDict<Key, Value>::extract(const key_type &key,
265 : const mapped_type &default_value) {
266 0 : if (contains(key)) {
267 0 : return extract(key);
268 : }
269 0 : return default_value;
270 : }
271 :
272 : template <class Key, class Value>
273 28577 : SizedDict<Key, Value> SizedDict<Key, Value>::slice(const Slice ¶ms) const {
274 28577 : const bool readonly = true;
275 28577 : return {m_sizes.slice(params), slice_map(m_sizes, m_items, params), readonly};
276 : }
277 :
278 : namespace {
279 26901 : constexpr auto unaligned_by_dim_slice = [](const auto &coords, const auto &key,
280 : const auto &var,
281 : const Slice ¶ms) {
282 26901 : if (params == Slice{} || params.end() != -1)
283 20994 : return false;
284 5907 : const Dim dim = params.dim();
285 5907 : return var.dims().contains(dim) && coords.dim_of(key) == dim;
286 : };
287 : } // namespace
288 :
289 : template <class Key, class Value>
290 : SizedDict<Key, Value>
291 10387 : SizedDict<Key, Value>::slice_coords(const Slice ¶ms) const {
292 10387 : auto coords = slice(params);
293 10385 : coords.m_readonly = false;
294 37286 : for (const auto &[key, var] : *this)
295 26901 : if (unaligned_by_dim_slice(*this, key, var, params))
296 1699 : coords.set_aligned(key, false);
297 10385 : coords.m_readonly = true;
298 10385 : return coords;
299 0 : }
300 :
301 : template <class Key, class Value>
302 58 : void SizedDict<Key, Value>::validateSlice(const Slice &s,
303 : const SizedDict &dict) const {
304 : using core::to_string;
305 : using units::to_string;
306 68 : for (const auto &[key, item] : dict) {
307 17 : const auto it = find(key);
308 17 : if (it == end()) {
309 4 : throw except::NotFoundError("Cannot insert new meta data '" +
310 2 : to_string(key) + "' via a slice.");
311 15 : } else if (const auto &var = it->second;
312 21 : (var.is_readonly() || !var.dims().contains(s.dim())) &&
313 21 : (var.dims().contains(s.dim()) ? var.slice(s) : var) != item) {
314 10 : throw except::DimensionError("Cannot update meta data '" +
315 5 : to_string(key) +
316 : "' via slice since it is implicitly "
317 : "broadcast along the slice dimension '" +
318 : to_string(s.dim()) + "'.");
319 : }
320 : }
321 51 : }
322 :
323 : template <class Key, class Value>
324 40 : SizedDict<Key, Value> &SizedDict<Key, Value>::setSlice(const Slice &s,
325 : const SizedDict &dict) {
326 40 : validateSlice(s, dict);
327 43 : for (const auto &[key, item] : dict) {
328 7 : const auto it = find(key);
329 14 : if (it != end() && !it->second.is_readonly() &&
330 14 : it->second.dims().contains(s.dim()))
331 6 : it->second.setSlice(s, item);
332 : }
333 36 : return *this;
334 : }
335 :
336 : template <class Key, class Value>
337 908 : SizedDict<Key, Value> SizedDict<Key, Value>::rename_dims(
338 : const std::vector<std::pair<Dim, Dim>> &names,
339 : const bool fail_on_unknown) const {
340 908 : auto out(*this);
341 908 : out.m_sizes = out.m_sizes.rename_dims(names, fail_on_unknown);
342 1601 : for (auto &&item : out.m_items) {
343 : // DataArray coords/attrs support the special case of length-2 items with a
344 : // dim that is not contained in the data array dims. This occurs, e.g., when
345 : // slicing along a dim that has a bin edge coord. We must prevent renaming
346 : // to such dims. This is the reason for calling with `names` that may
347 : // contain unknown dims (and the `fail_on_unknown` arg). Otherwise the
348 : // caller would need to perform this check.
349 1714 : for (const auto &rename : names)
350 1447 : if (!m_sizes.contains(rename.second) &&
351 427 : item.second.dims().contains(rename.second))
352 5 : throw except::DimensionError("Duplicate dimension " +
353 : units::to_string(rename.second) + ".");
354 694 : item.second = item.second.rename_dims(names, false);
355 : }
356 902 : return out;
357 6 : }
358 :
359 : /// Mark the dict as readonly. Does not imply that items are readonly.
360 : template <class Key, class Value>
361 48483 : void SizedDict<Key, Value>::set_readonly() noexcept {
362 48483 : m_readonly = true;
363 48483 : }
364 :
365 : /// Return true if the dict is readonly. Does not imply that items are readonly.
366 : template <class Key, class Value>
367 452548 : bool SizedDict<Key, Value>::is_readonly() const noexcept {
368 452548 : return m_readonly;
369 : }
370 :
371 : template <class Key, class Value>
372 6 : SizedDict<Key, Value> SizedDict<Key, Value>::as_const() const {
373 6 : holder_type items;
374 6 : items.reserve(m_items.size());
375 9 : for (const auto &[key, val] : m_items)
376 3 : items.insert_or_assign(key, val.as_const());
377 6 : const bool readonly = true;
378 12 : return {sizes(), std::move(items), readonly};
379 6 : }
380 :
381 : template <class Key, class Value>
382 : SizedDict<Key, Value>
383 50808 : SizedDict<Key, Value>::merge_from(const SizedDict &other) const {
384 : using core::to_string;
385 : using units::to_string;
386 :
387 50808 : auto out(*this);
388 50808 : out.m_readonly = false;
389 225966 : for (const auto &[key, value] : other) {
390 175159 : if (out.contains(key))
391 1 : throw except::DataArrayError(
392 0 : "Coord '" + to_string(key) +
393 : "' shadows attr of the same name. Remove the attr if you are slicing "
394 : "an array or use the `coords` and `attrs` properties instead of "
395 : "`meta`.");
396 175158 : out.set(key, value);
397 : }
398 50807 : out.m_readonly = m_readonly;
399 50807 : return out;
400 1 : }
401 :
402 : template <class Key, class Value>
403 20895 : bool SizedDict<Key, Value>::item_applies_to(const Key &key,
404 : const Dimensions &dims) const {
405 20895 : const auto &val = m_items.at(key);
406 41790 : return std::all_of(val.dims().begin(), val.dims().end(),
407 61783 : [&dims](const Dim dim) { return dims.contains(dim); });
408 : }
409 :
410 : template <class Key, class Value>
411 4968 : bool SizedDict<Key, Value>::is_edges(const Key &key,
412 : const std::optional<Dim> dim) const {
413 4968 : const auto &val = this->at(key);
414 9933 : return core::is_edges(m_sizes, val.dims(),
415 14900 : dim.has_value() ? *dim : val.dim());
416 : }
417 :
418 : template <class Key, class Value>
419 11636 : void SizedDict<Key, Value>::set_aligned(const Key &key, const bool aligned) {
420 11636 : expect_writable(*this);
421 11635 : m_items.at(key).set_aligned(aligned);
422 11635 : }
423 :
424 : template <class Key, class Value>
425 269 : core::Dict<Key, Value> union_(const SizedDict<Key, Value> &a,
426 : const SizedDict<Key, Value> &b,
427 : std::string_view opname) {
428 269 : core::Dict<Key, Value> out;
429 269 : out.reserve(a.size() + b.size());
430 565 : for (const auto &[key, val_a] : a)
431 296 : if (val_a.is_aligned())
432 290 : out.insert_or_assign(key, val_a);
433 :
434 538 : for (const auto &[key, val_b] : b) {
435 288 : if (const auto it = a.find(key); it != a.end()) {
436 282 : auto &&val_a = it->second;
437 282 : if (val_a.is_aligned() && val_b.is_aligned())
438 273 : expect::matching_coord(key, val_a, val_b, opname);
439 9 : else if (val_b.is_aligned())
440 : // aligned b takes precedence over unaligned a
441 2 : out.insert_or_assign(key, val_b);
442 7 : else if (!val_a.is_aligned()) {
443 : // neither is aligned
444 3 : if (equals_nan(val_a, val_b))
445 1 : out.insert_or_assign(key, val_b);
446 : // else: mismatching unaligned coords => do not include in out
447 : }
448 : // else: aligned a takes precedence over unaligned b
449 : } else {
450 6 : if (val_b.is_aligned())
451 5 : out.insert_or_assign(key, val_b);
452 : }
453 : }
454 :
455 250 : return out;
456 19 : }
457 :
458 : template <class Key, class Value>
459 244 : core::Dict<Key, Value> intersection(const SizedDict<Key, Value> &a,
460 : const SizedDict<Key, Value> &b) {
461 244 : core::Dict<Key, Value> out;
462 326 : for (const auto &[key, item] : a)
463 82 : if (const auto it = b.find(key);
464 82 : it != b.end() && equals_nan(it->second, item))
465 71 : out.insert_or_assign(key, item);
466 244 : return out;
467 0 : }
468 :
// Explicit instantiations. The member templates above are defined in this
// translation unit only, so the supported key/value combinations are
// instantiated (and exported) here.
// NOTE(review): Coords and Masks are presumably aliases of the two SizedDict
// instantiations below; confirm in sized_dict.h.
469 : template class SCIPP_DATASET_EXPORT SizedDict<Dim, Variable>;
470 : template class SCIPP_DATASET_EXPORT SizedDict<std::string, Variable>;
471 : template SCIPP_DATASET_EXPORT bool equals_nan(const Coords &a, const Coords &b);
472 : template SCIPP_DATASET_EXPORT bool equals_nan(const Masks &a, const Masks &b);
473 : template SCIPP_DATASET_EXPORT typename Coords::holder_type
474 : union_(const Coords &, const Coords &, std::string_view opname);
475 : template SCIPP_DATASET_EXPORT typename Coords::holder_type
476 : intersection(const Coords &, const Coords &);
477 : } // namespace scipp::dataset
|