Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #include <algorithm>
6 : #include <utility>
7 :
8 : #include "scipp/dataset/except.h"
9 : #include "scipp/dataset/sized_dict.h"
10 : #include "scipp/variable/variable_factory.h"
11 :
12 : namespace scipp::dataset {
13 :
14 : namespace {
15 459398 : template <class T> void expect_writable(const T &dict) {
16 459398 : if (dict.is_readonly())
17 31 : throw except::DataArrayError(
18 : "Read-only flag is set, cannot mutate metadata dict.");
19 459367 : }
20 :
21 71 : void merge_sizes_into(Sizes &target, const Dimensions &s) {
22 : using std::to_string;
23 :
24 119 : for (const auto &dim : s) {
25 50 : if (target.contains(dim)) {
26 17 : const auto a = target[dim];
27 17 : const auto b = s[dim];
28 17 : if (a == b + 1) // had bin-edges, replace by regular coord
29 3 : target.resize(dim, b);
30 14 : else if (a + 1 == b) { // had regular coord, got extra by bin-edges
31 : // keep current
32 11 : } else if (a != b)
33 2 : throw except::DimensionError(
34 4 : "Conflicting length in dimension " + to_string(dim) + ": " +
35 8 : to_string(target[dim]) + " vs " + to_string(s[dim]));
36 : } else {
37 33 : target.set(dim, s[dim]);
38 : }
39 : }
40 69 : }
41 :
42 : template <class Key, class Value>
43 66 : auto make_from_items(typename SizedDict<Key, Value>::holder_type items,
44 : const bool readonly) {
45 66 : Sizes sizes;
46 135 : for (auto &&[key, value] : items) {
47 71 : merge_sizes_into(sizes, value.dims());
48 : }
49 130 : return SizedDict<Key, Value>(std::move(sizes), std::move(items), readonly);
50 66 : }
51 : } // namespace
52 :
/// Construct from explicit sizes and a braced list of (key, value) items.
///
/// Delegates to the (Sizes, holder_type, bool) constructor after converting
/// the initializer list into a holder_type.
template <class Key, class Value>
SizedDict<Key, Value>::SizedDict(
    const Sizes &sizes,
    std::initializer_list<std::pair<const Key, Value>> items,
    const bool readonly)
    : SizedDict(sizes, holder_type(items), readonly) {}
59 :
/// Construct from a braced list of (key, value) items with automatically
/// determined sizes.
///
/// Delegates to the (AutoSizeTag, holder_type, bool) constructor.
template <class Key, class Value>
SizedDict<Key, Value>::SizedDict(
    AutoSizeTag tag, std::initializer_list<std::pair<const Key, Value>> items,
    const bool readonly)
    : SizedDict(tag, holder_type(items), readonly) {}
65 :
66 : template <class Key, class Value>
67 474306 : SizedDict<Key, Value>::SizedDict(Sizes sizes, holder_type items,
68 : const bool readonly)
69 474306 : : m_sizes(std::move(sizes)) {
70 653618 : for (auto &&[key, value] : items)
71 179328 : set(key, std::move(value));
72 : // `set` requires Dict to be writable, set readonly flag at the end.
73 474298 : m_readonly = readonly; // NOLINT(cppcoreguidelines-prefer-member-initializer)
74 474314 : }
75 :
/// Construct from a container of items, deriving the sizes from the items'
/// dims via make_from_items.
template <class Key, class Value>
SizedDict<Key, Value>::SizedDict(AutoSizeTag, holder_type items,
                                 const bool readonly)
    : SizedDict(make_from_items<Key, Value>(std::move(items), readonly)) {}
80 :
/// Copy constructor.
///
/// Sizes and items are copied, but the read-only flag is NOT: the copy is
/// always created writable (m_readonly is set to false).
template <class Key, class Value>
SizedDict<Key, Value>::SizedDict(const SizedDict &other)
    : m_sizes(other.m_sizes), m_items(other.m_items), m_readonly(false) {}
84 :
/// Move constructor.
///
/// Unlike the copy constructor, this carries over the read-only flag of
/// `other`.
template <class Key, class Value>
SizedDict<Key, Value>::SizedDict(SizedDict &&other) noexcept
    : m_sizes(std::move(other.m_sizes)), m_items(std::move(other.m_items)),
      m_readonly(other.m_readonly) {}
89 :
/// Copy assignment, compiler-generated (member-wise).
template <class Key, class Value>
SizedDict<Key, Value> &
SizedDict<Key, Value>::operator=(const SizedDict &other) = default;
93 :
/// Move assignment, compiler-generated (member-wise).
template <class Key, class Value>
SizedDict<Key, Value> &
SizedDict<Key, Value>::operator=(SizedDict &&other) noexcept = default;
97 :
98 : namespace {
99 : template <class Item, class Key, class Value, class Compare>
100 22614 : bool item_in_other(const Item &item, const SizedDict<Key, Value> &other,
101 : Compare &&compare_data) {
102 22614 : const auto &[name, data] = item;
103 22614 : if (!other.contains(name))
104 72 : return false;
105 22542 : const auto &other_data = other[name];
106 44631 : return compare_data(data, other_data) &&
107 44631 : data.is_aligned() == other_data.is_aligned();
108 : }
109 : } // namespace
110 :
111 : template <class Key, class Value>
112 19748 : bool SizedDict<Key, Value>::operator==(const SizedDict &other) const {
113 19748 : if (size() != other.size())
114 113 : return false;
115 61170 : return std::all_of(this->begin(), this->end(), [&other](const auto &item) {
116 43800 : return item_in_other(item, other,
117 65652 : [](const auto &x, const auto &y) { return x == y; });
118 19635 : });
119 : }
120 :
121 : template <class Key, class Value>
122 1209 : bool equals_nan(const SizedDict<Key, Value> &a,
123 : const SizedDict<Key, Value> &b) {
124 1209 : if (a.size() != b.size())
125 30 : return false;
126 3072 : return std::all_of(a.begin(), a.end(), [&b](const auto &item) {
127 1428 : return item_in_other(
128 2118 : item, b, [](const auto &x, const auto &y) { return equals_nan(x, y); });
129 1179 : });
130 : }
131 :
132 : template <class Key, class Value>
133 19333 : bool SizedDict<Key, Value>::operator!=(const SizedDict &other) const {
134 19333 : return !operator==(other);
135 : }
136 :
/// Returns whether a given key is present in the dict.
template <class Key, class Value>
bool SizedDict<Key, Value>::contains(const Key &k) const {
  return m_items.contains(k);
}
142 :
143 : /// Returns 1 or 0, depending on whether key is present in the view or not.
144 : template <class Key, class Value>
145 23889 : scipp::index SizedDict<Key, Value>::count(const Key &k) const {
146 23889 : return static_cast<scipp::index>(contains(k));
147 : }
148 :
/// Const reference to the item stored under the given key.
///
/// Forwards to `at`, so a missing key is reported by `at`.
template <class Key, class Value>
const Value &SizedDict<Key, Value>::operator[](const Key &key) const {
  return at(key);
}
154 :
/// Const reference to the item stored under the given key.
///
/// Presence of the key is checked with scipp::expect::contains before the
/// lookup.
template <class Key, class Value>
const Value &SizedDict<Key, Value>::at(const Key &key) const {
  scipp::expect::contains(*this, key);
  return m_items.at(key);
}
161 :
162 : /// The coordinate for given dimension.
163 : template <class Key, class Value>
164 43388 : Value SizedDict<Key, Value>::operator[](const Key &key) {
165 43388 : return std::as_const(*this).at(key);
166 : }
167 :
168 : /// The coordinate for given dimension.
169 : template <class Key, class Value>
170 2434 : Value SizedDict<Key, Value>::at(const Key &key) {
171 2434 : return std::as_const(*this).at(key);
172 : }
173 :
174 : /// Return the dimension for given coord.
175 : /// @param key Key of the coordinate in a coord dict
176 : ///
177 : /// Return the dimension of the coord for 1-D coords or Dim::Invalid for 0-D
178 : /// coords. In the special case of multi-dimension coords the following applies,
179 : /// in this order:
180 : /// - For bin-edge coords return the dimension in which the coord dimension
181 : /// exceeds the data dimensions.
182 : /// - Else, for dimension coords (key matching a dimension), return the key.
183 : /// - Else, return Dim::Invalid.
184 : template <class Key, class Value>
185 51166 : Dim SizedDict<Key, Value>::dim_of(const Key &key) const {
186 51166 : const auto &var = at(key);
187 51166 : if (var.dims().ndim() == 0)
188 196 : return Dim::Invalid;
189 50970 : if (var.dims().ndim() == 1)
190 50642 : return var.dims().inner();
191 : if constexpr (std::is_same_v<Key, Dim>) {
192 986 : for (const auto &dim : var.dims())
193 671 : if (core::is_edges(sizes(), var.dims(), dim))
194 13 : return dim;
195 315 : if (var.dims().contains(key))
196 14 : return key; // dimension coord
197 : }
198 301 : return Dim::Invalid;
199 : }
200 :
/// Replace the sizes of the dict.
///
/// scipp::expect::includes is called with the new and the current sizes
/// before the assignment.
template <class Key, class Value>
void SizedDict<Key, Value>::setSizes(const Sizes &sizes) {
  scipp::expect::includes(sizes, m_sizes);
  m_sizes = sizes;
}
206 :
207 : namespace {
208 : template <class Key>
209 438762 : void expect_valid_coord_dims(const Key &key, const Dimensions &coord_dims,
210 : const Sizes &da_sizes) {
211 : using core::to_string;
212 438762 : if (!da_sizes.includes(coord_dims))
213 32 : throw except::DimensionError(
214 0 : "Cannot add coord '" + to_string(key) + "' of dims " +
215 : to_string(coord_dims) + " to DataArray with dims " +
216 32 : to_string(Dimensions{da_sizes.labels(), da_sizes.sizes()}));
217 438746 : }
218 : } // namespace
219 :
220 : template <class Key, class Value>
221 440240 : void SizedDict<Key, Value>::set(const key_type &key, mapped_type coord) {
222 440240 : if (contains(key) && at(key).is_same(coord))
223 1454 : return;
224 438786 : expect_writable(*this);
225 : using core::to_string;
226 438769 : if (is_bins(coord))
227 13 : throw except::VariableError(
228 10 : std::string("Cannot set binned variable as coord or mask.\n") +
229 : "When working with binned data, binned coords or masks are typically "
230 : "set via the `bins` property.\nInstead of\n"
231 : " da.coords[" +
232 2 : to_string(key) + "] = binned_var`\n" +
233 : "use\n"
234 : " da.bins.coords[" +
235 2 : to_string(key) + "] = binned_var`");
236 438764 : auto dims = coord.dims();
237 : // Is a good definition for things that are allowed: "would be possible to
238 : // concat along existing dim or extra dim"?
239 809408 : for (const auto &dim : coord.dims()) {
240 451787 : if (!sizes().contains(dim) && dims[dim] == 2) { // bin edge along extra dim
241 556 : dims.erase(dim);
242 556 : break;
243 451231 : } else if (dims[dim] == sizes()[dim] + 1) {
244 80585 : dims.resize(dim, sizes()[dim]);
245 80585 : break;
246 : }
247 : }
248 438762 : expect_valid_coord_dims(key, dims, m_sizes);
249 438746 : m_items.insert_or_assign(key, std::move(coord));
250 438764 : }
251 :
/// Remove the item stored under `key`, discarding it.
///
/// Implemented via `extract`, so the same writability check applies.
template <class Key, class Value>
void SizedDict<Key, Value>::erase(const key_type &key) {
  static_cast<void>(extract(key));
}
256 :
/// Remove the item stored under `key` and return it.
///
/// Throws except::DataArrayError if the dict is read-only.
template <class Key, class Value>
Value SizedDict<Key, Value>::extract(const key_type &key) {
  expect_writable(*this);
  return m_items.extract(key);
}
262 :
263 : template <class Key, class Value>
264 0 : Value SizedDict<Key, Value>::extract(const key_type &key,
265 : const mapped_type &default_value) {
266 0 : if (contains(key)) {
267 0 : return extract(key);
268 : }
269 0 : return default_value;
270 : }
271 :
272 : template <class Key, class Value>
273 28541 : SizedDict<Key, Value> SizedDict<Key, Value>::slice(const Slice ¶ms) const {
274 28541 : const bool readonly = true;
275 28541 : return {m_sizes.slice(params), slice_map(m_sizes, m_items, params), readonly};
276 : }
277 :
278 : namespace {
279 26846 : constexpr auto unaligned_by_dim_slice = [](const auto &coords, const auto &key,
280 : const auto &var,
281 : const Slice ¶ms) {
282 26846 : if (params == Slice{} || params.end() != -1)
283 20939 : return false;
284 5907 : const Dim dim = params.dim();
285 5907 : return var.dims().contains(dim) && coords.dim_of(key) == dim;
286 : };
287 : } // namespace
288 :
289 : template <class Key, class Value>
290 : SizedDict<Key, Value>
291 10375 : SizedDict<Key, Value>::slice_coords(const Slice ¶ms) const {
292 10375 : auto coords = slice(params);
293 10373 : coords.m_readonly = false;
294 37219 : for (const auto &[key, var] : *this)
295 26846 : if (unaligned_by_dim_slice(*this, key, var, params))
296 1699 : coords.set_aligned(key, false);
297 10373 : coords.m_readonly = true;
298 10373 : return coords;
299 0 : }
300 :
301 : template <class Key, class Value>
302 58 : void SizedDict<Key, Value>::validateSlice(const Slice &s,
303 : const SizedDict &dict) const {
304 : using core::to_string;
305 : using units::to_string;
306 68 : for (const auto &[key, item] : dict) {
307 17 : const auto it = find(key);
308 17 : if (it == end()) {
309 4 : throw except::NotFoundError("Cannot insert new meta data '" +
310 2 : to_string(key) + "' via a slice.");
311 15 : } else if (const auto &var = it->second;
312 21 : (var.is_readonly() || !var.dims().contains(s.dim())) &&
313 21 : (var.dims().contains(s.dim()) ? var.slice(s) : var) != item) {
314 10 : throw except::DimensionError("Cannot update meta data '" +
315 5 : to_string(key) +
316 : "' via slice since it is implicitly "
317 : "broadcast along the slice dimension '" +
318 : to_string(s.dim()) + "'.");
319 : }
320 : }
321 51 : }
322 :
323 : template <class Key, class Value>
324 40 : SizedDict<Key, Value> &SizedDict<Key, Value>::setSlice(const Slice &s,
325 : const SizedDict &dict) {
326 40 : validateSlice(s, dict);
327 43 : for (const auto &[key, item] : dict) {
328 7 : const auto it = find(key);
329 14 : if (it != end() && !it->second.is_readonly() &&
330 14 : it->second.dims().contains(s.dim()))
331 6 : it->second.setSlice(s, item);
332 : }
333 36 : return *this;
334 : }
335 :
336 : template <class Key, class Value>
337 908 : SizedDict<Key, Value> SizedDict<Key, Value>::rename_dims(
338 : const std::vector<std::pair<Dim, Dim>> &names,
339 : const bool fail_on_unknown) const {
340 908 : auto out(*this);
341 908 : out.m_sizes = out.m_sizes.rename_dims(names, fail_on_unknown);
342 1601 : for (auto &&item : out.m_items) {
343 : // DataArray coords/attrs support the special case of length-2 items with a
344 : // dim that is not contained in the data array dims. This occurs, e.g., when
345 : // slicing along a dim that has a bin edge coord. We must prevent renaming
346 : // to such dims. This is the reason for calling with `names` that may
347 : // contain unknown dims (and the `fail_on_unknown` arg). Otherwise the
348 : // caller would need to perform this check.
349 1714 : for (const auto &rename : names)
350 1447 : if (!m_sizes.contains(rename.second) &&
351 427 : item.second.dims().contains(rename.second))
352 5 : throw except::DimensionError("Duplicate dimension " +
353 : units::to_string(rename.second) + ".");
354 694 : item.second = item.second.rename_dims(names, false);
355 : }
356 902 : return out;
357 6 : }
358 :
/// Mark the dict as readonly. Does not imply that items are readonly.
///
/// There is no counterpart in this file for clearing the flag again from
/// outside the class.
template <class Key, class Value>
void SizedDict<Key, Value>::set_readonly() noexcept {
  m_readonly = true;
}
364 :
/// Return true if the dict is readonly. Does not imply that items are readonly.
///
/// When true, `set`, `extract`, `erase` and `set_aligned` throw.
template <class Key, class Value>
bool SizedDict<Key, Value>::is_readonly() const noexcept {
  return m_readonly;
}
370 :
371 : template <class Key, class Value>
372 6 : SizedDict<Key, Value> SizedDict<Key, Value>::as_const() const {
373 6 : holder_type items;
374 6 : items.reserve(m_items.size());
375 9 : for (const auto &[key, val] : m_items)
376 3 : items.insert_or_assign(key, val.as_const());
377 6 : const bool readonly = true;
378 12 : return {sizes(), std::move(items), readonly};
379 6 : }
380 :
381 : template <class Key, class Value>
382 : SizedDict<Key, Value>
383 52284 : SizedDict<Key, Value>::merge_from(const SizedDict &other) const {
384 : using core::to_string;
385 : using units::to_string;
386 :
387 52284 : auto out(*this);
388 52284 : out.m_readonly = false;
389 232038 : for (const auto &[key, value] : other) {
390 179755 : if (out.contains(key))
391 1 : throw except::DataArrayError(
392 0 : "Coord '" + to_string(key) +
393 : "' shadows attr of the same name. Remove the attr if you are slicing "
394 : "an array or use the `coords` and `attrs` properties instead of "
395 : "`meta`.");
396 179754 : out.set(key, value);
397 : }
398 52283 : out.m_readonly = m_readonly;
399 52283 : return out;
400 1 : }
401 :
402 : template <class Key, class Value>
403 21147 : bool SizedDict<Key, Value>::item_applies_to(const Key &key,
404 : const Dimensions &dims) const {
405 21147 : const auto &val = m_items.at(key);
406 42294 : return std::all_of(val.dims().begin(), val.dims().end(),
407 62539 : [&dims](const Dim dim) { return dims.contains(dim); });
408 : }
409 :
410 : template <class Key, class Value>
411 4986 : bool SizedDict<Key, Value>::is_edges(const Key &key,
412 : const std::optional<Dim> dim) const {
413 4986 : const auto &val = this->at(key);
414 9969 : return core::is_edges(m_sizes, val.dims(),
415 14954 : dim.has_value() ? *dim : val.dim());
416 : }
417 :
/// Set the alignment flag of the item stored under `key`.
///
/// Throws except::DataArrayError if the dict is read-only.
template <class Key, class Value>
void SizedDict<Key, Value>::set_aligned(const Key &key, const bool aligned) {
  expect_writable(*this);
  m_items.at(key).set_aligned(aligned);
}
423 :
424 : template <class Key, class Value>
425 269 : core::Dict<Key, Value> union_(const SizedDict<Key, Value> &a,
426 : const SizedDict<Key, Value> &b,
427 : std::string_view opname) {
428 269 : core::Dict<Key, Value> out;
429 269 : out.reserve(a.size() + b.size());
430 565 : for (const auto &[key, val_a] : a)
431 296 : if (val_a.is_aligned())
432 290 : out.insert_or_assign(key, val_a);
433 :
434 538 : for (const auto &[key, val_b] : b) {
435 288 : if (const auto it = a.find(key); it != a.end()) {
436 282 : auto &&val_a = it->second;
437 282 : if (val_a.is_aligned() && val_b.is_aligned())
438 273 : expect::matching_coord(key, val_a, val_b, opname);
439 9 : else if (val_b.is_aligned())
440 : // aligned b takes precedence over unaligned a
441 2 : out.insert_or_assign(key, val_b);
442 7 : else if (!val_a.is_aligned()) {
443 : // neither is aligned
444 3 : if (equals_nan(val_a, val_b))
445 1 : out.insert_or_assign(key, val_b);
446 : // else: mismatching unaligned coords => do not include in out
447 : }
448 : // else: aligned a takes precedence over unaligned b
449 : } else {
450 6 : if (val_b.is_aligned())
451 5 : out.insert_or_assign(key, val_b);
452 : }
453 : }
454 :
455 250 : return out;
456 19 : }
457 :
458 : template <class Key, class Value>
459 244 : core::Dict<Key, Value> intersection(const SizedDict<Key, Value> &a,
460 : const SizedDict<Key, Value> &b) {
461 244 : core::Dict<Key, Value> out;
462 326 : for (const auto &[key, item] : a)
463 82 : if (const auto it = b.find(key);
464 82 : it != b.end() && equals_nan(it->second, item))
465 71 : out.insert_or_assign(key, item);
466 244 : return out;
467 0 : }
468 :
// Explicit instantiations. The templates above are defined in this source
// file, so the supported Key/Value combinations and the free functions
// operating on them are instantiated (and exported) here.
template class SCIPP_DATASET_EXPORT SizedDict<Dim, Variable>;
template class SCIPP_DATASET_EXPORT SizedDict<std::string, Variable>;
template SCIPP_DATASET_EXPORT bool equals_nan(const Coords &a, const Coords &b);
template SCIPP_DATASET_EXPORT bool equals_nan(const Masks &a, const Masks &b);
template SCIPP_DATASET_EXPORT typename Coords::holder_type
union_(const Coords &, const Coords &, std::string_view opname);
template SCIPP_DATASET_EXPORT typename Coords::holder_type
intersection(const Coords &, const Coords &);
477 : } // namespace scipp::dataset
|