Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #pragma once
6 :
7 : #include <functional>
8 : #include <string>
9 :
10 : #include "scipp/dataset/data_array.h"
11 : #include "scipp/dataset/sized_dict.h"
12 : #include "scipp/variable/variable.h"
13 :
14 : namespace scipp::dataset {
15 :
16 : namespace detail {
17 :
18 : /// Functor for Dataset's transforming iterators: forwards a (key, data) entry to `item.second.view_with_coords(...)`.
19 : template <class D> struct with_coords {
20 : const D *dataset; // non-owning; its coords() and is_readonly() are read on every call
21 7354 : template <class T> auto operator()(T &&item) const { // `item` must expose `.first` (key) and `.second` (data)
22 14708 : return item.second.view_with_coords(dataset->coords(), item.first,
23 7354 : dataset->is_readonly());
24 : }
25 : };
26 : template <class D> with_coords(const D *) -> with_coords<D>; // deduction guide, allows `with_coords{ptr}`
27 :
28 : template <class D> struct item_with_coords { // like with_coords, but the result also carries the entry's key
29 : const D *dataset; // non-owning; handed on to with_coords
30 625 : template <class T> auto operator()(T &&item) const {
31 625 : return std::pair{item.first, with_coords{dataset}(item)}; // (key, result of with_coords for this entry)
32 : }
33 : };
34 :
35 : template <class D> item_with_coords(const D *) -> item_with_coords<D>; // deduction guide, allows `item_with_coords{ptr}`
36 :
37 : // Tag used to disambiguate constructors. `inline` (not `static`) so all translation units share one definition of the tag object.
38 : struct init_from_data_arrays_t {};
39 : inline constexpr auto init_from_data_arrays = init_from_data_arrays_t{};
40 : } // namespace detail
41 :
42 : /// Collection of named data arrays (`m_data`) plus one set of coordinates (`m_coords`) held at dataset level.
43 : class SCIPP_DATASET_EXPORT Dataset {
44 : public:
45 : using key_type = std::string;
46 : using mapped_type = DataArray;
47 : using value_type = std::pair<const std::string &, DataArray>;
48 :
49 : Dataset();
50 : Dataset(const Dataset &other);
51 1668 : Dataset(Dataset &&other) = default;
52 : explicit Dataset(const DataArray &data);
53 :
54 : // The constructor with the DataMap template also works with Variables.
55 : // But the compiler cannot deduce the type when called with initializer lists, so this overload fixes `data` to a dict of Variables.
56 : template <class CoordMap = core::Dict<Dim, Variable>>
57 432 : explicit Dataset(core::Dict<std::string, Variable> data,
58 : CoordMap coords = core::Dict<Dim, Variable>{})
59 864 : : Dataset(std::move(data), std::move(coords),
60 865 : detail::init_from_data_arrays) {} // delegates to the generic constructor below
61 :
62 : template <class DataMap = core::Dict<std::string, DataArray>,
63 : class CoordMap = core::Dict<Dim, Variable>>
64 1725 : explicit Dataset( // generic constructor; the unnamed tag parameter only disambiguates overloads
65 : DataMap data, CoordMap coords = core::Dict<Dim, Variable>{},
66 1725 : const detail::init_from_data_arrays_t = detail::init_from_data_arrays) {
67 1725 : if (data.empty()) { // no data items: m_coords is built from `coords` alone
68 : if constexpr (std::is_same_v<std::decay_t<CoordMap>, Coords>) // already a Coords object: adopt it as-is
69 397 : m_coords = std::move(coords);
70 : else
71 42 : m_coords = Coords(AutoSizeTag{}, std::move(coords)); // presumably sizes are deduced from the coords (AutoSizeTag) - TODO confirm
72 : } else {
73 : // Set the sizes based on data in order to handle bin-edge coords.
74 : // The coords are then individually checked against those sizes.
75 1287 : m_coords = Coords(data.begin()->second.dims(), {}); // NOTE(review): `->second` assumes a dict-like DataMap; confirm for the Dataset-derived case handled below
76 1290 : for (auto &&[name, item] : coords) { // coords are added one at a time through setCoord
77 3 : setCoord(name, std::move(item));
78 : }
79 :
80 : if constexpr (std::is_base_of_v<Dataset, std::decay_t<DataMap>>) // Dataset-like input: each item reports its own name()
81 : for (auto &&item : data) {
82 : setData(item.name(), item);
83 : }
84 : else
85 2685 : for (auto &&[name, item] : data) { // dict-like input: (name, item) pairs
86 1398 : setData(std::string(name), std::move(item));
87 : }
88 : }
89 1726 : }
90 :
91 : Dataset &operator=(const Dataset &other);
92 : Dataset &operator=(Dataset &&other);
93 :
94 : void setCoords(Coords other);
95 :
96 : /// Return the number of data items in the dataset.
97 : ///
98 : /// This does not include coordinates or attributes, but only all named
99 : /// entities (which can consist of various combinations of values, variances,
100 : /// and events coordinates).
101 3461 : index size() const noexcept { return scipp::size(m_data); }
102 : /// Return true if there are 0 data items in the dataset.
103 19 : [[nodiscard]] bool empty() const noexcept { return size() == 0; }
104 : /// Return the number of elements that space is currently allocated for.
105 48 : [[nodiscard]] index capacity() const noexcept { return m_data.capacity(); }
106 :
107 : void clear();
108 :
109 : const Coords &coords() const noexcept;
110 : Coords &coords() noexcept;
111 :
112 : Dataset drop_coords(const scipp::span<const Dim> coord_names) const;
113 :
114 : Dataset drop_masks(const scipp::span<const std::string> mask_names) const;
115 :
116 : Dataset drop_attrs(const scipp::span<const Dim> attr_names) const;
117 :
118 : const Coords &meta() const noexcept;
119 : Coords &meta() noexcept;
120 :
121 : bool contains(const std::string &name) const noexcept;
122 :
123 : void erase(const std::string &name);
124 : [[nodiscard]] DataArray extract(const std::string &name);
125 :
126 : auto find() const && = delete; // NOTE(review): takes no arguments, so it is never a candidate for find(name); an rvalue Dataset can still bind to the `const &` overload below
127 : auto find() && = delete; // NOTE(review): same as above - does not block find(name) on rvalues
128 3255 : auto find(const std::string &name) &noexcept { // looks up `name` in m_data; the iterator is transformed with detail::with_coords
129 3255 : return m_data.find(name).transform(detail::with_coords{this});
130 : }
131 3734 : auto find(const std::string &name) const &noexcept { // const counterpart of the overload above
132 3734 : return m_data.find(name).transform(detail::with_coords{this});
133 : }
134 :
135 : DataArray operator[](const std::string &name) const;
136 :
137 : auto begin() const && = delete; // not callable on rvalues: the returned iterator stores `this`
138 : auto begin() && = delete;
139 : /// Return const iterator to the beginning of all data items.
140 2423 : auto begin() const &noexcept {
141 2423 : return m_data.begin().transform(detail::with_coords{this});
142 : }
143 : /// Return iterator to the beginning of all data items.
144 213 : auto begin() &noexcept {
145 213 : return m_data.begin().transform(detail::with_coords{this});
146 : }
147 : auto end() const && = delete; // not callable on rvalues: the returned iterator stores `this`
148 : auto end() && = delete;
149 : /// Return const iterator to the end of all data items.
150 2485 : auto end() const &noexcept {
151 2485 : return m_data.end().transform(detail::with_coords{this});
152 : }
153 :
154 : /// Return iterator to the end of all data items.
155 3472 : auto end() &noexcept {
156 3472 : return m_data.end().transform(detail::with_coords{this});
157 : }
158 :
159 : auto items_begin() const && = delete; // not callable on rvalues: the returned iterator stores `this`
160 : auto items_begin() && = delete;
161 : auto items_begin() const &noexcept { // like begin(), but elements are (key, item) pairs via detail::item_with_coords
162 : return m_data.begin().transform(detail::item_with_coords{this});
163 : }
164 503 : auto items_begin() &noexcept { // non-const counterpart of the overload above
165 503 : return m_data.begin().transform(detail::item_with_coords{this});
166 : }
167 : auto items_end() const && = delete; // not callable on rvalues: the returned iterator stores `this`
168 : auto items_end() && = delete;
169 : auto items_end() const &noexcept { // end iterator matching items_begin()
170 : return m_data.end().transform(detail::item_with_coords{this});
171 : }
172 :
173 503 : auto items_end() &noexcept { // non-const counterpart of the overload above
174 503 : return m_data.end().transform(detail::item_with_coords{this});
175 : }
176 :
177 : auto keys_begin() const && = delete; // key iteration is likewise disabled on rvalues
178 : auto keys_begin() && = delete;
179 33 : auto keys_begin() const &noexcept { return m_data.keys_begin(); } // forwards to the key iterator of m_data
180 226 : auto keys_begin() &noexcept { return m_data.keys_begin(); }
181 : auto keys_end() const && = delete; // key iteration is likewise disabled on rvalues
182 : auto keys_end() && = delete;
183 33 : auto keys_end() const &noexcept { return m_data.keys_end(); } // forwards to the key end iterator of m_data
184 :
185 226 : auto keys_end() &noexcept { return m_data.keys_end(); }
186 :
187 : void setCoord(const Dim dim, Variable coord);
188 : void setData(const std::string &name, Variable data,
189 : const AttrPolicy attrPolicy = AttrPolicy::Drop);
190 : void setData(const std::string &name, const DataArray &data);
191 : void setDataInit(const std::string &name, Variable data,
192 : const AttrPolicy attrPolicy = AttrPolicy::Drop);
193 : void setDataInit(const std::string &name, const DataArray &data);
194 :
195 : Dataset slice(const Slice &s) const;
196 : [[maybe_unused]] Dataset &setSlice(const Slice &s, const Dataset &dataset);
197 : [[maybe_unused]] Dataset &setSlice(const Slice &s, const DataArray &array);
198 : [[maybe_unused]] Dataset &setSlice(const Slice &s, const Variable &var);
199 :
200 : [[nodiscard]] Dataset
201 : rename_dims(const std::vector<std::pair<Dim, Dim>> &names) const;
202 :
203 : bool operator==(const Dataset &other) const;
204 : bool operator!=(const Dataset &other) const;
205 :
206 : Dataset &operator+=(const DataArray &other);
207 : Dataset &operator-=(const DataArray &other);
208 : Dataset &operator*=(const DataArray &other);
209 : Dataset &operator/=(const DataArray &other);
210 : Dataset &operator+=(const Variable &other);
211 : Dataset &operator-=(const Variable &other);
212 : Dataset &operator*=(const Variable &other);
213 : Dataset &operator/=(const Variable &other);
214 : Dataset &operator+=(const Dataset &other);
215 : Dataset &operator-=(const Dataset &other);
216 : Dataset &operator*=(const Dataset &other);
217 : Dataset &operator/=(const Dataset &other);
218 :
219 : const Sizes &sizes() const;
220 : const Sizes &dims() const;
221 : Dim dim() const;
222 : [[nodiscard]] scipp::index ndim() const;
223 :
224 : bool is_readonly() const noexcept;
225 : bool is_valid() const noexcept;
226 :
227 287 : [[nodiscard]] Dataset or_empty() && { // rvalue-only: yields this dataset if is_valid(), else one built from empty data and coords
228 287 : if (is_valid())
229 269 : return std::move(*this);
230 18 : return Dataset({}, {});
231 : }
232 :
233 : private:
234 : // Declared friend so gtest recognizes it
235 : friend SCIPP_DATASET_EXPORT std::ostream &operator<<(std::ostream &,
236 : const Dataset &);
237 :
238 : Coords m_coords; // dataset-level coords; handed to every item by detail::with_coords
239 : core::Dict<std::string, DataArray> m_data; // data items keyed by name
240 : bool m_readonly{false};
241 : /// See documentation of setDataInit.
242 : /// Invalid datasets are for internal use only.
243 : bool m_valid{true};
244 : };
245 :
246 : [[nodiscard]] SCIPP_DATASET_EXPORT Dataset // returns the copy by value; attrPolicy defaults to AttrPolicy::Keep
247 : copy(const Dataset &dataset, const AttrPolicy attrPolicy = AttrPolicy::Keep);
248 :
249 : [[maybe_unused]] SCIPP_DATASET_EXPORT DataArray & // overloads taking `out`: presumably copy into the caller-provided object - definitions are not in this header
250 : copy(const DataArray &array, DataArray &out,
251 : const AttrPolicy attrPolicy = AttrPolicy::Keep);
252 : [[maybe_unused]] SCIPP_DATASET_EXPORT DataArray // rvalue `out` variant, result returned by value
253 : copy(const DataArray &array, DataArray &&out,
254 : const AttrPolicy attrPolicy = AttrPolicy::Keep);
255 : [[maybe_unused]] SCIPP_DATASET_EXPORT Dataset & // Dataset counterpart of the lvalue `out` overload
256 : copy(const Dataset &dataset, Dataset &out,
257 : const AttrPolicy attrPolicy = AttrPolicy::Keep);
258 : [[maybe_unused]] SCIPP_DATASET_EXPORT Dataset // Dataset counterpart of the rvalue `out` overload
259 : copy(const Dataset &dataset, Dataset &&out,
260 : const AttrPolicy attrPolicy = AttrPolicy::Keep);
261 :
262 : SCIPP_DATASET_EXPORT Dataset operator+(const Dataset &lhs, const Dataset &rhs); // addition: Dataset with Dataset, DataArray or Variable, in either operand order
263 : SCIPP_DATASET_EXPORT Dataset operator+(const Dataset &lhs,
264 : const DataArray &rhs);
265 : SCIPP_DATASET_EXPORT Dataset operator+(const DataArray &lhs,
266 : const Dataset &rhs);
267 : SCIPP_DATASET_EXPORT Dataset operator+(const Dataset &lhs, const Variable &rhs);
268 : SCIPP_DATASET_EXPORT Dataset operator+(const Variable &lhs, const Dataset &rhs);
269 :
270 : SCIPP_DATASET_EXPORT Dataset operator-(const Dataset &lhs, const Dataset &rhs); // subtraction: same operand combinations as operator+
271 : SCIPP_DATASET_EXPORT Dataset operator-(const Dataset &lhs,
272 : const DataArray &rhs);
273 : SCIPP_DATASET_EXPORT Dataset operator-(const DataArray &lhs,
274 : const Dataset &rhs);
275 : SCIPP_DATASET_EXPORT Dataset operator-(const Dataset &lhs, const Variable &rhs);
276 : SCIPP_DATASET_EXPORT Dataset operator-(const Variable &lhs, const Dataset &rhs);
277 :
278 : SCIPP_DATASET_EXPORT Dataset operator*(const Dataset &lhs, const Dataset &rhs); // multiplication: same operand combinations as operator+
279 : SCIPP_DATASET_EXPORT Dataset operator*(const Dataset &lhs,
280 : const DataArray &rhs);
281 : SCIPP_DATASET_EXPORT Dataset operator*(const DataArray &lhs,
282 : const Dataset &rhs);
283 : SCIPP_DATASET_EXPORT Dataset operator*(const Dataset &lhs, const Variable &rhs);
284 : SCIPP_DATASET_EXPORT Dataset operator*(const Variable &lhs, const Dataset &rhs);
285 :
286 : SCIPP_DATASET_EXPORT Dataset operator/(const Dataset &lhs, const Dataset &rhs); // division: same operand combinations as operator+
287 : SCIPP_DATASET_EXPORT Dataset operator/(const Dataset &lhs,
288 : const DataArray &rhs);
289 : SCIPP_DATASET_EXPORT Dataset operator/(const DataArray &lhs,
290 : const Dataset &rhs);
291 : SCIPP_DATASET_EXPORT Dataset operator/(const Dataset &lhs, const Variable &rhs);
292 : SCIPP_DATASET_EXPORT Dataset operator/(const Variable &lhs, const Dataset &rhs);
293 :
294 : /// Union of the masks in `currentMasks` and `otherMasks`.
295 : /// Masks that appear in both are OR'ed.
296 : /// The result is returned as a new map; both arguments are taken by const reference.
297 : SCIPP_DATASET_EXPORT
298 : typename Masks::holder_type union_or(const Masks &currentMasks,
299 : const Masks &otherMasks);
300 :
301 : /// Union of the masks in `masks` and `otherMasks`.
302 : /// Masks that appear in both are OR'ed.
303 : /// The result is stored in `masks`, the first argument.
304 : SCIPP_DATASET_EXPORT void union_or_in_place(Masks &masks,
305 : const Masks &otherMasks);
306 :
307 : SCIPP_DATASET_EXPORT Dataset merge(const Dataset &a, const Dataset &b); // returns a new Dataset; NOTE(review): handling of names present in both inputs is defined in the implementation, not visible here
308 :
309 : [[nodiscard]] SCIPP_DATASET_EXPORT bool equals_nan(const Dataset &a, // presumably an equality check that treats NaNs as equal - TODO confirm against the implementation
310 : const Dataset &b);
311 :
312 : } // namespace scipp::dataset
313 :
314 : namespace scipp::core { // DType ids 2000-2003 for DataArray, Dataset and their bucket<> wrappers
315 : template <> inline constexpr DType dtype<dataset::DataArray>{2000};
316 : template <> inline constexpr DType dtype<dataset::Dataset>{2001};
317 : template <> inline constexpr DType dtype<bucket<dataset::DataArray>>{2002};
318 : template <> inline constexpr DType dtype<bucket<dataset::Dataset>>{2003};
319 : } // namespace scipp::core
320 :
321 : namespace scipp {
322 : using dataset::Dataset; // makes the class reachable as scipp::Dataset
323 : } // namespace scipp
324 :
325 : #include "scipp/dataset/arithmetic.h"
|