Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 :
6 : #include "scipp/dataset/dataset.h"
7 : #include "scipp/dataset/math.h"
8 : #include "scipp/dataset/rebin.h"
9 : #include "scipp/dataset/sized_dict.h"
10 :
11 : #include "bind_data_access.h"
12 : #include "bind_data_array.h"
13 : #include "bind_operators.h"
14 : #include "bind_slice_methods.h"
15 : #include "pybind11.h"
16 : #include "rename.h"
17 :
18 : using namespace scipp;
19 : using namespace scipp::dataset;
20 :
21 : namespace py = pybind11;
22 :
23 : namespace {
24 : template <class T, class... Ignored>
25 3 : void bind_dataset_properties(py::class_<T, Ignored...> &c) {
26 3 : c.def("drop_coords", [](T &self, const std::string &coord_name) {
27 4 : std::vector<scipp::Dim> coord_names_c = {scipp::Dim{coord_name}};
28 8 : return self.drop_coords(coord_names_c);
29 4 : });
30 3 : c.def("drop_coords",
31 3 : [](T &self, const std::vector<std::string> &coord_names) {
32 3 : std::vector<scipp::Dim> coord_names_c;
33 3 : std::transform(coord_names.begin(), coord_names.end(),
34 : std::back_inserter(coord_names_c),
35 6 : [](const auto &name) { return scipp::Dim{name}; });
36 6 : return self.drop_coords(coord_names_c);
37 3 : });
38 3 : }
39 :
40 : template <class T, class... Ignored>
41 3 : void bind_dataset_coord_properties(py::class_<T, Ignored...> &c) {
42 : // TODO does this comment still apply?
43 : // For some reason the return value policy and/or keep-alive policy do not
44 : // work unless we wrap things in py::cpp_function.
45 3 : c.def_property_readonly(
46 710 : "coords", [](T &self) -> decltype(auto) { return self.coords(); },
47 : R"(
48 : Dict of coordinates.)");
49 : // Metadata for dataset is same as `coords` since dataset cannot have attrs
50 : // (unaligned coords).
51 3 : c.def_property_readonly(
52 0 : "meta", [](T &self) -> decltype(auto) { return self.meta(); },
53 : R"(
54 : Dict of coordinates.)");
55 3 : }
56 :
57 : template <class... Ignored>
58 3 : void bind_dataset_view_methods(py::class_<Dataset, Ignored...> &c) {
59 3 : bind_common_operators(c);
60 3 : c.def("__len__", &Dataset::size);
61 0 : c.def(
62 : "__iter__",
63 4 : [](const Dataset &self) {
64 : return py::make_iterator(self.keys_begin(), self.keys_end(),
65 4 : py::return_value_policy::move);
66 : },
67 3 : py::return_value_policy::move, py::keep_alive<0, 1>());
68 0 : c.def(
69 223 : "keys", [](Dataset &self) { return keys_view(self); },
70 3 : py::return_value_policy::move, py::keep_alive<0, 1>(),
71 : R"(view on self's keys)");
72 0 : c.def(
73 70 : "values", [](Dataset &self) { return values_view(self); },
74 3 : py::return_value_policy::move, py::keep_alive<0, 1>(),
75 : R"(view on self's values)");
76 0 : c.def(
77 507 : "items", [](Dataset &self) { return items_view(self); },
78 3 : py::return_value_policy::move, py::keep_alive<0, 1>(),
79 : R"(view on self's items)");
80 3 : c.def("__getitem__", [](const Dataset &self, const std::string &name) {
81 383 : return self[name];
82 : });
83 3 : c.def("__contains__", [](const Dataset &self, const py::handle &key) {
84 : try {
85 19 : return self.contains(key.cast<std::string>());
86 0 : } catch (py::cast_error &) {
87 0 : return false; // if `key` is not a string, it cannot be contained
88 : }
89 : });
90 3 : c.def("_ipython_key_completions_", [](Dataset &self) {
91 1 : py::typing::List<py::str> out;
92 1 : const auto end = self.keys_end();
93 3 : for (auto it = self.keys_begin(); it != end; ++it) {
94 2 : out.append(*it);
95 : }
96 2 : return out;
97 0 : });
98 3 : bind_common_data_properties(c);
99 3 : bind_pop(c);
100 3 : }
101 :
102 : template <class T, class... Ignored>
103 3 : void bind_data_array(py::class_<T, Ignored...> &c) {
104 3 : bind_data_array_properties(c);
105 3 : bind_common_operators(c);
106 3 : bind_data_properties(c);
107 3 : bind_slice_methods(c);
108 3 : bind_in_place_binary<DataArray>(c);
109 3 : bind_in_place_binary<Variable>(c);
110 3 : bind_binary<Dataset>(c);
111 3 : bind_binary<DataArray>(c);
112 3 : bind_binary<Variable>(c);
113 3 : bind_binary_scalars(c);
114 3 : bind_reverse_binary_scalars(c);
115 3 : bind_comparison<DataArray>(c);
116 3 : bind_comparison<Variable>(c);
117 3 : bind_unary(c);
118 3 : bind_logical<DataArray>(c);
119 3 : bind_logical<Variable>(c);
120 3 : bind_boolean_unary(c);
121 3 : }
122 :
123 6 : template <class T> void bind_rebin(py::module &m) {
124 6 : m.def(
125 : "rebin",
126 5 : [](const T &x, const std::string &dim, const Variable &bins) {
127 5 : return rebin(x, Dim{dim}, bins);
128 : },
129 0 : py::arg("x"), py::arg("dim"), py::arg("bins"),
130 6 : py::call_guard<py::gil_scoped_release>());
131 6 : }
132 :
133 30135 : template <class Key, class Value> auto to_cpp_dict(const py::dict &dict) {
134 30135 : core::Dict<Key, Value> out;
135 39391 : for (const auto &[key, val] : dict) {
136 9256 : out.insert_or_assign(Key{key.template cast<std::string>()},
137 : val.template cast<Value &>());
138 : }
139 30135 : return out;
140 0 : }
141 :
142 798 : auto dataset_from_data_and_coords(const py::dict &data,
143 : const py::dict &coords) {
144 798 : Dataset d;
145 1757 : for (auto &&[name, item] : data) {
146 959 : if (py::isinstance<DataArray>(item)) {
147 473 : d.setDataInit(name.cast<std::string>(), item.cast<DataArray &>());
148 : } else {
149 486 : d.setDataInit(name.cast<std::string>(), item.cast<Variable &>());
150 : }
151 : }
152 798 : if (d.is_valid()) {
153 : // Need to use dataset_from_coords when there is no data to initialize
154 : // dimensions properly.
155 1008 : for (auto &&[dim, coord] : coords)
156 227 : d.setCoord(Dim{dim.cast<std::string>()}, coord.cast<Variable &>());
157 : }
158 798 : return d;
159 0 : }
160 :
161 17 : auto dataset_from_coords(const py::dict &py_coords) {
162 17 : typename Coords::holder_type coords;
163 47 : for (auto &&[dim, coord] : py_coords)
164 30 : coords.insert_or_assign(Dim{dim.cast<std::string>()},
165 : coord.cast<Variable &>());
166 35 : return Dataset({}, std::move(coords));
167 17 : }
168 : } // namespace
169 :
170 3 : void init_dataset(py::module &m) {
171 3 : static_cast<void>(py::class_<Slice>(m, "Slice"));
172 :
173 3 : bind_helper_view<items_view, Dataset>(m, "Dataset");
174 3 : bind_helper_view<str_items_view, Coords>(m, "Coords");
175 3 : bind_helper_view<items_view, Masks>(m, "Masks");
176 3 : bind_helper_view<keys_view, Dataset>(m, "Dataset");
177 3 : bind_helper_view<str_keys_view, Coords>(m, "Coords");
178 3 : bind_helper_view<keys_view, Masks>(m, "Masks");
179 3 : bind_helper_view<values_view, Dataset>(m, "Dataset");
180 3 : bind_helper_view<values_view, Coords>(m, "Coords");
181 3 : bind_helper_view<values_view, Masks>(m, "Masks");
182 :
183 3 : bind_mutable_view_no_dim<Coords>(m, "Coords",
184 : R"(dict-like collection of meta data
185 :
186 : Returned by :py:func:`DataArray.coords`, :py:func:`DataArray.attrs`, :py:func:`DataArray.meta`,
187 : and the corresponding properties of :py:class:`Dataset`.)");
188 3 : bind_mutable_view<Masks>(m, "Masks", R"(dict-like collection of masks.
189 :
190 : Returned by :py:func:`DataArray.masks`)");
191 :
192 : py::class_<DataArray> dataArray(m, "DataArray", R"(
193 3 : Named variable with associated coords, masks, and attributes.)");
194 3 : py::options options;
195 3 : options.disable_function_signatures();
196 6 : dataArray.def(
197 3 : py::init([](const Variable &data, const py::object &coords,
198 : const py::object &masks, const py::object &attrs,
199 : const std::string &name) {
200 20096 : return DataArray{data, to_cpp_dict<Dim, Variable>(coords),
201 20096 : to_cpp_dict<std::string, Variable>(masks),
202 40183 : to_cpp_dict<Dim, Variable>(attrs), name};
203 : }),
204 6 : py::arg("data"), py::kw_only(), py::arg("coords") = py::dict(),
205 6 : py::arg("masks") = py::dict(), py::arg("attrs") = py::dict(),
206 6 : py::arg("name") = std::string{},
207 : R"doc(__init__(self, data: Variable, coords: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}, masks: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}, attrs: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}, name: str = '') -> None
208 :
209 : DataArray initializer.
210 :
211 : Parameters
212 : ----------
213 : data:
214 : Data and optionally variances.
215 : coords:
216 : Coordinates referenced by dimension.
217 : masks:
218 : Masks referenced by name.
219 : attrs:
220 : Attributes referenced by dimension.
221 : name:
222 : Name of the data array.
223 : )doc");
224 3 : options.enable_function_signatures();
225 :
226 3 : bind_data_array(dataArray);
227 :
228 : py::class_<Dataset> dataset(m, "Dataset", R"(
229 3 : Dict of data arrays with aligned dimensions.)");
230 3 : options.disable_function_signatures();
231 3 : dataset.def(
232 3 : py::init([](const py::object &data, const py::object &coords) {
233 799 : if (data.is_none() && coords.is_none())
234 1 : throw py::type_error("Dataset needs data or coordinates or both.");
235 798 : const auto data_dict = data.is_none() ? py::dict() : py::dict(data);
236 : const auto coords_dict =
237 798 : coords.is_none() ? py::dict() : py::dict(coords);
238 798 : auto d = dataset_from_data_and_coords(data_dict, coords_dict);
239 1595 : return d.is_valid() ? d : dataset_from_coords(coords_dict);
240 800 : }),
241 6 : py::arg("data") = py::none{}, py::kw_only(),
242 6 : py::arg("coords") = py::none{},
243 : R"doc(__init__(self, data: Union[Mapping[str, Union[Variable, DataArray]], Iterable[tuple[str, Union[Variable, DataArray]]]] = {}, coords: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}) -> None
244 :
245 : Dataset initializer.
246 :
247 : Parameters
248 : ----------
249 : data:
250 : Dictionary of name and data pairs.
251 : coords:
252 : Dictionary of name and coord pairs.
253 : )doc");
254 3 : options.enable_function_signatures();
255 :
256 : dataset
257 3 : .def("__setitem__",
258 4 : [](Dataset &self, const std::string &name, const Variable &data) {
259 4 : self.setData(name, data);
260 3 : })
261 3 : .def("__setitem__",
262 13 : [](Dataset &self, const std::string &name, const DataArray &data) {
263 13 : self.setData(name, data);
264 12 : })
265 3 : .def("__delitem__", &Dataset::erase,
266 0 : py::call_guard<py::gil_scoped_release>())
267 3 : .def("clear", &Dataset::clear,
268 : R"(Removes all data, preserving coordinates.)");
269 :
270 3 : bind_dataset_view_methods(dataset);
271 3 : bind_dict_update(dataset,
272 11 : [](Dataset &self, const std::string &key,
273 11 : const DataArray &value) { self.setData(key, value); });
274 :
275 3 : bind_dataset_coord_properties(dataset);
276 3 : bind_dataset_properties(dataset);
277 :
278 3 : bind_slice_methods(dataset);
279 :
280 3 : bind_in_place_binary<Dataset>(dataset);
281 3 : bind_in_place_binary<DataArray>(dataset);
282 3 : bind_in_place_binary<Variable>(dataset);
283 3 : bind_in_place_binary_scalars(dataset);
284 3 : bind_in_place_binary_scalars(dataArray);
285 :
286 3 : bind_binary<Dataset>(dataset);
287 3 : bind_binary<DataArray>(dataset);
288 3 : bind_binary<Variable>(dataset);
289 3 : bind_binary_scalars(dataset);
290 :
291 3 : dataArray.def("_rename_dims", &rename_dims<DataArray>);
292 3 : dataset.def("_rename_dims", &rename_dims<Dataset>);
293 :
294 3 : m.def(
295 : "merge",
296 1 : [](const Dataset &lhs, const Dataset &rhs) {
297 1 : return dataset::merge(lhs, rhs);
298 : },
299 0 : py::arg("lhs"), py::arg("rhs"), py::call_guard<py::gil_scoped_release>());
300 :
301 3 : m.def(
302 : "irreducible_mask",
303 63 : [](const Masks &masks, const std::string &dim) {
304 63 : py::gil_scoped_release release;
305 63 : auto mask = irreducible_mask(masks, Dim{dim});
306 63 : py::gil_scoped_acquire acquire;
307 189 : return mask.is_valid() ? py::cast(mask) : py::none();
308 63 : },
309 0 : py::arg("masks"), py::arg("dim"));
310 :
311 3 : m.def(
312 6 : "reciprocal", [](const DataArray &self) { return reciprocal(self); },
313 0 : py::arg("x"), py::call_guard<py::gil_scoped_release>());
314 :
315 3 : bind_astype(dataArray);
316 :
317 3 : bind_rebin<DataArray>(m);
318 3 : bind_rebin<Dataset>(m);
319 3 : }
|