Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 :
6 : #include "scipp/dataset/dataset.h"
7 : #include "scipp/dataset/math.h"
8 : #include "scipp/dataset/rebin.h"
9 : #include "scipp/dataset/sized_dict.h"
10 :
11 : #include "bind_data_access.h"
12 : #include "bind_data_array.h"
13 : #include "bind_operators.h"
14 : #include "bind_slice_methods.h"
15 : #include "pybind11.h"
16 : #include "rename.h"
17 :
18 : using namespace scipp;
19 : using namespace scipp::dataset;
20 :
21 : namespace py = pybind11;
22 :
23 : namespace {
24 : template <class T, class... Ignored>
25 3 : void bind_dataset_properties(py::class_<T, Ignored...> &c) {
26 3 : c.def("drop_coords", [](T &self, const std::string &coord_name) {
27 4 : std::vector<scipp::Dim> coord_names_c = {scipp::Dim{coord_name}};
28 8 : return self.drop_coords(coord_names_c);
29 4 : });
30 3 : c.def("drop_coords",
31 3 : [](T &self, const std::vector<std::string> &coord_names) {
32 3 : std::vector<scipp::Dim> coord_names_c;
33 3 : std::transform(coord_names.begin(), coord_names.end(),
34 : std::back_inserter(coord_names_c),
35 6 : [](const auto &name) { return scipp::Dim{name}; });
36 6 : return self.drop_coords(coord_names_c);
37 3 : });
38 3 : }
39 :
40 : template <class T, class... Ignored>
41 3 : void bind_dataset_coord_properties(py::class_<T, Ignored...> &c) {
42 : // TODO does this comment still apply?
43 : // For some reason the return value policy and/or keep-alive policy do not
44 : // work unless we wrap things in py::cpp_function.
45 3 : c.def_property_readonly(
46 702 : "coords", [](T &self) -> decltype(auto) { return self.coords(); },
47 : R"(
48 : Dict of coordinates.)");
49 : // Metadata for dataset is same as `coords` since dataset cannot have attrs
50 : // (unaligned coords).
51 3 : c.def_property_readonly(
52 0 : "meta", [](T &self) -> decltype(auto) { return self.meta(); },
53 : R"(
54 : Dict of coordinates.)");
55 3 : }
56 :
57 : template <class T, class... Ignored>
58 3 : void bind_dataset_view_methods(py::class_<T, Ignored...> &c) {
59 3 : bind_common_operators(c);
60 3 : c.def("__len__", &T::size);
61 0 : c.def(
62 : "__iter__",
63 4 : [](const T &self) {
64 : return py::make_iterator(self.keys_begin(), self.keys_end(),
65 4 : py::return_value_policy::move);
66 : },
67 3 : py::return_value_policy::move, py::keep_alive<0, 1>());
68 0 : c.def(
69 223 : "keys", [](T &self) { return keys_view(self); },
70 3 : py::return_value_policy::move, py::keep_alive<0, 1>(),
71 : R"(view on self's keys)");
72 0 : c.def(
73 69 : "values", [](T &self) { return values_view(self); },
74 3 : py::return_value_policy::move, py::keep_alive<0, 1>(),
75 : R"(view on self's values)");
76 0 : c.def(
77 507 : "items", [](T &self) { return items_view(self); },
78 3 : py::return_value_policy::move, py::keep_alive<0, 1>(),
79 : R"(view on self's items)");
80 3 : c.def("__getitem__",
81 383 : [](const T &self, const std::string &name) { return self[name]; });
82 3 : c.def("__contains__", &T::contains);
83 3 : c.def("_ipython_key_completions_", [](T &self) {
84 1 : py::list out;
85 1 : const auto end = self.keys_end();
86 3 : for (auto it = self.keys_begin(); it != end; ++it) {
87 2 : out.append(*it);
88 : }
89 2 : return out;
90 0 : });
91 3 : bind_common_data_properties(c);
92 3 : bind_pop(c);
93 3 : }
94 :
95 : template <class T, class... Ignored>
96 3 : void bind_data_array(py::class_<T, Ignored...> &c) {
97 3 : bind_data_array_properties(c);
98 3 : bind_common_operators(c);
99 3 : bind_data_properties(c);
100 3 : bind_slice_methods(c);
101 3 : bind_in_place_binary<DataArray>(c);
102 3 : bind_in_place_binary<Variable>(c);
103 3 : bind_binary<Dataset>(c);
104 3 : bind_binary<DataArray>(c);
105 3 : bind_binary<Variable>(c);
106 3 : bind_binary_scalars(c);
107 3 : bind_reverse_binary_scalars(c);
108 3 : bind_comparison<DataArray>(c);
109 3 : bind_comparison<Variable>(c);
110 3 : bind_unary(c);
111 3 : bind_logical<DataArray>(c);
112 3 : bind_logical<Variable>(c);
113 3 : bind_boolean_unary(c);
114 3 : }
115 :
116 6 : template <class T> void bind_rebin(py::module &m) {
117 6 : m.def(
118 : "rebin",
119 9 : [](const T &x, const std::string &dim, const Variable &bins) {
120 9 : return rebin(x, Dim{dim}, bins);
121 : },
122 0 : py::arg("x"), py::arg("dim"), py::arg("bins"),
123 6 : py::call_guard<py::gil_scoped_release>());
124 6 : }
125 :
126 28980 : template <class Key, class Value> auto to_cpp_dict(const py::dict &dict) {
127 28980 : core::Dict<Key, Value> out;
128 37902 : for (const auto &[key, val] : dict) {
129 8922 : out.insert_or_assign(Key{key.template cast<std::string>()},
130 : val.template cast<Value &>());
131 : }
132 28980 : return out;
133 0 : }
134 :
135 798 : auto dataset_from_data_and_coords(const py::dict &data,
136 : const py::dict &coords) {
137 798 : Dataset d;
138 1757 : for (auto &&[name, item] : data) {
139 959 : if (py::isinstance<DataArray>(item)) {
140 473 : d.setDataInit(name.cast<std::string>(), item.cast<DataArray &>());
141 : } else {
142 486 : d.setDataInit(name.cast<std::string>(), item.cast<Variable &>());
143 : }
144 : }
145 798 : if (d.is_valid()) {
146 : // Need to use dataset_from_coords when there is no data to initialize
147 : // dimensions properly.
148 1008 : for (auto &&[dim, coord] : coords)
149 227 : d.setCoord(Dim{dim.cast<std::string>()}, coord.cast<Variable &>());
150 : }
151 798 : return d;
152 0 : }
153 :
154 17 : auto dataset_from_coords(const py::dict &py_coords) {
155 17 : typename Coords::holder_type coords;
156 47 : for (auto &&[dim, coord] : py_coords)
157 30 : coords.insert_or_assign(Dim{dim.cast<std::string>()},
158 : coord.cast<Variable &>());
159 35 : return Dataset({}, std::move(coords));
160 17 : }
161 : } // namespace
162 :
163 3 : void init_dataset(py::module &m) {
164 3 : static_cast<void>(py::class_<Slice>(m, "Slice"));
165 :
166 3 : bind_helper_view<items_view, Dataset>(m, "Dataset");
167 3 : bind_helper_view<str_items_view, Coords>(m, "Coords");
168 3 : bind_helper_view<items_view, Masks>(m, "Masks");
169 3 : bind_helper_view<keys_view, Dataset>(m, "Dataset");
170 3 : bind_helper_view<str_keys_view, Coords>(m, "Coords");
171 3 : bind_helper_view<keys_view, Masks>(m, "Masks");
172 3 : bind_helper_view<values_view, Dataset>(m, "Dataset");
173 3 : bind_helper_view<values_view, Coords>(m, "Coords");
174 3 : bind_helper_view<values_view, Masks>(m, "Masks");
175 :
176 3 : bind_mutable_view_no_dim<Coords>(m, "Coords",
177 : R"(dict-like collection of meta data
178 :
179 : Returned by :py:func:`DataArray.coords`, :py:func:`DataArray.attrs`, :py:func:`DataArray.meta`,
180 : and the corresponding properties of :py:class:`Dataset`.)");
181 3 : bind_mutable_view<Masks>(m, "Masks", R"(dict-like collection of masks.
182 :
183 : Returned by :py:func:`DataArray.masks`)");
184 :
185 : py::class_<DataArray> dataArray(m, "DataArray", R"(
186 3 : Named variable with associated coords, masks, and attributes.)");
187 3 : py::options options;
188 3 : options.disable_function_signatures();
189 6 : dataArray.def(
190 3 : py::init([](const Variable &data, const py::object &coords,
191 : const py::object &masks, const py::object &attrs,
192 : const std::string &name) {
193 19326 : return DataArray{data, to_cpp_dict<Dim, Variable>(coords),
194 19326 : to_cpp_dict<std::string, Variable>(masks),
195 38643 : to_cpp_dict<Dim, Variable>(attrs), name};
196 : }),
197 6 : py::arg("data"), py::kw_only(), py::arg("coords") = py::dict(),
198 6 : py::arg("masks") = py::dict(), py::arg("attrs") = py::dict(),
199 6 : py::arg("name") = std::string{},
200 : R"doc(__init__(self, data: Variable, coords: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}, masks: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}, attrs: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}, name: str = '') -> None
201 :
202 : DataArray initializer.
203 :
204 : Parameters
205 : ----------
206 : data:
207 : Data and optionally variances.
208 : coords:
209 : Coordinates referenced by dimension.
210 : masks:
211 : Masks referenced by name.
212 : attrs:
213 : Attributes referenced by dimension.
214 : name:
215 : Name of the data array.
216 : )doc");
217 3 : options.enable_function_signatures();
218 :
219 3 : bind_data_array(dataArray);
220 :
221 : py::class_<Dataset> dataset(m, "Dataset", R"(
222 3 : Dict of data arrays with aligned dimensions.)");
223 3 : options.disable_function_signatures();
224 3 : dataset.def(
225 3 : py::init([](const py::object &data, const py::object &coords) {
226 799 : if (data.is_none() && coords.is_none())
227 1 : throw py::type_error("Dataset needs data or coordinates or both.");
228 798 : const auto data_dict = data.is_none() ? py::dict() : py::dict(data);
229 : const auto coords_dict =
230 798 : coords.is_none() ? py::dict() : py::dict(coords);
231 798 : auto d = dataset_from_data_and_coords(data_dict, coords_dict);
232 1595 : return d.is_valid() ? d : dataset_from_coords(coords_dict);
233 800 : }),
234 6 : py::arg("data") = py::none{}, py::kw_only(),
235 6 : py::arg("coords") = py::none{},
236 : R"doc(__init__(self, data: Union[typing.Mapping[str, Union[Variable, DataArray]], Iterable[Tuple[str, Union[Variable, DataArray]]]] = {}, coords: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}) -> None
237 :
238 : Dataset initializer.
239 :
240 : Parameters
241 : ----------
242 : data:
243 : Dictionary of name and data pairs.
244 : coords:
245 : Dictionary of name and coord pairs.
246 : )doc");
247 3 : options.enable_function_signatures();
248 :
249 : dataset
250 3 : .def("__setitem__",
251 4 : [](Dataset &self, const std::string &name, const Variable &data) {
252 4 : self.setData(name, data);
253 3 : })
254 3 : .def("__setitem__",
255 13 : [](Dataset &self, const std::string &name, const DataArray &data) {
256 13 : self.setData(name, data);
257 12 : })
258 3 : .def("__delitem__", &Dataset::erase,
259 0 : py::call_guard<py::gil_scoped_release>())
260 3 : .def("clear", &Dataset::clear,
261 : R"(Removes all data, preserving coordinates.)");
262 :
263 3 : bind_dataset_view_methods(dataset);
264 3 : bind_dict_update(dataset,
265 11 : [](Dataset &self, const std::string &key,
266 11 : const DataArray &value) { self.setData(key, value); });
267 :
268 3 : bind_dataset_coord_properties(dataset);
269 3 : bind_dataset_properties(dataset);
270 :
271 3 : bind_slice_methods(dataset);
272 :
273 3 : bind_in_place_binary<Dataset>(dataset);
274 3 : bind_in_place_binary<DataArray>(dataset);
275 3 : bind_in_place_binary<Variable>(dataset);
276 3 : bind_in_place_binary_scalars(dataset);
277 3 : bind_in_place_binary_scalars(dataArray);
278 :
279 3 : bind_binary<Dataset>(dataset);
280 3 : bind_binary<DataArray>(dataset);
281 3 : bind_binary<Variable>(dataset);
282 :
283 3 : dataArray.def("_rename_dims", &rename_dims<DataArray>);
284 3 : dataset.def("_rename_dims", &rename_dims<Dataset>);
285 :
286 3 : m.def(
287 : "merge",
288 1 : [](const Dataset &lhs, const Dataset &rhs) {
289 1 : return dataset::merge(lhs, rhs);
290 : },
291 0 : py::arg("lhs"), py::arg("rhs"), py::call_guard<py::gil_scoped_release>());
292 :
293 3 : m.def(
294 : "irreducible_mask",
295 63 : [](const Masks &masks, const std::string &dim) {
296 63 : py::gil_scoped_release release;
297 63 : auto mask = irreducible_mask(masks, Dim{dim});
298 63 : py::gil_scoped_acquire acquire;
299 189 : return mask.is_valid() ? py::cast(mask) : py::none();
300 63 : },
301 0 : py::arg("masks"), py::arg("dim"));
302 :
303 3 : m.def(
304 6 : "reciprocal", [](const DataArray &self) { return reciprocal(self); },
305 0 : py::arg("x"), py::call_guard<py::gil_scoped_release>());
306 :
307 3 : bind_astype(dataArray);
308 :
309 3 : bind_rebin<DataArray>(m);
310 3 : bind_rebin<Dataset>(m);
311 3 : }
|