Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #pragma once
6 :
7 : #include "numpy.h"
8 : #include "pybind11.h"
9 : #include "scipp/core/dtype.h"
10 : #include "scipp/core/slice.h"
11 : #include "scipp/core/tag_util.h"
12 : #include "scipp/dataset/dataset.h"
13 : #include "scipp/dataset/extract.h"
14 : #include "scipp/dataset/shape.h"
15 : #include "scipp/dataset/slice.h"
16 : #include "scipp/dataset/util.h"
17 : #include "scipp/variable/slice.h"
18 : #include "scipp/variable/variable.h"
19 : #include "slice_utils.h"
20 :
21 : namespace py = pybind11;
22 : using namespace scipp;
23 : using namespace scipp::variable;
24 : using namespace scipp::dataset;
25 :
26 3032 : template <class T> auto to_dim_type(std::tuple<std::string, T> &&t) {
27 3032 : return std::tuple{Dim{std::move(std::get<0>(t))}, std::move(std::get<1>(t))};
28 : }
29 :
30 5990 : template <class T> auto dim_extent(const T &object, const Dim dim) {
31 : if constexpr (std::is_same_v<T, Dataset>) {
32 142 : scipp::index extent = -1;
33 142 : if (object.sizes().contains(dim))
34 142 : extent = object.sizes().at(dim);
35 142 : return extent;
36 : } else {
37 5848 : return object.dims()[dim];
38 : }
39 : }
40 :
41 : template <class T>
42 3205 : auto from_py_slice(const T &source,
43 : const std::tuple<Dim, const py::slice> &index) {
44 3205 : const auto &[dim, indices] = index;
45 : size_t start, stop, step, slicelength;
46 3205 : const auto size = dim_extent(source, dim);
47 3205 : if (!indices.compute(size, &start, &stop, &step, &slicelength))
48 0 : throw py::error_already_set();
49 3205 : if (slicelength == 0) {
50 739 : stop = start; // Propagate vanishing slice length downstream.
51 : }
52 6410 : return Slice(dim, start, stop, step);
53 : }
54 :
55 : template <class View> struct SetData {
56 : template <class T> struct Impl {
57 367 : static void apply(View &slice, const py::object &obj) {
58 367 : if (slice.has_variances())
59 0 : throw std::runtime_error("Data object contains variances, to set data "
60 : "values use the `values` property or provide "
61 : "a tuple of values and variances.");
62 :
63 367 : copy_array_into_view(cast_to_array_like<T>(obj, slice.unit()),
64 : slice.template values<T>(), slice.dims());
65 367 : }
66 : };
67 : };
68 :
69 8 : inline void throw_index_error(const scipp::index i, const scipp::index size) {
70 24 : throw std::out_of_range("The requested index " + std::to_string(i) +
71 16 : " is out of range. Dimension size is " +
72 32 : std::to_string(size) + " and the allowed range is [" +
73 32 : std::to_string(-size) + ":" +
74 32 : std::to_string(size - 1) + "].");
75 : }
76 :
77 : template <class T>
78 2785 : auto get_slice(T &self, const std::tuple<Dim, scipp::index> &index) {
79 2785 : auto [dim, i] = index;
80 2785 : auto sz = dim_extent(self, dim);
81 2785 : if (i < -sz || i >= sz) // index is out of range
82 4 : throw_index_error(i, sz);
83 2781 : if (i < 0)
84 132 : i = sz + i;
85 5562 : return Slice(dim, i);
86 : }
87 :
88 : template <class T>
89 3263 : auto get_slice_range(T &self, const std::tuple<Dim, const py::slice> &index) {
90 3263 : auto [dim, py_slice] = index;
91 : if constexpr (std::is_same_v<T, DataArray> || std::is_same_v<T, Dataset>) {
92 2495 : auto start = py::getattr(py_slice, "start");
93 2495 : auto stop = py::getattr(py_slice, "stop");
94 2495 : if (!start.is_none() || !stop.is_none()) { // Means default slice : is
95 : // treated as index slice
96 : try {
97 2447 : auto [start_var, stop_var] = label_bounds_from_pyslice(py_slice);
98 57 : return std::make_from_tuple<Slice>(
99 57 : get_slice_params(self, dim, start_var, stop_var));
100 2447 : } catch (const py::cast_error &) {
101 : }
102 : }
103 2553 : }
104 3973 : return from_py_slice(self, index);
105 3263 : }
106 :
107 : template <class T>
108 2699 : auto getitem(T &self, const std::tuple<Dim, scipp::index> &index) {
109 3907 : return self.slice(get_slice(self, index));
110 : }
111 :
112 : template <class T>
113 3198 : auto getitem(T &self, const std::tuple<Dim, py::slice> &index) {
114 5675 : return self.slice(get_slice_range(self, index));
115 : }
116 :
117 0 : template <class T> auto getitem(T &self, const py::ellipsis &) {
118 0 : return self.slice({});
119 : }
120 :
121 : // Helpers wrapped in struct to avoid unresolvable overloads.
122 : template <class T> struct slicer {
123 122 : static auto get_by_value(T &self,
124 : const std::tuple<std::string, Variable> &value) {
125 122 : auto &[dim, val] = value;
126 122 : return slice(self, Dim{dim}, val);
127 : }
128 :
129 367 : static void set_from_numpy(T &&slice, const py::object &obj) {
130 : core::CallDType<double, float, int64_t, int32_t,
131 367 : bool>::apply<SetData<T>::template Impl>(slice.dtype(),
132 : slice, obj);
133 367 : }
134 :
135 : template <class Other>
136 86 : static void set_from_view(T &self, const std::tuple<Dim, scipp::index> &index,
137 : const Other &data) {
138 86 : self.setSlice(get_slice(self, index), data);
139 74 : }
140 :
141 : template <class Other>
142 65 : static void set_from_view(T &self,
143 : const std::tuple<Dim, const py::slice> &index,
144 : const Other &data) {
145 65 : self.setSlice(get_slice_range(self, index), data);
146 63 : }
147 :
148 : template <class Other>
149 6 : static void set_from_view(T &self, const py::ellipsis &, const Other &data) {
150 6 : self.setSlice(Slice{}, data);
151 6 : }
152 :
153 : template <class Other>
154 9 : static void set_by_value(T &self,
155 : const std::tuple<std::string, Variable> &value,
156 : const Other &data) {
157 9 : auto &[dim, val] = value;
158 9 : self.setSlice(
159 9 : std::make_from_tuple<Slice>(get_slice_params(self, Dim(dim), val)),
160 : data);
161 9 : }
162 :
163 : // Manually dispatch based on the object we are assigning from in order to
164 : // cast it correctly to a scipp view, numpy array or fallback std::vector.
165 : // This needs to happen partly based on the dtype which cannot be encoded
166 : // in the Python bindings directly.
167 : template <class IndexOrRange>
168 524 : static void set(T &self, const IndexOrRange &index, const py::object &data) {
169 : if constexpr (std::is_same_v<T, Dataset>)
170 10 : if (py::isinstance<Dataset>(data))
171 8 : return set_from_view(self, index, py::cast<Dataset>(data));
172 :
173 : if constexpr (!std::is_same_v<T, Variable>)
174 65 : if (py::isinstance<DataArray>(data))
175 14 : return set_from_view(self, index, py::cast<DataArray>(data));
176 :
177 504 : if (py::isinstance<Variable>(data))
178 151 : return set_from_view(self, index, py::cast<Variable>(data));
179 :
180 : if constexpr (!std::is_same_v<T, Dataset>)
181 367 : return set_from_numpy(getitem(self, index), data);
182 :
183 0 : std::ostringstream oss;
184 0 : oss << "Cannot to assign a " << py::str(data.get_type())
185 0 : << " to a slice of a " << py::type_id<T>();
186 0 : throw py::type_error(oss.str());
187 0 : }
188 : };
189 :
190 : namespace {
191 3088 : void expect_implicit_dimension(const Sizes &dims) {
192 : using std::to_string;
193 3088 : if (dims.size() == 0)
194 0 : throw except::DimensionError("Slicing a scalar object is not possible.");
195 3088 : if (dims.size() > 1) {
196 : std::string msg("Slicing with implicit dimension label is only possible "
197 6 : "for 1-D objects. Got " +
198 12 : to_string(dims) + " with ndim=" + to_string(dims.size()) +
199 6 : ". Provide an explicit dimension label, e.g., var['" +
200 6 : to_string(*dims.begin()) + "', 0] instead of var[0].");
201 3 : throw except::DimensionError(msg);
202 3 : }
203 3085 : }
204 :
205 2430 : void expect_positional_index(const py::slice &py_slice) {
206 7288 : for (const auto &key : {"start", "stop"}) {
207 4859 : if (const auto index = py::getattr(py_slice, key); !index.is_none()) {
208 : try {
209 2330 : static_cast<void>(index.cast<Variable>());
210 1 : throw except::DimensionError(
211 2 : "Dimension must be specified when indexing with a label.");
212 2330 : } catch (const py::cast_error &) {
213 2329 : }
214 4859 : }
215 : }
216 2429 : }
217 :
218 : template <class T>
219 68 : T slice_by_list(const T &obj,
220 : const std::tuple<Dim, std::vector<scipp::index>> &index) {
221 142 : const auto make_slice = [](scipp::index p, const scipp::index s) {
222 142 : const auto positive_p = p < 0 ? s + p : p;
223 142 : if (positive_p < 0 || positive_p >= s)
224 0 : throw std::out_of_range("The requested index " + std::to_string(p) +
225 : " is out of range for dimension of length " +
226 : std::to_string(s));
227 142 : return std::tuple{positive_p, positive_p + 1};
228 : };
229 68 : const auto &[dim, indices] = index;
230 68 : const auto size = obj.dims()[dim];
231 68 : if (!indices.empty()) {
232 68 : const auto [min, max] = std::minmax_element(indices.begin(), indices.end());
233 68 : if (*min < -size || *max >= size) {
234 4 : const auto bad = *min < -size ? *min : *max;
235 4 : throw_index_error(bad, size);
236 : }
237 : }
238 64 : std::vector<scipp::index_pair> ranges;
239 64 : ranges.reserve(indices.size());
240 206 : for (const auto &pos : indices) {
241 142 : const auto [start, stop] = make_slice(pos, size);
242 0 : ranges.emplace_back(static_cast<scipp::index>(start),
243 142 : static_cast<scipp::index>(stop));
244 : }
245 64 : return extract_ranges(makeVariable<scipp::index_pair>(
246 128 : Dims{dim}, Shape{ranges.size()}, Values(ranges)),
247 128 : obj, dim);
248 64 : }
249 : } // namespace
250 :
251 : template <class T, class... Ignored>
252 9 : void bind_slice_methods(pybind11::class_<T, Ignored...> &c) {
253 : // Slice with implicit dim possible only if there is exactly one dimension. We
254 : // do *not* use the numpy/xarray mechanism which slices the outer dimension in
255 : // this case, since we consider it dangerous, leading to hard to find bugs.
256 38 : c.def("__getitem__", [](T &self, const scipp::index &index) {
257 596 : expect_implicit_dimension(self.dims());
258 596 : return getitem(self, {self.dim(), index});
259 : });
260 2136 : c.def("__getitem__", [](T &self, const py::slice &index) {
261 2411 : expect_implicit_dimension(self.dims());
262 2411 : expect_positional_index(index);
263 2410 : return getitem(self, {self.dim(), index});
264 : });
265 : // Note the order of overloads: For some reason pybind11(?) calls `len()` on
266 : // __getitem__ arguments when there is an overload accepting std::tuple. This
267 : // fails for scalar variables, so we place this before those overloads.
268 9 : c.def(
269 : "__getitem__",
270 402 : [](T &self, const Variable &condition) {
271 402 : return extract(self, condition);
272 : },
273 0 : py::call_guard<py::gil_scoped_release>());
274 : // These overloads must also be placed before the ones accepting std::tuple.
275 : // Otherwise, pybind11 would call `int()` on the 2nd tuple element
276 : // because of `Variable.__int__`.
277 : //
278 : // Note that label-based indexing with an implicit unique dim is not supported
279 : // for now. Labels can also be taken from any other coord, so if the coord
280 : // label is omitted we would have to "default" to using the dimension coord.
281 : // This may be too confusing, so we do not implement this for now. Note that
282 : // the objection to this is not absolute (unlike in the case of slicing outer
283 : // dimension above).
284 : if constexpr (std::is_same_v<T, DataArray>) {
285 3 : c.def("__getitem__", &slicer<T>::get_by_value);
286 3 : c.def("__setitem__", &slicer<T>::template set_by_value<Variable>);
287 3 : c.def("__setitem__", &slicer<T>::template set_by_value<DataArray>);
288 : }
289 : if constexpr (std::is_same_v<T, Dataset>) {
290 3 : c.def("__getitem__", &slicer<T>::get_by_value);
291 3 : c.def("__setitem__", &slicer<T>::template set_by_value<Dataset>);
292 : } else {
293 1289 : c.def("__len__", [](const T &self) {
294 1350 : if (self.dims().ndim() == 0)
295 2 : throw except::TypeError("len() of scalar object");
296 1348 : return self.dims().size(0);
297 : });
298 7 : c.def("_ipython_key_completions_", [](T &self) {
299 2 : py::typing::List<py::str> out;
300 6 : for (const auto &dim : self.dims()) {
301 4 : out.append(dim.name());
302 : }
303 2 : return out;
304 0 : });
305 : }
306 9 : c.def("__getitem__",
307 1739 : [](T &self, std::tuple<std::string, scipp::index> index) {
308 1739 : return getitem(self, to_dim_type(std::move(index)));
309 : });
310 360 : c.def("__getitem__", [](T &self, std::tuple<std::string, py::slice> index) {
311 785 : return getitem(self, to_dim_type(std::move(index)));
312 : });
313 9 : c.def("__getitem__", [](T &self, const py::ellipsis &index) {
314 0 : return getitem(self, index);
315 : });
316 9 : c.def("__setitem__",
317 106 : [](T &self, const scipp::index &index, const py::object &data) {
318 53 : expect_implicit_dimension(self.dims());
319 53 : slicer<T>::template set<std::tuple<Dim, scipp::index>>(
320 106 : self, {self.dim(), index}, data);
321 : });
322 9 : c.def("__setitem__",
323 38 : [](T &self, const py::slice &index, const py::object &data) {
324 19 : expect_implicit_dimension(self.dims());
325 19 : expect_positional_index(index);
326 19 : slicer<T>::template set<std::tuple<Dim, py::slice>>(
327 38 : self, {self.dim(), index}, data);
328 : });
329 97 : c.def("__setitem__", [](T &self, std::tuple<std::string, scipp::index> index,
330 : const py::object &data) {
331 397 : slicer<T>::template set(self, to_dim_type(std::move(index)), data);
332 : });
333 39 : c.def("__setitem__", [](T &self, std::tuple<std::string, py::slice> index,
334 : const py::object &data) {
335 49 : slicer<T>::template set(self, to_dim_type(std::move(index)), data);
336 : });
337 9 : c.def("__setitem__", &slicer<T>::template set<py::ellipsis>);
338 9 : c.def(
339 : "__getitem__",
340 9 : [](T &self, const std::vector<scipp::index> &indices) {
341 9 : expect_implicit_dimension(self.dims());
342 6 : return slice_by_list(self, {self.dim(), indices});
343 : },
344 0 : py::call_guard<py::gil_scoped_release>());
345 9 : c.def(
346 : "__getitem__",
347 62 : [](T &self, std::tuple<std::string, std::vector<scipp::index>> indices) {
348 62 : return slice_by_list<T>(self, to_dim_type(std::move(indices)));
349 : },
350 9 : py::call_guard<py::gil_scoped_release>());
351 9 : }
|