Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #pragma once
6 :
7 : #include <cstddef>
8 : #include <functional>
9 :
10 : #include "scipp/common/index_composition.h"
11 : #include "scipp/core/parallel.h"
12 : #include "scipp/variable/variable.h"
13 :
14 : #include "py_object.h"
15 : #include "pybind11.h"
16 :
17 : namespace py = pybind11;
18 :
19 : using namespace scipp;
20 :
21 : /// Map C++ types to Python types to perform conversion between scipp containers
22 : /// and numpy arrays.
23 : template <class T> struct ElementTypeMap {
24 : using PyType = T;
25 : constexpr static bool convert = false;
26 :
27 40897 : static void check_assignable(const py::object &, const units::Unit &) {}
28 : };
29 :
30 : template <> struct ElementTypeMap<scipp::core::time_point> {
31 : using PyType = int64_t;
32 : constexpr static bool convert = true;
33 :
34 : static void check_assignable(const py::object &obj, units::Unit unit);
35 : };
36 :
37 : template <> struct ElementTypeMap<scipp::python::PyObject> {
38 : using PyType = py::object;
39 : constexpr static bool convert = true;
40 :
41 45 : static void check_assignable(const py::object &, const units::Unit &) {}
42 : };
43 :
44 : /// Cast a py::object referring to an array to py::array_t<auto> if supported.
45 : /// Otherwise, copies the contents into a std::vector<auto>.
46 : template <class T>
47 23195 : auto cast_to_array_like(const py::object &obj, const units::Unit unit) {
48 : using TM = ElementTypeMap<T>;
49 : using PyType = typename TM::PyType;
50 23195 : TM::check_assignable(obj, unit);
51 : if constexpr (std::is_same_v<T, core::time_point>) {
52 : // pbj.cast<py::array_t<PyType> does not always work because
53 : // numpy.datetime64.__int__ delegates to datetime.datetime if the unit is
54 : // larger than ns and that cannot be converted to long.
55 : return obj.cast<py::array>()
56 : .attr("astype")(py::dtype::of<PyType>())
57 289 : .template cast<py::array_t<PyType>>();
58 : } else if constexpr (std::is_standard_layout_v<T> && std::is_trivial_v<T>) {
59 : // Casting to py::array_t applies all sorts of automatic conversions
60 : // such as integer to double, if required.
61 22766 : return obj.cast<py::array_t<PyType>>();
62 : } else {
63 : // py::array only supports POD types. Use a simple but expensive
64 : // solution for other types.
65 : // TODO Related to #290, we should properly support
66 : // multi-dimensional input, and ignore bad shapes.
67 : try {
68 196 : return obj.cast<const std::vector<PyType>>();
69 0 : } catch (std::runtime_error &) {
70 0 : const auto &array = obj.cast<py::array>();
71 0 : std::ostringstream oss;
72 0 : oss << "Unable to assign object of dtype " << py::str(array.dtype())
73 0 : << " to " << scipp::core::dtype<T>;
74 0 : throw std::invalid_argument(oss.str());
75 0 : }
76 : }
77 : }
78 :
79 : namespace scipp::detail {
80 : namespace {
81 : constexpr static size_t grainsize_1d = 10000;
82 :
83 23052 : template <class T> bool is_c_contiguous(const py::array_t<T> &array) {
84 : Py_buffer buffer;
85 23052 : if (PyObject_GetBuffer(array.ptr(), &buffer, PyBUF_C_CONTIGUOUS) != 0) {
86 807 : PyErr_Clear();
87 807 : return false;
88 : }
89 22245 : PyBuffer_Release(&buffer);
90 22245 : return true;
91 : }
92 :
93 : template <bool convert, class Source, class Destination>
94 21840528 : void copy_element(const Source &src, Destination &&dst) {
95 : if constexpr (convert) {
96 1273 : dst = std::remove_reference_t<Destination>{src};
97 : } else {
98 21839255 : std::forward<Destination>(dst) = src;
99 : }
100 21840528 : }
101 :
102 : template <bool convert, class T, class Dst>
103 0 : void copy_array_0d(const py::array_t<T> &src_array, Dst &dst) {
104 0 : const auto src = src_array.template unchecked<0>();
105 0 : auto it = dst.begin();
106 0 : copy_element<convert>(src(), *it);
107 0 : }
108 :
109 : template <bool convert, class T, class Dst>
110 220 : void copy_array_1d(const py::array_t<T> &src_array, Dst &dst) {
111 220 : const auto src = src_array.template unchecked<1>();
112 220 : const auto begin = dst.begin();
113 220 : core::parallel::parallel_for(
114 220 : core::parallel::blocked_range(0, src.shape(0), grainsize_1d),
115 19144 : [&](const auto &range) {
116 220 : auto it = begin + range.begin();
117 18924 : for (scipp::index i = range.begin(); i < range.end(); ++i, ++it) {
118 18704 : copy_element<convert>(src(i), *it);
119 : }
120 : });
121 220 : }
122 :
123 : template <bool convert, class T, class Dst>
124 154 : void copy_array_2d(const py::array_t<T> &src_array, Dst &dst) {
125 154 : const auto src = src_array.template unchecked<2>();
126 154 : const auto begin = dst.begin();
127 154 : core::parallel::parallel_for(
128 25404 : core::parallel::blocked_range(0, src.shape(0)), [&](const auto &range) {
129 154 : auto it = begin + range.begin() * src.shape(1);
130 2132 : for (scipp::index i = range.begin(); i < range.end(); ++i)
131 13852 : for (scipp::index j = 0; j < src.shape(1); ++j, ++it)
132 11874 : copy_element<convert>(src(i, j), *it);
133 : });
134 154 : }
135 :
136 : template <bool convert, class T, class Dst>
137 83 : void copy_array_3d(const py::array_t<T> &src_array, Dst &dst) {
138 83 : const auto src = src_array.template unchecked<3>();
139 83 : const auto begin = dst.begin();
140 83 : core::parallel::parallel_for(
141 4255 : core::parallel::blocked_range(0, src.shape(0)), [&](const auto &range) {
142 83 : auto it = begin + range.begin() * src.shape(1) * src.shape(2);
143 243 : for (scipp::index i = range.begin(); i < range.end(); ++i)
144 599 : for (scipp::index j = 0; j < src.shape(1); ++j)
145 1840 : for (scipp::index k = 0; k < src.shape(2); ++k, ++it)
146 1401 : copy_element<convert>(src(i, j, k), *it);
147 : });
148 83 : }
149 :
150 : template <bool convert, class T, class Dst>
151 113 : void copy_array_4d(const py::array_t<T> &src_array, Dst &dst) {
152 113 : const auto src = src_array.template unchecked<4>();
153 113 : const auto begin = dst.begin();
154 113 : core::parallel::parallel_for(
155 21200 : core::parallel::blocked_range(0, src.shape(0)), [&](const auto &range) {
156 113 : auto it =
157 113 : begin + range.begin() * src.shape(1) * src.shape(2) * src.shape(3);
158 311 : for (scipp::index i = range.begin(); i < range.end(); ++i)
159 728 : for (scipp::index j = 0; j < src.shape(1); ++j)
160 2322 : for (scipp::index k = 0; k < src.shape(2); ++k)
161 9632 : for (scipp::index l = 0; l < src.shape(3); ++l, ++it)
162 7840 : copy_element<convert>(src(i, j, k, l), *it);
163 : });
164 113 : }
165 :
166 : template <bool convert, class T, class Dst>
167 102 : void copy_array_5d(const py::array_t<T> &src_array, Dst &dst) {
168 102 : const auto src = src_array.template unchecked<5>();
169 102 : const auto begin = dst.begin();
170 102 : core::parallel::parallel_for(
171 85504 : core::parallel::blocked_range(0, src.shape(0)), [&](const auto &range) {
172 102 : auto it = begin + range.begin() * src.shape(1) * src.shape(2) *
173 102 : src.shape(3) * src.shape(4);
174 276 : for (scipp::index i = range.begin(); i < range.end(); ++i)
175 638 : for (scipp::index j = 0; j < src.shape(1); ++j)
176 2012 : for (scipp::index k = 0; k < src.shape(2); ++k)
177 8184 : for (scipp::index l = 0; l < src.shape(3); ++l)
178 40296 : for (scipp::index m = 0; m < src.shape(4); ++m, ++it)
179 33660 : copy_element<convert>(src(i, j, k, l, m), *it);
180 : });
181 102 : }
182 :
183 : template <bool convert, class T, class Dst>
184 135 : void copy_array_6d(const py::array_t<T> &src_array, Dst &dst) {
185 135 : const auto src = src_array.template unchecked<6>();
186 135 : const auto begin = dst.begin();
187 135 : core::parallel::parallel_for(
188 703904 : core::parallel::blocked_range(0, src.shape(0)), [&](const auto &range) {
189 135 : auto it = begin + range.begin() * src.shape(1) * src.shape(2) *
190 135 : src.shape(3) * src.shape(4) * src.shape(5);
191 373 : for (scipp::index i = range.begin(); i < range.end(); ++i)
192 895 : for (scipp::index j = 0; j < src.shape(1); ++j)
193 2903 : for (scipp::index k = 0; k < src.shape(2); ++k)
194 12116 : for (scipp::index l = 0; l < src.shape(3); ++l)
195 59394 : for (scipp::index m = 0; m < src.shape(4); ++m)
196 338520 : for (scipp::index n = 0; n < src.shape(5); ++n, ++it)
197 288996 : copy_element<convert>(src(i, j, k, l, m, n), *it);
198 : });
199 135 : }
200 :
201 : template <bool convert, class T, class Dst>
202 22245 : void copy_flattened(const py::array_t<T> &src_array, Dst &dst) {
203 22245 : const auto src_buffer = src_array.request();
204 22245 : auto src = reinterpret_cast<const T *>(src_buffer.ptr);
205 22245 : const auto begin = dst.begin();
206 22245 : core::parallel::parallel_for(
207 22245 : core::parallel::blocked_range(0, src_buffer.size, grainsize_1d),
208 20203404 : [&](const auto &range) {
209 22245 : auto it = begin + range.begin();
210 21500298 : for (scipp::index i = range.begin(); i < range.end(); ++i, ++it) {
211 21478053 : copy_element<convert>(src[i], *it);
212 : }
213 : });
214 22245 : }
215 :
216 23052 : template <class T> auto memory_begin_end(const py::buffer_info &info) {
217 23052 : auto *begin = static_cast<const T *>(info.ptr);
218 23052 : auto *end = static_cast<const T *>(info.ptr);
219 23052 : const auto [begin_offset, end_offset] =
220 23052 : memory_bounds(info.shape.begin(), info.shape.end(), info.strides.begin());
221 23052 : return std::pair{begin + begin_offset, end + end_offset};
222 : }
223 :
224 : template <class T, class View>
225 23052 : bool memory_overlaps(const py::array_t<T> &data, const View &view) {
226 23052 : const auto &buffer_info = data.request();
227 23052 : const auto [data_begin, data_end] = memory_begin_end<std::byte>(buffer_info);
228 23052 : const auto begin = view.begin();
229 23052 : const auto end = view.end();
230 23052 : const auto view_begin = reinterpret_cast<const std::byte *>(&*begin);
231 23052 : const auto view_end = reinterpret_cast<const std::byte *>(&*end);
232 : // Note the use of std::less, pointer comparison with operator< may be
233 : // undefined behavior with pointers from different arrays.
234 42257 : return std::less<>()(data_begin, view_end) &&
235 42257 : std::greater<>()(data_end, view_begin);
236 23052 : }
237 :
238 : /*
239 : * The code here is not pretty.
240 : * But a generic copy function would be much more complicated than the
241 : * straightforward nested loops we use here.
242 : * In practice, there is also little need to support ndim > 6 for non-contiguous
243 : * data as transform does not support such variables either.
244 : *
245 : * For a working, generic implementation, see git ref
246 : * bd2e5f0a84d02bd5baf6d0afc32a2ab66dc09e2b
247 : * and its history, in particular
248 : * 86761b1e280a63b4f0b723a165188d21dd097972
249 : * 8721b2d02b98c1acae5c786ffda88055551d832b
250 : * 4c03a553827f2881672ae1f00f43ae06e879452c
251 : * c2a1e3898467083bf7d019a3cb54702c8b50ba86
252 : * c2a1e3898467083bf7d019a3cb54702c8b50ba86
253 : */
254 : /// Copy all elements from src into dst.
255 : /// Performs an explicit conversion of elements in `src` to the element type of
256 : /// `dst` if `convert == true`.
257 : /// Otherwise, elements in src are simply assigned to dst.
258 : template <bool convert, class T, class Dst>
259 23052 : void copy_elements(const py::array_t<T> &src, Dst &dst) {
260 23052 : if (scipp::size(dst) != src.size())
261 0 : throw std::runtime_error(
262 : "Numpy data size does not match size of target object.");
263 :
264 69156 : const auto dispatch = [&dst](const py::array_t<T> &src_) {
265 23052 : if (is_c_contiguous(src_))
266 22245 : return copy_flattened<convert>(src_, dst);
267 :
268 807 : switch (src_.ndim()) {
269 0 : case 0:
270 0 : return copy_array_0d<convert>(src_, dst);
271 220 : case 1:
272 220 : return copy_array_1d<convert>(src_, dst);
273 154 : case 2:
274 154 : return copy_array_2d<convert>(src_, dst);
275 83 : case 3:
276 83 : return copy_array_3d<convert>(src_, dst);
277 113 : case 4:
278 113 : return copy_array_4d<convert>(src_, dst);
279 102 : case 5:
280 102 : return copy_array_5d<convert>(src_, dst);
281 135 : case 6:
282 135 : return copy_array_6d<convert>(src_, dst);
283 0 : default:
284 0 : throw std::runtime_error(
285 : "Numpy array with non-c-contiguous memory layout has more "
286 : "dimensions than supported in the current implementation. "
287 : "Try making a copy of the array first to get a "
288 : "c-contiguous layout.");
289 : }
290 : };
291 23052 : dispatch(memory_overlaps(src, dst) ? py::array_t<T>(src.request()) : src);
292 23052 : }
293 : } // namespace
294 : } // namespace scipp::detail
295 :
296 : template <class SourceDType, class Destination>
297 23055 : void copy_array_into_view(const py::array_t<SourceDType> &src,
298 : Destination &&dst, const Dimensions &dims) {
299 23055 : const auto &shape = dims.shape();
300 23055 : if (!std::equal(shape.begin(), shape.end(), src.shape(),
301 23055 : src.shape() + src.ndim()))
302 3 : throw except::DimensionError("The shape of the provided data "
303 : "does not match the existing "
304 : "object.");
305 : scipp::detail::copy_elements<ElementTypeMap<
306 23052 : typename std::remove_reference_t<Destination>::value_type>::convert>(src,
307 : dst);
308 23052 : }
309 :
310 : template <class SourceDType, class Destination>
311 98 : void copy_array_into_view(const std::vector<SourceDType> &src, Destination &dst,
312 : const Dimensions &) {
313 98 : core::expect::sizeMatches(dst, src);
314 98 : std::copy(begin(src), end(src), dst.begin());
315 98 : }
316 :
317 : core::time_point make_time_point(const py::buffer &buffer, int64_t scale = 1);
|