LCOV - code coverage report
Current view: top level - python - dataset.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 171 184 92.9 %
Date: 2024-04-28 01:25:40 Functions: 31 32 96.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : 
       6             : #include "scipp/dataset/dataset.h"
       7             : #include "scipp/dataset/math.h"
       8             : #include "scipp/dataset/rebin.h"
       9             : #include "scipp/dataset/sized_dict.h"
      10             : 
      11             : #include "bind_data_access.h"
      12             : #include "bind_data_array.h"
      13             : #include "bind_operators.h"
      14             : #include "bind_slice_methods.h"
      15             : #include "pybind11.h"
      16             : #include "rename.h"
      17             : 
      18             : using namespace scipp;
      19             : using namespace scipp::dataset;
      20             : 
      21             : namespace py = pybind11;
      22             : 
      23             : namespace {
      24             : template <class T, class... Ignored>
      25           3 : void bind_dataset_properties(py::class_<T, Ignored...> &c) {
      26           3 :   c.def("drop_coords", [](T &self, const std::string &coord_name) {
      27           4 :     std::vector<scipp::Dim> coord_names_c = {scipp::Dim{coord_name}};
      28           8 :     return self.drop_coords(coord_names_c);
      29           4 :   });
      30           3 :   c.def("drop_coords",
      31           3 :         [](T &self, const std::vector<std::string> &coord_names) {
      32           3 :           std::vector<scipp::Dim> coord_names_c;
      33           3 :           std::transform(coord_names.begin(), coord_names.end(),
      34             :                          std::back_inserter(coord_names_c),
      35           6 :                          [](const auto &name) { return scipp::Dim{name}; });
      36           6 :           return self.drop_coords(coord_names_c);
      37           3 :         });
      38           3 : }
      39             : 
      40             : template <class T, class... Ignored>
      41           3 : void bind_dataset_coord_properties(py::class_<T, Ignored...> &c) {
      42             :   // TODO does this comment still apply?
      43             :   // For some reason the return value policy and/or keep-alive policy do not
      44             :   // work unless we wrap things in py::cpp_function.
      45           3 :   c.def_property_readonly(
      46         702 :       "coords", [](T &self) -> decltype(auto) { return self.coords(); },
      47             :       R"(
      48             :       Dict of coordinates.)");
      49             :   // Metadata for dataset is same as `coords` since dataset cannot have attrs
      50             :   // (unaligned coords).
      51           3 :   c.def_property_readonly(
      52           0 :       "meta", [](T &self) -> decltype(auto) { return self.meta(); },
      53             :       R"(
      54             :       Dict of coordinates.)");
      55           3 : }
      56             : 
      57             : template <class T, class... Ignored>
      58           3 : void bind_dataset_view_methods(py::class_<T, Ignored...> &c) {
      59           3 :   bind_common_operators(c);
      60           3 :   c.def("__len__", &T::size);
      61           0 :   c.def(
      62             :       "__iter__",
      63           4 :       [](const T &self) {
      64             :         return py::make_iterator(self.keys_begin(), self.keys_end(),
      65           4 :                                  py::return_value_policy::move);
      66             :       },
      67           3 :       py::return_value_policy::move, py::keep_alive<0, 1>());
      68           0 :   c.def(
      69         223 :       "keys", [](T &self) { return keys_view(self); },
      70           3 :       py::return_value_policy::move, py::keep_alive<0, 1>(),
      71             :       R"(view on self's keys)");
      72           0 :   c.def(
      73          69 :       "values", [](T &self) { return values_view(self); },
      74           3 :       py::return_value_policy::move, py::keep_alive<0, 1>(),
      75             :       R"(view on self's values)");
      76           0 :   c.def(
      77         507 :       "items", [](T &self) { return items_view(self); },
      78           3 :       py::return_value_policy::move, py::keep_alive<0, 1>(),
      79             :       R"(view on self's items)");
      80           3 :   c.def("__getitem__",
      81         383 :         [](const T &self, const std::string &name) { return self[name]; });
      82           3 :   c.def("__contains__", &T::contains);
      83           3 :   c.def("_ipython_key_completions_", [](T &self) {
      84           1 :     py::list out;
      85           1 :     const auto end = self.keys_end();
      86           3 :     for (auto it = self.keys_begin(); it != end; ++it) {
      87           2 :       out.append(*it);
      88             :     }
      89           2 :     return out;
      90           0 :   });
      91           3 :   bind_common_data_properties(c);
      92           3 :   bind_pop(c);
      93           3 : }
      94             : 
      95             : template <class T, class... Ignored>
      96           3 : void bind_data_array(py::class_<T, Ignored...> &c) {
      97           3 :   bind_data_array_properties(c);
      98           3 :   bind_common_operators(c);
      99           3 :   bind_data_properties(c);
     100           3 :   bind_slice_methods(c);
     101           3 :   bind_in_place_binary<DataArray>(c);
     102           3 :   bind_in_place_binary<Variable>(c);
     103           3 :   bind_binary<Dataset>(c);
     104           3 :   bind_binary<DataArray>(c);
     105           3 :   bind_binary<Variable>(c);
     106           3 :   bind_binary_scalars(c);
     107           3 :   bind_reverse_binary_scalars(c);
     108           3 :   bind_comparison<DataArray>(c);
     109           3 :   bind_comparison<Variable>(c);
     110           3 :   bind_unary(c);
     111           3 :   bind_logical<DataArray>(c);
     112           3 :   bind_logical<Variable>(c);
     113           3 :   bind_boolean_unary(c);
     114           3 : }
     115             : 
     116           6 : template <class T> void bind_rebin(py::module &m) {
     117           6 :   m.def(
     118             :       "rebin",
     119           9 :       [](const T &x, const std::string &dim, const Variable &bins) {
     120           9 :         return rebin(x, Dim{dim}, bins);
     121             :       },
     122           0 :       py::arg("x"), py::arg("dim"), py::arg("bins"),
     123           6 :       py::call_guard<py::gil_scoped_release>());
     124           6 : }
     125             : 
     126       28980 : template <class Key, class Value> auto to_cpp_dict(const py::dict &dict) {
     127       28980 :   core::Dict<Key, Value> out;
     128       37902 :   for (const auto &[key, val] : dict) {
     129        8922 :     out.insert_or_assign(Key{key.template cast<std::string>()},
     130             :                          val.template cast<Value &>());
     131             :   }
     132       28980 :   return out;
     133           0 : }
     134             : 
     135         798 : auto dataset_from_data_and_coords(const py::dict &data,
     136             :                                   const py::dict &coords) {
     137         798 :   Dataset d;
     138        1757 :   for (auto &&[name, item] : data) {
     139         959 :     if (py::isinstance<DataArray>(item)) {
     140         473 :       d.setDataInit(name.cast<std::string>(), item.cast<DataArray &>());
     141             :     } else {
     142         486 :       d.setDataInit(name.cast<std::string>(), item.cast<Variable &>());
     143             :     }
     144             :   }
     145         798 :   if (d.is_valid()) {
     146             :     // Need to use dataset_from_coords when there is no data to initialize
     147             :     // dimensions properly.
     148        1008 :     for (auto &&[dim, coord] : coords)
     149         227 :       d.setCoord(Dim{dim.cast<std::string>()}, coord.cast<Variable &>());
     150             :   }
     151         798 :   return d;
     152           0 : }
     153             : 
     154          17 : auto dataset_from_coords(const py::dict &py_coords) {
     155          17 :   typename Coords::holder_type coords;
     156          47 :   for (auto &&[dim, coord] : py_coords)
     157          30 :     coords.insert_or_assign(Dim{dim.cast<std::string>()},
     158             :                             coord.cast<Variable &>());
     159          35 :   return Dataset({}, std::move(coords));
     160          17 : }
     161             : } // namespace
     162             : 
     163           3 : void init_dataset(py::module &m) {
     164           3 :   static_cast<void>(py::class_<Slice>(m, "Slice"));
     165             : 
     166           3 :   bind_helper_view<items_view, Dataset>(m, "Dataset");
     167           3 :   bind_helper_view<str_items_view, Coords>(m, "Coords");
     168           3 :   bind_helper_view<items_view, Masks>(m, "Masks");
     169           3 :   bind_helper_view<keys_view, Dataset>(m, "Dataset");
     170           3 :   bind_helper_view<str_keys_view, Coords>(m, "Coords");
     171           3 :   bind_helper_view<keys_view, Masks>(m, "Masks");
     172           3 :   bind_helper_view<values_view, Dataset>(m, "Dataset");
     173           3 :   bind_helper_view<values_view, Coords>(m, "Coords");
     174           3 :   bind_helper_view<values_view, Masks>(m, "Masks");
     175             : 
     176           3 :   bind_mutable_view_no_dim<Coords>(m, "Coords",
     177             :                                    R"(dict-like collection of meta data
     178             : 
     179             : Returned by :py:func:`DataArray.coords`, :py:func:`DataArray.attrs`, :py:func:`DataArray.meta`,
     180             : and the corresponding properties of :py:class:`Dataset`.)");
     181           3 :   bind_mutable_view<Masks>(m, "Masks", R"(dict-like collection of masks.
     182             : 
     183             : Returned by :py:func:`DataArray.masks`)");
     184             : 
     185             :   py::class_<DataArray> dataArray(m, "DataArray", R"(
     186           3 :     Named variable with associated coords, masks, and attributes.)");
     187           3 :   py::options options;
     188           3 :   options.disable_function_signatures();
     189           6 :   dataArray.def(
     190           3 :       py::init([](const Variable &data, const py::object &coords,
     191             :                   const py::object &masks, const py::object &attrs,
     192             :                   const std::string &name) {
     193       19326 :         return DataArray{data, to_cpp_dict<Dim, Variable>(coords),
     194       19326 :                          to_cpp_dict<std::string, Variable>(masks),
     195       38643 :                          to_cpp_dict<Dim, Variable>(attrs), name};
     196             :       }),
     197           6 :       py::arg("data"), py::kw_only(), py::arg("coords") = py::dict(),
     198           6 :       py::arg("masks") = py::dict(), py::arg("attrs") = py::dict(),
     199           6 :       py::arg("name") = std::string{},
     200             :       R"doc(__init__(self, data: Variable, coords: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}, masks: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}, attrs: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}, name: str = '') -> None
     201             : 
     202             :           DataArray initializer.
     203             : 
     204             :           Parameters
     205             :           ----------
     206             :           data:
     207             :               Data and optionally variances.
     208             :           coords:
     209             :               Coordinates referenced by dimension.
     210             :           masks:
     211             :               Masks referenced by name.
     212             :           attrs:
     213             :               Attributes referenced by dimension.
     214             :           name:
     215             :               Name of the data array.
     216             :           )doc");
     217           3 :   options.enable_function_signatures();
     218             : 
     219           3 :   bind_data_array(dataArray);
     220             : 
     221             :   py::class_<Dataset> dataset(m, "Dataset", R"(
     222           3 :   Dict of data arrays with aligned dimensions.)");
     223           3 :   options.disable_function_signatures();
     224           3 :   dataset.def(
     225           3 :       py::init([](const py::object &data, const py::object &coords) {
     226         799 :         if (data.is_none() && coords.is_none())
     227           1 :           throw py::type_error("Dataset needs data or coordinates or both.");
     228         798 :         const auto data_dict = data.is_none() ? py::dict() : py::dict(data);
     229             :         const auto coords_dict =
     230         798 :             coords.is_none() ? py::dict() : py::dict(coords);
     231         798 :         auto d = dataset_from_data_and_coords(data_dict, coords_dict);
     232        1595 :         return d.is_valid() ? d : dataset_from_coords(coords_dict);
     233         800 :       }),
     234           6 :       py::arg("data") = py::none{}, py::kw_only(),
     235           6 :       py::arg("coords") = py::none{},
     236             :       R"doc(__init__(self, data: Union[typing.Mapping[str, Union[Variable, DataArray]], Iterable[Tuple[str, Union[Variable, DataArray]]]] = {}, coords: Union[typing.Mapping[str, Variable], Iterable[Tuple[str, Variable]]] = {}) -> None
     237             : 
     238             :       Dataset initializer.
     239             : 
     240             :       Parameters
     241             :       ----------
     242             :       data:
     243             :           Dictionary of name and data pairs.
     244             :       coords:
     245             :           Dictionary of name and coord pairs.
     246             :       )doc");
     247           3 :   options.enable_function_signatures();
     248             : 
     249             :   dataset
     250           3 :       .def("__setitem__",
     251           4 :            [](Dataset &self, const std::string &name, const Variable &data) {
     252           4 :              self.setData(name, data);
     253           3 :            })
     254           3 :       .def("__setitem__",
     255          13 :            [](Dataset &self, const std::string &name, const DataArray &data) {
     256          13 :              self.setData(name, data);
     257          12 :            })
     258           3 :       .def("__delitem__", &Dataset::erase,
     259           0 :            py::call_guard<py::gil_scoped_release>())
     260           3 :       .def("clear", &Dataset::clear,
     261             :            R"(Removes all data, preserving coordinates.)");
     262             : 
     263           3 :   bind_dataset_view_methods(dataset);
     264           3 :   bind_dict_update(dataset,
     265          11 :                    [](Dataset &self, const std::string &key,
     266          11 :                       const DataArray &value) { self.setData(key, value); });
     267             : 
     268           3 :   bind_dataset_coord_properties(dataset);
     269           3 :   bind_dataset_properties(dataset);
     270             : 
     271           3 :   bind_slice_methods(dataset);
     272             : 
     273           3 :   bind_in_place_binary<Dataset>(dataset);
     274           3 :   bind_in_place_binary<DataArray>(dataset);
     275           3 :   bind_in_place_binary<Variable>(dataset);
     276           3 :   bind_in_place_binary_scalars(dataset);
     277           3 :   bind_in_place_binary_scalars(dataArray);
     278             : 
     279           3 :   bind_binary<Dataset>(dataset);
     280           3 :   bind_binary<DataArray>(dataset);
     281           3 :   bind_binary<Variable>(dataset);
     282             : 
     283           3 :   dataArray.def("_rename_dims", &rename_dims<DataArray>);
     284           3 :   dataset.def("_rename_dims", &rename_dims<Dataset>);
     285             : 
     286           3 :   m.def(
     287             :       "merge",
     288           1 :       [](const Dataset &lhs, const Dataset &rhs) {
     289           1 :         return dataset::merge(lhs, rhs);
     290             :       },
     291           0 :       py::arg("lhs"), py::arg("rhs"), py::call_guard<py::gil_scoped_release>());
     292             : 
     293           3 :   m.def(
     294             :       "irreducible_mask",
     295          63 :       [](const Masks &masks, const std::string &dim) {
     296          63 :         py::gil_scoped_release release;
     297          63 :         auto mask = irreducible_mask(masks, Dim{dim});
     298          63 :         py::gil_scoped_acquire acquire;
     299         189 :         return mask.is_valid() ? py::cast(mask) : py::none();
     300          63 :       },
     301           0 :       py::arg("masks"), py::arg("dim"));
     302             : 
     303           3 :   m.def(
     304           6 :       "reciprocal", [](const DataArray &self) { return reciprocal(self); },
     305           0 :       py::arg("x"), py::call_guard<py::gil_scoped_release>());
     306             : 
     307           3 :   bind_astype(dataArray);
     308             : 
     309           3 :   bind_rebin<DataArray>(m);
     310           3 :   bind_rebin<Dataset>(m);
     311           3 : }

Generated by: LCOV version 1.14