LCOV - code coverage report
Current view: top level - python - dataset.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 173 188 92.0 %
Date: 2024-12-01 01:56:34 Functions: 32 33 97.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : 
       6             : #include "scipp/dataset/dataset.h"
       7             : #include "scipp/dataset/math.h"
       8             : #include "scipp/dataset/rebin.h"
       9             : #include "scipp/dataset/sized_dict.h"
      10             : 
      11             : #include "bind_data_access.h"
      12             : #include "bind_data_array.h"
      13             : #include "bind_operators.h"
      14             : #include "bind_slice_methods.h"
      15             : #include "pybind11.h"
      16             : #include "rename.h"
      17             : 
      18             : using namespace scipp;
      19             : using namespace scipp::dataset;
      20             : 
      21             : namespace py = pybind11;
      22             : 
      23             : namespace {
      24             : template <class T, class... Ignored>
      25           3 : void bind_dataset_properties(py::class_<T, Ignored...> &c) {
      26           3 :   c.def("drop_coords", [](T &self, const std::string &coord_name) {
      27           4 :     std::vector<scipp::Dim> coord_names_c = {scipp::Dim{coord_name}};
      28           8 :     return self.drop_coords(coord_names_c);
      29           4 :   });
      30           3 :   c.def("drop_coords",
      31           3 :         [](T &self, const std::vector<std::string> &coord_names) {
      32           3 :           std::vector<scipp::Dim> coord_names_c;
      33           3 :           std::transform(coord_names.begin(), coord_names.end(),
      34             :                          std::back_inserter(coord_names_c),
      35           6 :                          [](const auto &name) { return scipp::Dim{name}; });
      36           6 :           return self.drop_coords(coord_names_c);
      37           3 :         });
      38           3 : }
      39             : 
      40             : template <class T, class... Ignored>
      41           3 : void bind_dataset_coord_properties(py::class_<T, Ignored...> &c) {
      42             :   // TODO does this comment still apply?
      43             :   // For some reason the return value policy and/or keep-alive policy do not
      44             :   // work unless we wrap things in py::cpp_function.
      45           3 :   c.def_property_readonly(
      46         716 :       "coords", [](T &self) -> decltype(auto) { return self.coords(); },
      47             :       R"(
      48             :       Dict of coordinates.)");
      49             :   // Metadata for dataset is same as `coords` since dataset cannot have attrs
      50             :   // (unaligned coords).
      51           3 :   c.def_property_readonly(
      52           0 :       "meta", [](T &self) -> decltype(auto) { return self.meta(); },
      53             :       R"(
      54             :       Dict of coordinates.)");
      55           3 : }
      56             : 
      57             : template <class... Ignored>
      58           3 : void bind_dataset_view_methods(py::class_<Dataset, Ignored...> &c) {
      59           3 :   bind_common_operators(c);
      60           3 :   c.def("__len__", &Dataset::size);
      61           0 :   c.def(
      62             :       "__iter__",
      63           4 :       [](const Dataset &self) {
      64             :         return py::make_iterator(self.keys_begin(), self.keys_end(),
      65           4 :                                  py::return_value_policy::move);
      66             :       },
      67           3 :       py::return_value_policy::move, py::keep_alive<0, 1>());
      68           0 :   c.def(
      69         223 :       "keys", [](Dataset &self) { return keys_view(self); },
      70           3 :       py::return_value_policy::move, py::keep_alive<0, 1>(),
      71             :       R"(view on self's keys)");
      72           0 :   c.def(
      73          70 :       "values", [](Dataset &self) { return values_view(self); },
      74           3 :       py::return_value_policy::move, py::keep_alive<0, 1>(),
      75             :       R"(view on self's values)");
      76           0 :   c.def(
      77         507 :       "items", [](Dataset &self) { return items_view(self); },
      78           3 :       py::return_value_policy::move, py::keep_alive<0, 1>(),
      79             :       R"(view on self's items)");
      80           3 :   c.def("__getitem__", [](const Dataset &self, const std::string &name) {
      81         383 :     return self[name];
      82             :   });
      83           3 :   c.def("__contains__", [](const Dataset &self, const py::handle &key) {
      84             :     try {
      85          19 :       return self.contains(key.cast<std::string>());
      86           0 :     } catch (py::cast_error &) {
      87           0 :       return false; // if `key` is not a string, it cannot be contained
      88             :     }
      89             :   });
      90           3 :   c.def("_ipython_key_completions_", [](Dataset &self) {
      91           1 :     py::typing::List<py::str> out;
      92           1 :     const auto end = self.keys_end();
      93           3 :     for (auto it = self.keys_begin(); it != end; ++it) {
      94           2 :       out.append(*it);
      95             :     }
      96           2 :     return out;
      97           0 :   });
      98           3 :   bind_common_data_properties(c);
      99           3 :   bind_pop(c);
     100           3 : }
     101             : 
     102             : template <class T, class... Ignored>
     103           3 : void bind_data_array(py::class_<T, Ignored...> &c) {
     104           3 :   bind_data_array_properties(c);
     105           3 :   bind_common_operators(c);
     106           3 :   bind_data_properties(c);
     107           3 :   bind_slice_methods(c);
     108           3 :   bind_in_place_binary<DataArray>(c);
     109           3 :   bind_in_place_binary<Variable>(c);
     110           3 :   bind_binary<Dataset>(c);
     111           3 :   bind_binary<DataArray>(c);
     112           3 :   bind_binary<Variable>(c);
     113           3 :   bind_binary_scalars(c);
     114           3 :   bind_reverse_binary_scalars(c);
     115           3 :   bind_comparison<DataArray>(c);
     116           3 :   bind_comparison<Variable>(c);
     117           3 :   bind_unary(c);
     118           3 :   bind_logical<DataArray>(c);
     119           3 :   bind_logical<Variable>(c);
     120           3 :   bind_boolean_unary(c);
     121           3 : }
     122             : 
     123           6 : template <class T> void bind_rebin(py::module &m) {
     124           6 :   m.def(
     125             :       "rebin",
     126           5 :       [](const T &x, const std::string &dim, const Variable &bins) {
     127           5 :         return rebin(x, Dim{dim}, bins);
     128             :       },
     129           0 :       py::arg("x"), py::arg("dim"), py::arg("bins"),
     130           6 :       py::call_guard<py::gil_scoped_release>());
     131           6 : }
     132             : 
     133       30735 : template <class Key, class Value> auto to_cpp_dict(const py::dict &dict) {
     134       30735 :   core::Dict<Key, Value> out;
     135       40183 :   for (const auto &[key, val] : dict) {
     136        9448 :     out.insert_or_assign(Key{key.template cast<std::string>()},
     137             :                          val.template cast<Value &>());
     138             :   }
     139       30735 :   return out;
     140           0 : }
     141             : 
     142         798 : auto dataset_from_data_and_coords(const py::dict &data,
     143             :                                   const py::dict &coords) {
     144         798 :   Dataset d;
     145        1757 :   for (auto &&[name, item] : data) {
     146         959 :     if (py::isinstance<DataArray>(item)) {
     147         473 :       d.setDataInit(name.cast<std::string>(), item.cast<DataArray &>());
     148             :     } else {
     149         486 :       d.setDataInit(name.cast<std::string>(), item.cast<Variable &>());
     150             :     }
     151             :   }
     152         798 :   if (d.is_valid()) {
     153             :     // Need to use dataset_from_coords when there is no data to initialize
     154             :     // dimensions properly.
     155        1008 :     for (auto &&[dim, coord] : coords)
     156         227 :       d.setCoord(Dim{dim.cast<std::string>()}, coord.cast<Variable &>());
     157             :   }
     158         798 :   return d;
     159           0 : }
     160             : 
     161          17 : auto dataset_from_coords(const py::dict &py_coords) {
     162          17 :   typename Coords::holder_type coords;
     163          47 :   for (auto &&[dim, coord] : py_coords)
     164          30 :     coords.insert_or_assign(Dim{dim.cast<std::string>()},
     165             :                             coord.cast<Variable &>());
     166          35 :   return Dataset({}, std::move(coords));
     167          17 : }
     168             : } // namespace
     169             : 
     170           3 : void init_dataset(py::module &m) {
     171           3 :   static_cast<void>(py::class_<Slice>(m, "Slice"));
     172             : 
     173           3 :   bind_helper_view<items_view, Dataset>(m, "Dataset");
     174           3 :   bind_helper_view<str_items_view, Coords>(m, "Coords");
     175           3 :   bind_helper_view<items_view, Masks>(m, "Masks");
     176           3 :   bind_helper_view<keys_view, Dataset>(m, "Dataset");
     177           3 :   bind_helper_view<str_keys_view, Coords>(m, "Coords");
     178           3 :   bind_helper_view<keys_view, Masks>(m, "Masks");
     179           3 :   bind_helper_view<values_view, Dataset>(m, "Dataset");
     180           3 :   bind_helper_view<values_view, Coords>(m, "Coords");
     181           3 :   bind_helper_view<values_view, Masks>(m, "Masks");
     182             : 
     183           3 :   bind_mutable_view_no_dim<Coords>(m, "Coords",
     184             :                                    R"(dict-like collection of meta data
     185             : 
     186             : Returned by :py:func:`DataArray.coords`, :py:func:`DataArray.attrs`, :py:func:`DataArray.meta`,
     187             : and the corresponding properties of :py:class:`Dataset`.)");
     188           3 :   bind_mutable_view<Masks>(m, "Masks", R"(dict-like collection of masks.
     189             : 
     190             : Returned by :py:func:`DataArray.masks`)");
     191             : 
     192             :   py::class_<DataArray> dataArray(m, "DataArray", R"(
     193           3 :     Named variable with associated coords, masks, and attributes.)");
     194           3 :   py::options options;
     195           3 :   options.disable_function_signatures();
     196           6 :   dataArray.def(
     197           3 :       py::init([](const Variable &data, const py::object &coords,
     198             :                   const py::object &masks, const py::object &attrs,
     199             :                   const std::string &name) {
     200       20496 :         return DataArray{data, to_cpp_dict<Dim, Variable>(coords),
     201       20496 :                          to_cpp_dict<std::string, Variable>(masks),
     202       40983 :                          to_cpp_dict<Dim, Variable>(attrs), name};
     203             :       }),
     204           6 :       py::arg("data"), py::kw_only(), py::arg("coords") = py::dict(),
     205           6 :       py::arg("masks") = py::dict(), py::arg("attrs") = py::dict(),
     206           6 :       py::arg("name") = std::string{},
     207             :       R"doc(__init__(self, data: Variable, coords: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}, masks: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}, attrs: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}, name: str = '') -> None
     208             : 
     209             :           DataArray initializer.
     210             : 
     211             :           Parameters
     212             :           ----------
     213             :           data:
     214             :               Data and optionally variances.
     215             :           coords:
     216             :               Coordinates referenced by dimension.
     217             :           masks:
     218             :               Masks referenced by name.
     219             :           attrs:
     220             :               Attributes referenced by dimension.
     221             :           name:
     222             :               Name of the data array.
     223             :           )doc");
     224           3 :   options.enable_function_signatures();
     225             : 
     226           3 :   bind_data_array(dataArray);
     227             : 
     228             :   py::class_<Dataset> dataset(m, "Dataset", R"(
     229           3 :   Dict of data arrays with aligned dimensions.)");
     230           3 :   options.disable_function_signatures();
     231           3 :   dataset.def(
     232           3 :       py::init([](const py::object &data, const py::object &coords) {
     233         799 :         if (data.is_none() && coords.is_none())
     234           1 :           throw py::type_error("Dataset needs data or coordinates or both.");
     235         798 :         const auto data_dict = data.is_none() ? py::dict() : py::dict(data);
     236             :         const auto coords_dict =
     237         798 :             coords.is_none() ? py::dict() : py::dict(coords);
     238         798 :         auto d = dataset_from_data_and_coords(data_dict, coords_dict);
     239        1595 :         return d.is_valid() ? d : dataset_from_coords(coords_dict);
     240         800 :       }),
     241           6 :       py::arg("data") = py::none{}, py::kw_only(),
     242           6 :       py::arg("coords") = py::none{},
     243             :       R"doc(__init__(self, data: Union[Mapping[str, Union[Variable, DataArray]], Iterable[tuple[str, Union[Variable, DataArray]]]] = {}, coords: Union[Mapping[str, Variable], Iterable[tuple[str, Variable]]] = {}) -> None
     244             : 
     245             :       Dataset initializer.
     246             : 
     247             :       Parameters
     248             :       ----------
     249             :       data:
     250             :           Dictionary of name and data pairs.
     251             :       coords:
     252             :           Dictionary of name and coord pairs.
     253             :       )doc");
     254           3 :   options.enable_function_signatures();
     255             : 
     256             :   dataset
     257           3 :       .def("__setitem__",
     258           4 :            [](Dataset &self, const std::string &name, const Variable &data) {
     259           4 :              self.setData(name, data);
     260           3 :            })
     261           3 :       .def("__setitem__",
     262          13 :            [](Dataset &self, const std::string &name, const DataArray &data) {
     263          13 :              self.setData(name, data);
     264          12 :            })
     265           3 :       .def("__delitem__", &Dataset::erase,
     266           0 :            py::call_guard<py::gil_scoped_release>())
     267           3 :       .def("clear", &Dataset::clear,
     268             :            R"(Removes all data, preserving coordinates.)");
     269             : 
     270           3 :   bind_dataset_view_methods(dataset);
     271           3 :   bind_dict_update(dataset,
     272          11 :                    [](Dataset &self, const std::string &key,
     273          11 :                       const DataArray &value) { self.setData(key, value); });
     274             : 
     275           3 :   bind_dataset_coord_properties(dataset);
     276           3 :   bind_dataset_properties(dataset);
     277             : 
     278           3 :   bind_slice_methods(dataset);
     279             : 
     280           3 :   bind_in_place_binary<Dataset>(dataset);
     281           3 :   bind_in_place_binary<DataArray>(dataset);
     282           3 :   bind_in_place_binary<Variable>(dataset);
     283           3 :   bind_in_place_binary_scalars(dataset);
     284           3 :   bind_in_place_binary_scalars(dataArray);
     285             : 
     286           3 :   bind_binary<Dataset>(dataset);
     287           3 :   bind_binary<DataArray>(dataset);
     288           3 :   bind_binary<Variable>(dataset);
     289           3 :   bind_binary_scalars(dataset);
     290             : 
     291           3 :   dataArray.def("_rename_dims", &rename_dims<DataArray>);
     292           3 :   dataset.def("_rename_dims", &rename_dims<Dataset>);
     293             : 
     294           3 :   m.def(
     295             :       "merge",
     296           1 :       [](const Dataset &lhs, const Dataset &rhs) {
     297           1 :         return dataset::merge(lhs, rhs);
     298             :       },
     299           0 :       py::arg("lhs"), py::arg("rhs"), py::call_guard<py::gil_scoped_release>());
     300             : 
     301           3 :   m.def(
     302             :       "irreducible_mask",
     303          63 :       [](const Masks &masks, const std::string &dim) {
     304          63 :         py::gil_scoped_release release;
     305          63 :         auto mask = irreducible_mask(masks, Dim{dim});
     306          63 :         py::gil_scoped_acquire acquire;
     307         189 :         return mask.is_valid() ? py::cast(mask) : py::none();
     308          63 :       },
     309           0 :       py::arg("masks"), py::arg("dim"));
     310             : 
     311           3 :   m.def(
     312           6 :       "reciprocal", [](const DataArray &self) { return reciprocal(self); },
     313           0 :       py::arg("x"), py::call_guard<py::gil_scoped_release>());
     314             : 
     315           3 :   bind_astype(dataArray);
     316             : 
     317           3 :   bind_rebin<DataArray>(m);
     318           3 :   bind_rebin<Dataset>(m);
     319           3 : }

Generated by: LCOV version 1.14