Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #include "scipp/dataset/bins.h"
6 : #include "scipp/dataset/dataset.h"
7 : #include "scipp/dataset/string.h"
8 : #include "scipp/variable/bin_array_variable.tcc"
9 : #include "scipp/variable/bins.h"
10 : #include "scipp/variable/string.h"
11 :
12 : namespace scipp::variable {
13 :
14 : namespace {
15 :
/// Render one dict item as `'<key>':<value>`.
///
/// The key is written with `operator<<`; the value is rendered by
/// `format_variable_compact`.
template <class Key, class Value>
std::string compact_dict_entry(const Key &key, const Value &var) {
  std::stringstream entry;
  entry << "'";
  entry << key;
  entry << "':";
  entry << format_variable_compact(var);
  return entry.str();
}
22 :
23 : template <class Key, class Value>
24 : std::string
25 64 : dict_to_compact_string(const scipp::dataset::SizedDict<Key, Value> &dict,
26 : const std::string &description,
27 : const std::string &margin) {
28 64 : std::stringstream s;
29 64 : const scipp::index max_length = 70;
30 64 : const auto indent = margin.size() + description.size() + 2;
31 64 : s << margin << description << "={";
32 64 : bool first_iter = true;
33 64 : auto current_line_length = indent;
34 200 : for (const auto &[key, var] : dict) {
35 136 : if (current_line_length > max_length) {
36 18 : s << ",\n" << std::string(indent, ' ');
37 18 : current_line_length = indent;
38 18 : first_iter = true;
39 : }
40 136 : const auto append = compact_dict_entry(key, var);
41 136 : auto length = append.size();
42 136 : if (first_iter)
43 82 : first_iter = false;
44 : else {
45 54 : s << ", ";
46 54 : length += 2;
47 : }
48 136 : s << append;
49 136 : current_line_length += length;
50 : }
51 64 : s << "}";
52 128 : return s.str();
53 64 : }
54 : } // namespace
55 :
56 : template <>
57 46 : std::string Formatter<core::bin<DataArray>>::format(const Variable &var) const {
58 46 : const auto &[indices, dim, buffer] = var.constituents<DataArray>();
59 46 : std::string margin(10, ' ');
60 46 : std::stringstream s;
61 46 : s << "binned data: dim='" + to_string(dim) + "', content=DataArray(";
62 46 : s << "\n" << margin << "dims=" << to_string(buffer.dims()) << ',';
63 46 : s << "\n" << margin << "data=" << format_variable_compact(buffer.data());
64 46 : if (!buffer.coords().empty())
65 46 : s << ",\n" << dict_to_compact_string(buffer.coords(), "coords", margin);
66 46 : if (!buffer.masks().empty())
67 18 : s << ",\n" << dict_to_compact_string(buffer.masks(), "masks", margin);
68 46 : if (!buffer.attrs().empty())
69 0 : s << ",\n" << dict_to_compact_string(buffer.attrs(), "attrs", margin);
70 138 : return s.str() + ')';
71 46 : }
72 :
// Invoke the bin-array instantiation macro for the two buffer types handled
// in this file: Dataset and DataArray.
// NOTE(review): the macro is presumably provided by the included
// "scipp/variable/bin_array_variable.tcc", with the first argument acting as a
// name and the second as the buffer type - confirm against its definition.
INSTANTIATE_BIN_ARRAY_VARIABLE(DatasetView, Dataset)
INSTANTIATE_BIN_ARRAY_VARIABLE(DataArrayView, DataArray)
75 :
76 : } // namespace scipp::variable
77 :
78 : namespace scipp::dataset {
79 :
80 : namespace {
81 30 : Variable apply_mask(const DataArray &buffer, const Variable &indices,
82 : const Dim dim, const Variable &mask, const FillValue fill) {
83 : return make_bins(
84 : indices, dim,
85 60 : where(mask, special_like(Variable(buffer.data(), Dimensions{}), fill),
86 60 : buffer.data()));
87 : }
88 : } // namespace
89 :
90 : class BinVariableMakerDataArray : public variable::BinVariableMaker<DataArray> {
91 : private:
92 49 : Variable call_make_bins(const Variable &parent, const Variable &indices,
93 : const Dim dim, const DType type,
94 : const Dimensions &dims, const units::Unit &unit,
95 : const bool variances) const override {
96 49 : const auto &source = buffer(parent);
97 49 : if (parent.dims() !=
98 : indices
99 : .dims()) // would need to select and copy slices from source coords
100 0 : throw std::runtime_error(
101 0 : "Shape changing operations with bucket<DataArray> not supported yet");
102 : // The only caller is BinVariableMaker::create, which should ensure that
103 : // indices and buffer size are valid and compatible.
104 : auto data_buffer =
105 49 : variable::variableFactory().create(type, dims, unit, variances);
106 : // If the buffer size is unchanged and input indices match output indices we
107 : // can use a cheap and simple copy of the buffer's coords and masks.
108 : // Otherwise we fall back to a copy via the binned views of the respective
109 : // content buffers.
110 144 : if (source.dims() == Dimensions{dim, dims.volume()} &&
111 95 : indices == parent.bin_indices()) {
112 92 : auto buffer = DataArray(std::move(data_buffer), copy(source.coords()),
113 184 : copy(source.masks()), copy(source.attrs()));
114 46 : return make_bins_no_validate(indices, dim, std::move(buffer));
115 46 : } else {
116 3 : auto buffer = resize_default_init(source, dim, dims.volume());
117 6 : auto out = make_bins_no_validate(indices, dim, std::move(buffer));
118 : // Note the inefficiency here: The data is copied, even though it will be
119 : // replaced and overwritten. Since this branch is a special case it is not
120 : // worth the effort to avoid this.
121 3 : copy(parent, out);
122 3 : out.bin_buffer<DataArray>().setData(std::move(data_buffer));
123 3 : return out;
124 3 : }
125 49 : }
126 17713 : const Variable &data(const Variable &var) const override {
127 17713 : return buffer(var).data();
128 : }
129 150 : Variable data(Variable &var) const override { return buffer(var).data(); }
130 :
131 : [[nodiscard]] Variable
132 8672 : apply_event_masks(const Variable &var, const FillValue fill) const override {
133 17344 : if (const auto mask_union = irreducible_event_mask(var);
134 8672 : mask_union.is_valid()) {
135 30 : const auto &&[indices, dim, buffer] = var.constituents<DataArray>();
136 30 : return apply_mask(buffer, indices, dim, mask_union, fill);
137 8702 : }
138 8642 : return var;
139 : }
140 :
141 : [[nodiscard]] Variable
142 8695 : irreducible_event_mask(const Variable &var) const override {
143 8695 : const auto &&[indices, dim, buffer] = var.constituents<DataArray>();
144 17390 : return irreducible_mask(buffer.masks(), dim);
145 8695 : }
146 : };
147 :
148 : /// This is currently a dummy implemented just to make `is_bins` work.
149 : class BinVariableMakerDataset
150 : : public variable::BinVariableMakerCommon<Dataset> {
151 0 : Variable create(const DType, const Dimensions &, const units::Unit &,
152 : const bool, const parent_list &) const override {
153 0 : throw std::runtime_error("not implemented");
154 : }
155 0 : Dim elem_dim(const Variable &) const override {
156 0 : throw std::runtime_error("undefined");
157 : }
158 0 : DType elem_dtype(const Variable &) const override {
159 0 : throw std::runtime_error("undefined");
160 : }
161 0 : units::Unit elem_unit(const Variable &) const override {
162 0 : throw std::runtime_error("undefined");
163 : }
164 0 : void expect_can_set_elem_unit(const Variable &,
165 : const units::Unit &) const override {
166 0 : throw std::runtime_error("undefined");
167 : }
168 0 : void set_elem_unit(Variable &, const units::Unit &) const override {
169 0 : throw std::runtime_error("undefined");
170 : }
171 0 : bool has_variances(const Variable &) const override {
172 0 : throw std::runtime_error("undefined");
173 : }
174 0 : [[nodiscard]] Variable apply_event_masks(const Variable &,
175 : const FillValue) const override {
176 0 : throw except::NotImplementedError(
177 0 : "Event masks for bins containing datasets are not supported.");
178 : }
179 : [[nodiscard]] Variable
180 0 : irreducible_event_mask(const Variable &) const override {
181 0 : throw except::NotImplementedError(
182 0 : "Event masks for bins containing datasets are not supported.");
183 : }
184 : };
185 :
// Register formatters for the two binned dtypes, core::bin<DataArray> and
// core::bin<Dataset>.
// NOTE(review): presumably the first macro argument is only used to form a
// unique identifier - confirm against the REGISTER_FORMATTER definition.
REGISTER_FORMATTER(bin_DataArray, core::bin<DataArray>)
REGISTER_FORMATTER(bin_Dataset, core::bin<Dataset>)
188 :
189 : namespace {
190 : auto register_variable_maker_bucket_DataArray(
191 : (variable::variableFactory().emplace(
192 : dtype<bucket<DataArray>>,
193 : std::make_unique<BinVariableMakerDataArray>()),
194 : variable::variableFactory().emplace(
195 : dtype<bucket<Dataset>>, std::make_unique<BinVariableMakerDataset>()),
196 : 0));
197 : } // namespace
198 : } // namespace scipp::dataset
|