Line data Source code
1 : // SPDX-License-Identifier: BSD-3-Clause
2 : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3 : /// @file
4 : /// @author Simon Heybrock
5 : #include "scipp/dataset/bins.h"
6 : #include "scipp/dataset/dataset.h"
7 : #include "scipp/dataset/string.h"
8 : #include "scipp/variable/bin_array_variable.tcc"
9 : #include "scipp/variable/bins.h"
10 : #include "scipp/variable/string.h"
11 :
12 : namespace scipp::variable {
13 :
14 : namespace {
15 :
/// Render one dict item as `'<key>':<value>`.
///
/// The key is streamed between single quotes; the value is rendered with
/// format_variable_compact.
template <class Key, class Value>
std::string compact_dict_entry(const Key &key, const Value &var) {
  std::ostringstream entry;
  entry << "'" << key << "':";
  entry << format_variable_compact(var);
  return entry.str();
}
22 :
23 : template <class Key, class Value>
24 : std::string
25 73 : dict_to_compact_string(const scipp::dataset::SizedDict<Key, Value> &dict,
26 : const std::string &description,
27 : const std::string &margin) {
28 73 : std::stringstream s;
29 73 : const scipp::index max_length = 70;
30 73 : const auto indent = margin.size() + description.size() + 2;
31 73 : s << margin << description << "={";
32 73 : bool first_iter = true;
33 73 : auto current_line_length = indent;
34 257 : for (const auto &[key, var] : dict) {
35 184 : if (current_line_length > max_length) {
36 24 : s << ",\n" << std::string(indent, ' ');
37 24 : current_line_length = indent;
38 24 : first_iter = true;
39 : }
40 184 : const auto append = compact_dict_entry(key, var);
41 184 : auto length = append.size();
42 184 : if (first_iter)
43 97 : first_iter = false;
44 : else {
45 87 : s << ", ";
46 87 : length += 2;
47 : }
48 184 : s << append;
49 184 : current_line_length += length;
50 : }
51 73 : s << "}";
52 146 : return s.str();
53 73 : }
54 : } // namespace
55 :
56 : template <>
57 46 : std::string Formatter<core::bin<DataArray>>::format(const Variable &var) const {
58 46 : const auto &[indices, dim, buffer] = var.constituents<DataArray>();
59 46 : std::string margin(10, ' ');
60 46 : std::stringstream s;
61 46 : s << "binned data: dim='" + to_string(dim) + "', content=DataArray(";
62 46 : s << "\n" << margin << "dims=" << to_string(buffer.dims()) << ',';
63 46 : s << "\n" << margin << "data=" << format_variable_compact(buffer.data());
64 46 : if (!buffer.coords().empty())
65 46 : s << ",\n" << dict_to_compact_string(buffer.coords(), "coords", margin);
66 46 : if (!buffer.masks().empty())
67 27 : s << ",\n" << dict_to_compact_string(buffer.masks(), "masks", margin);
68 46 : if (!buffer.attrs().empty())
69 0 : s << ",\n" << dict_to_compact_string(buffer.attrs(), "attrs", margin);
70 138 : return s.str() + ')';
71 46 : }
72 :
// NOTE(review): INSTANTIATE_BIN_ARRAY_VARIABLE is not defined in this file;
// it presumably comes from the included bin_array_variable.tcc and
// instantiates the binned-variable machinery for Dataset and DataArray
// content -- confirm against the macro definition.
73 : INSTANTIATE_BIN_ARRAY_VARIABLE(DatasetView, Dataset)
74 : INSTANTIATE_BIN_ARRAY_VARIABLE(DataArrayView, DataArray)
75 :
76 : } // namespace scipp::variable
77 :
78 : namespace scipp::dataset {
79 :
80 : namespace {
81 30 : Variable apply_mask(const DataArray &buffer, const Variable &indices,
82 : const Dim dim, const Variable &mask, const FillValue fill) {
83 : return make_bins(
84 : indices, dim,
85 60 : where(mask, special_like(Variable(buffer.data(), Dimensions{}), fill),
86 60 : buffer.data()));
87 : }
88 : } // namespace
89 :
90 : class BinVariableMakerDataArray : public variable::BinVariableMaker<DataArray> {
91 : private:
92 49 : Variable call_make_bins(const Variable &parent, const Variable &indices,
93 : const Dim dim, const DType type,
94 : const Dimensions &dims, const units::Unit &unit,
95 : const bool variances) const override {
96 49 : const auto &source = buffer(parent);
97 49 : if (parent.dims() !=
98 : indices
99 : .dims()) // would need to select and copy slices from source coords
100 0 : throw std::runtime_error(
101 0 : "Shape changing operations with bucket<DataArray> not supported yet");
102 : // The only caller is BinVariableMaker::create, which should ensure that
103 : // indices and buffer size are valid and compatible.
104 : auto data_buffer =
105 49 : variable::variableFactory().create(type, dims, unit, variances);
106 : // If the buffer size is unchanged and input indices match output indices we
107 : // can use a cheap and simple copy of the buffer's coords and masks.
108 : // Otherwise we fall back to a copy via the binned views of the respective
109 : // content buffers.
110 144 : if (source.dims() == Dimensions{dim, dims.volume()} &&
111 95 : indices == parent.bin_indices()) {
112 92 : auto buffer = DataArray(std::move(data_buffer), copy(source.coords()),
113 184 : copy(source.masks()), copy(source.attrs()));
114 46 : return make_bins_no_validate(indices, dim, std::move(buffer));
115 46 : } else {
116 3 : auto buffer = resize_default_init(source, dim, dims.volume());
117 6 : auto out = make_bins_no_validate(indices, dim, std::move(buffer));
118 : // Note the inefficiency here: The data is copied, even though it will be
119 : // replaced and overwritten. Since this branch is a special case it is not
120 : // worth the effort to avoid this.
121 3 : copy(parent, out);
122 3 : out.bin_buffer<DataArray>().setData(std::move(data_buffer));
123 3 : return out;
124 3 : }
125 49 : }
126 16769 : const Variable &data(const Variable &var) const override {
127 16769 : return buffer(var).data();
128 : }
129 150 : Variable data(Variable &var) const override { return buffer(var).data(); }
130 :
131 : [[nodiscard]] Variable
132 8200 : apply_event_masks(const Variable &var, const FillValue fill) const override {
133 16400 : if (const auto mask_union = irreducible_event_mask(var);
134 8200 : mask_union.is_valid()) {
135 30 : const auto &&[indices, dim, buffer] = var.constituents<DataArray>();
136 30 : return apply_mask(buffer, indices, dim, mask_union, fill);
137 8230 : }
138 8170 : return var;
139 : }
140 :
141 : [[nodiscard]] Variable
142 8223 : irreducible_event_mask(const Variable &var) const override {
143 8223 : const auto &&[indices, dim, buffer] = var.constituents<DataArray>();
144 16446 : return irreducible_mask(buffer.masks(), dim);
145 8223 : }
146 : };
147 :
148 : /// This is currently a dummy implemented just to make `is_bins` work.
149 : class BinVariableMakerDataset
150 : : public variable::BinVariableMakerCommon<Dataset> {
151 0 : Variable create(const DType, const Dimensions &, const units::Unit &,
152 : const bool, const parent_list &) const override {
153 0 : throw std::runtime_error("not implemented");
154 : }
155 0 : Dim elem_dim(const Variable &) const override {
156 0 : throw std::runtime_error("undefined");
157 : }
158 0 : DType elem_dtype(const Variable &) const override {
159 0 : throw std::runtime_error("undefined");
160 : }
161 0 : units::Unit elem_unit(const Variable &) const override {
162 0 : throw std::runtime_error("undefined");
163 : }
164 0 : void expect_can_set_elem_unit(const Variable &,
165 : const units::Unit &) const override {
166 0 : throw std::runtime_error("undefined");
167 : }
168 0 : void set_elem_unit(Variable &, const units::Unit &) const override {
169 0 : throw std::runtime_error("undefined");
170 : }
171 0 : bool has_variances(const Variable &) const override {
172 0 : throw std::runtime_error("undefined");
173 : }
174 0 : [[nodiscard]] Variable apply_event_masks(const Variable &,
175 : const FillValue) const override {
176 0 : throw except::NotImplementedError(
177 0 : "Event masks for bins containing datasets are not supported.");
178 : }
179 : [[nodiscard]] Variable
180 0 : irreducible_event_mask(const Variable &) const override {
181 0 : throw except::NotImplementedError(
182 0 : "Event masks for bins containing datasets are not supported.");
183 : }
184 : };
185 :
// NOTE(review): REGISTER_FORMATTER is defined outside this file; these two
// invocations presumably register formatter objects for the
// core::bin<DataArray> and core::bin<Dataset> element types under the given
// names -- confirm against the macro definition.
186 : REGISTER_FORMATTER(bin_DataArray, core::bin<DataArray>)
187 : REGISTER_FORMATTER(bin_Dataset, core::bin<Dataset>)
188 :
189 : namespace {
// Registers the makers for bucket<DataArray> and bucket<Dataset> with the
// variable factory as a side effect of initializing this namespace-scope
// variable. The initializer is a comma expression: both emplace calls are
// evaluated in order and the trailing 0 becomes the variable's value, which
// is otherwise unused.
190 : auto register_variable_maker_bucket_DataArray(
191 : (variable::variableFactory().emplace(
192 : dtype<bucket<DataArray>>,
193 : std::make_unique<BinVariableMakerDataArray>()),
194 : variable::variableFactory().emplace(
195 : dtype<bucket<Dataset>>, std::make_unique<BinVariableMakerDataset>()),
196 : 0));
197 : } // namespace
198 : } // namespace scipp::dataset
|