LCOV - code coverage report
Current view: top level - core/include/scipp/core/element - map_to_bins.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 43 52 82.7 %
Date: 2024-04-28 01:25:40 Functions: 21 121 17.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: BSD-3-Clause
       2             : // Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
       3             : /// @file
       4             : /// @author Simon Heybrock
       5             : #pragma once
       6             : #include <limits>
       7             : 
       8             : #include "scipp/common/overloaded.h"
       9             : #include "scipp/core/eigen.h"
      10             : #include "scipp/core/element/arg_list.h"
      11             : #include "scipp/core/element/util.h"
      12             : #include "scipp/core/histogram.h"
      13             : #include "scipp/core/subbin_sizes.h"
      14             : #include "scipp/core/time_point.h"
      15             : #include "scipp/core/transform_common.h"
      16             : 
      17             : namespace scipp::core::element {
      18             : 
      19      293918 : auto map_to_bins_direct = [](auto &binned, auto &bins, const auto &data,
      20             :                              const auto &bin_indices) {
      21      293918 :   const auto size = scipp::size(bin_indices);
      22             :   using T = std::decay_t<decltype(data)>;
      23   176759895 :   for (scipp::index i = 0; i < size; ++i) {
      24   176465977 :     const auto i_bin = bin_indices[i];
      25   176465977 :     if (i_bin < 0)
      26      854691 :       continue;
      27             :     if constexpr (is_ValueAndVariance_v<T>) {
      28      242179 :       binned.variance[bins[i_bin]] = data.variance[i];
      29      242179 :       binned.value[bins[i_bin]++] = data.value[i];
      30             :     } else {
      31   175369107 :       binned[bins[i_bin]++] = data[i];
      32             :     }
      33             :   }
      34      293918 : };
      35             : 
      36             : constexpr bool is_powerof2(int v) { return v && ((v & (v - 1)) == 0); }
      37             : 
      38             : template <int chunksize>
      39          40 : auto map_to_bins_chunkwise = [](auto &binned, auto &bins, const auto &data,
      40             :                                 const auto &bin_indices) {
      41             :   // compiler is smart for div or mod 2**N, otherwise this would be too slow
      42             :   static_assert(is_powerof2(chunksize));
      43             :   using InnerIndex = int16_t;
      44             :   static_assert(chunksize <= std::numeric_limits<InnerIndex>::max());
      45          40 :   const auto size = scipp::size(bin_indices);
      46             :   using T = std::decay_t<decltype(data)>;
      47             : 
      48             :   using Val =
      49             :       std::conditional_t<is_ValueAndVariance_v<T>, typename T::value_type, T>;
      50             :   // TODO Ideally this buffer would be reused (on a per-thread basis)
      51             :   // for every application of the kernel.
      52             :   std::vector<std::tuple<std::vector<typename Val::value_type>,
      53             :                          std::vector<InnerIndex>>>
      54          40 :       chunks((bins.size() - 1) / chunksize + 1);
      55        3932 :   for (scipp::index i = 0; i < size;) {
      56             :     // We operate in blocks so the size of the map of buffers, i.e.,
      57             :     // additional memory use of the algorithm, is bounded. This also
      58             :     // avoids costly allocations from resize operations.
      59        3892 :     const scipp::index max = std::min(size, i + scipp::size(bins) * 8);
      60             :     // 1. Map to chunks
      61    37107278 :     for (; i < max; ++i) {
      62    37103386 :       const auto i_bin = bin_indices[i];
      63    37103386 :       if (i_bin < 0)
      64     6009138 :         continue;
      65    31094248 :       const InnerIndex j = i_bin % chunksize;
      66    31094248 :       const auto i_chunk = i_bin / chunksize;
      67    31094248 :       auto &[vals, ind] = chunks[i_chunk];
      68             :       if constexpr (is_ValueAndVariance_v<T>) {
      69           0 :         vals.emplace_back(data.value[i]);
      70           0 :         vals.emplace_back(data.variance[i]);
      71             :       } else {
      72    31094248 :         vals.emplace_back(data[i]);
      73             :       }
      74    31094248 :       ind.emplace_back(j);
      75             :     }
      76             :     // 2. Map chunks to bins
      77       42804 :     for (scipp::index i_chunk = 0; i_chunk < scipp::size(chunks); ++i_chunk) {
      78       38912 :       auto &[vals, ind] = chunks[i_chunk];
      79    31133160 :       for (scipp::index j = 0; j < scipp::size(ind); ++j) {
      80    31094248 :         const auto i_bin = chunksize * i_chunk + ind[j];
      81             :         if constexpr (is_ValueAndVariance_v<T>) {
      82           0 :           binned.value[bins[i_bin]] = vals[2 * j];
      83           0 :           binned.variance[bins[i_bin]++] = vals[2 * j + 1];
      84             :         } else {
      85    31094248 :           binned[bins[i_bin]++] = vals[j];
      86             :         }
      87             :       }
      88       38912 :       vals.clear();
      89       38912 :       ind.clear();
      90             :     }
      91             :   }
      92          40 : };
      93             : 
      94             : // - Each span covers an *input* bin.
      95             : // - `offsets` Start indices of the output bins
      96             : // - `bin_indices` Target output bin index (within input bin)
      97             : template <class T, class Index>
      98             : using bin_arg = std::tuple<scipp::span<T>, SubbinSizes, scipp::span<const T>,
      99             :                            scipp::span<const Index>>;
     100             : static constexpr auto bin = overloaded{
     101             :     element::arg_list<
     102             :         bin_arg<double, int64_t>, bin_arg<double, int32_t>,
     103             :         bin_arg<float, int64_t>, bin_arg<float, int32_t>,
     104             :         bin_arg<int64_t, int64_t>, bin_arg<int64_t, int32_t>,
     105             :         bin_arg<int32_t, int64_t>, bin_arg<int32_t, int32_t>,
     106             :         bin_arg<bool, int64_t>, bin_arg<bool, int32_t>,
     107             :         bin_arg<Eigen::Vector3d, int64_t>, bin_arg<Eigen::Vector3d, int32_t>,
     108             :         bin_arg<std::string, int64_t>, bin_arg<std::string, int32_t>,
     109             :         bin_arg<time_point, int64_t>, bin_arg<time_point, int32_t>>,
     110             :     transform_flags::expect_in_variance_if_out_variance,
     111       30146 :     [](units::Unit &binned, const units::Unit &, const units::Unit &data,
     112       30146 :        const units::Unit &) { binned = data; },
     113      293958 :     [](const auto &binned, const auto &offsets, const auto &data,
     114             :        const auto &bin_indices) {
     115      293958 :       auto bins(offsets.sizes());
     116             :       // If there are many bins, we have two performance issues:
     117             :       // 1. `bins` is large and will not fit into L1, L2, or L3 cache.
     118             :       // 2. Writes to output are very random, implying a cache miss for every
     119             :       //    event.
     120             :       // We can avoid some of this issue by first sorting into chunks, then
     121             :       // chunks into bins. For example, instead of mapping directly to 65536
     122             :       // bins, we may map to 256 chunks, and each chunk to 256 bins.
     123      293958 :       const bool many_bins = bins.size() > 512;
     124      293958 :       const bool multiple_events_per_bin = bins.size() * 4 < bin_indices.size();
     125      293958 :       if (many_bins && multiple_events_per_bin) { // avoid overhead
     126          40 :         if (bins.size() <= 128 * 128)
     127          40 :           map_to_bins_chunkwise<128>(binned, bins, data, bin_indices);
     128           0 :         else if (bins.size() <= 256 * 256)
     129           0 :           map_to_bins_chunkwise<256>(binned, bins, data, bin_indices);
     130           0 :         else if (bins.size() <= 512 * 512)
     131           0 :           map_to_bins_chunkwise<512>(binned, bins, data, bin_indices);
     132             :         else
     133           0 :           map_to_bins_chunkwise<1024>(binned, bins, data, bin_indices);
     134             :       } else {
     135      293918 :         map_to_bins_direct(binned, bins, data, bin_indices);
     136             :       }
     137      293958 :     }};
     138             : 
     139             : } // namespace scipp::core::element

Generated by: LCOV version 1.14