Coverage for install/scipp/format/formatter.py: 72%
118 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 01:59 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 01:59 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Gregory Tucker, Jan-Lukas Wynen
5from collections.abc import Sequence
6from typing import Any
8import numpy as np
9import numpy.typing as npt
11import scipp
13from ..core.cpp_classes import DType, Unit, Variable
14from ..core.data_group import DataGroup
15from ._parse import FormatSpec, FormatType, Selection, parse
def format_variable(self: Variable, format_spec: str) -> str:
    """Render the Variable as a string following a format specification.

    Parameters
    ----------
    format_spec:
        Format specification. It is parsed for ``Variable`` and the
        resulting format type selects the formatter that builds the string,
        e.g. 'c' for Compact error-reporting.

    Returns
    -------
    :
        The formatted string.
    """
    parsed = parse(format_spec, Variable)
    formatter = _VARIABLE_FORMATTERS[parsed.format_type]
    return formatter(self, parsed)
37def _format_sizes(data: Variable) -> str:
38 return '(' + ', '.join(f'{dim}: {size}' for dim, size in data.sizes.items()) + ')'
41def _format_unit(data: Variable) -> str:
42 if data.unit is None:
43 return '<no unit>'
44 return f'[{data.unit}]'
47def _format_element(elem: Any, *, dtype: DType, spec: str) -> str:
48 if spec:
49 return f'{elem:{spec}}'
50 if dtype in (DType.float64, DType.float32):
51 # Replicate behavior of C++ formatter.
52 return f'{elem:g}'
53 if dtype == DType.string:
54 return f'"{elem}"'
55 return f'{elem}'
58def _format_scalar(data: Any, *, dtype: DType, spec: FormatSpec) -> str:
59 if spec.length == 0:
60 return '...'
61 return _format_element(data, dtype=dtype, spec=spec.nested)
64def _as_flat_array(data: npt.ArrayLike) -> npt.ArrayLike:
65 if isinstance(data, np.ndarray):
66 return data.flat
67 if 'ElementArray' in repr(type(data)):
68 return data
69 return np.array([data])
def _format_array_flat(data: Any, *, dtype: DType, spec: FormatSpec) -> str:
    """Format the values of an array as a flat sequence.

    Arrays holding scipp objects fall back to their plain string
    representation. PyObject data is special-cased for ElementArray
    containers and DataGroup elements. Everything else is flattened and
    formatted element by element according to ``spec``.
    """
    scipp_object_dtypes = (
        DType.Variable,
        DType.DataArray,
        DType.Dataset,
        DType.VariableView,
        DType.DataArrayView,
        DType.DatasetView,
    )
    if dtype in scipp_object_dtypes:
        return _format_array_flat_scipp_objects(data)

    if dtype == DType.PyObject:
        if 'ElementArray' in repr(type(data)):
            # We can handle scalars of PyObject but not arrays.
            return _format_array_flat_scipp_objects(data)
        if isinstance(data, DataGroup):
            return _format_data_group_element(data)

    flat = _as_flat_array(data)
    return _format_array_flat_regular(flat, dtype=dtype, spec=spec)
92def _format_array_flat_scipp_objects(data: npt.ArrayLike) -> str:
93 # Fallback because ElementArrayView does not allow us to
94 # slice and access elements nicely.
95 return str(data)
98def _format_data_group_element(data: scipp.DataGroup) -> str:
99 return f'[{data}]'
102def _element_ranges(spec: FormatSpec) -> tuple[slice, slice]:
103 match spec.selection:
104 case Selection.edges:
105 return slice(None, spec.length // 2), slice(-spec.length // 2, None)
106 case Selection.begin:
107 return slice(None, spec.length), slice(0, 0)
108 case Selection.end:
109 return slice(0, 0), slice(-spec.length, None)
112def _format_array_flat_regular(
113 data: Sequence[Any], *, dtype: DType, spec: FormatSpec
114) -> str:
115 def _format_all_in(d: Sequence[Any]) -> list[str]:
116 return [_format_element(e, dtype=dtype, spec=spec.nested) for e in d]
118 if len(data) <= spec.length:
119 elements = _format_all_in(data)
120 elif spec.length == 0:
121 elements = ['...']
122 else:
123 left, right = _element_ranges(spec)
124 elements = []
125 if left != slice(0, 0):
126 elements.extend(_format_all_in(data[left]))
127 elements.append('...')
128 if right != slice(0, 0):
129 elements.extend(_format_all_in(data[right]))
130 return f'[{", ".join(elements)}]'
def _format_variable_default(var: Variable, spec: FormatSpec) -> str:
    """Format a Variable as a one-line summary.

    The result lists the sizes, dtype, unit and values of ``var``, followed
    by its variances when present.
    """
    dims = _format_sizes(var)
    dtype = str(var.dtype)
    unit = _format_unit(var)

    if var.ndim == 0:
        # Scalars expose their content through ``value`` / ``variance``.
        values = _format_scalar(var.value, dtype=var.dtype, spec=spec)
        if var.variance is None:
            variances = ''
        else:
            variances = _format_scalar(var.variance, dtype=var.dtype, spec=spec)
    else:
        values = _format_array_flat(var.values, dtype=var.dtype, spec=spec)
        if var.variances is None:
            variances = ''
        else:
            variances = _format_array_flat(
                var.variances, dtype=var.dtype, spec=spec
            )

    text = f'<scipp.Variable> {dims} {dtype:>9} {unit:>15} {values}'
    if variances:
        text += ' ' + variances
    return text
157def _format_variable_compact(var: Variable, spec: FormatSpec) -> str:
158 if spec.has_nested or spec.has_length or spec.has_selection:
159 raise ValueError(f"Invalid format spec for compact formatter: '{spec}'")
160 if not _is_numeric(var.dtype):
161 raise ValueError(f"Compact formatting is not supported for dtype {var.dtype}")
163 values = var.values if var.shape else np.array((var.value,))
164 variances = var.variances if var.shape else np.array((var.variance,))
165 unt = "" if var.unit == Unit('dimensionless') else f" {var.unit}"
167 # Iterate over array values to handle no- and infinite-precision cases
168 if variances is None:
169 formatted = [_format_element_compact(v) for v in values]
170 else:
171 formatted = [
172 _format_element_compact(*_round(v, e))
173 for v, e in zip(values, variances, strict=True)
174 ]
175 return f"{', '.join(formatted)}{unt}"
178def _is_numeric(dtype: DType) -> bool:
179 return any(x in str(dtype) for x in ('float', 'int'))
182def _round(
183 value: float, variance: float | None
184) -> tuple[float, float | None, float | None]:
185 from numpy import floor, log10, power, round, sqrt
187 # Treat 'infinite' precision the same as no variance
188 if variance is None or variance == 0:
189 return value, None, None
191 # The uncertainty is the square root of the variance
192 error = sqrt(variance)
194 # Determine how many digits before (+) or after (-) the decimal place
195 # the error allows for one-digit uncertainty of precision
196 precision = floor(log10(error))
198 # By convention, if the first digit of the error rounds to 1,
199 # add an extra digit of precision, so there are two-digits of uncertainty
200 if round(error * power(10.0, -precision)) == 1:
201 precision -= 1
203 # Build powers of ten to enable rounding to the specified precision
204 negative_power = power(10.0, -precision)
205 positive_power = power(10.0, precision)
207 # Round the error, keeping the shifted value for the compact string
208 error = int(round(error * negative_power))
209 # Round the value, shifting back after rounding
210 value = round(value * negative_power) * positive_power
212 # If the precision is greater than that of 0.1
213 if precision > -1:
214 # pad the error to have the right number of trailing zeros
215 error *= int(positive_power)
217 return value, error, precision
220def _format_element_compact(
221 value: float, error: float | None = None, precision: float | None = None
222) -> str:
223 # Build the appropriate format string:
224 # No variance (or infinite precision) values take no formatting string
225 # Positive precision implies no decimals, with format '0.0f'
226 format = '' if precision is None else f'0.{max(0, int(-precision)):d}f'
228 # Format the value using the generated format string
229 formatted = "{v:{s}}".format(v=value, s=format)
231 # Append the error if there is non-infinite-precision variance
232 if error is not None:
233 formatted = f'{formatted}({error})'
235 return formatted
# Dispatch table mapping the format type of a parsed format spec to the
# function that renders a Variable for that type (used by format_variable).
_VARIABLE_FORMATTERS = {
    FormatType.default: _format_variable_default,
    FormatType.compact: _format_variable_compact,
}