Coverage for install/scipp/format/formatter.py: 72%

# SPDX-License-Identifier: BSD-3-Clause

# @author Gregory Tucker, Jan-Lukas Wynen

5from collections.abc import Sequence

6from typing import Any

8import numpy as np

9import numpy.typing as npt

11import scipp

13from ..core.cpp_classes import DType, Unit, Variable

14from ..core.data_group import DataGroup

15from ._parse import FormatSpec, FormatType, Selection, parse

def format_variable(self: Variable, format_spec: str) -> str:
    """Format the variable as a string according to ``format_spec``.

    Parameters
    ----------
    format_spec:
        Format specification. It is parsed with ``parse`` and the resulting
        format type selects the formatter registered in
        ``_VARIABLE_FORMATTERS`` (default or compact).

    Returns
    -------
    :
        The formatted string.
    """
    parsed = parse(format_spec, Variable)
    formatter = _VARIABLE_FORMATTERS[parsed.format_type]
    return formatter(self, parsed)

37def _format_sizes(data: Variable) -> str:

38 return '(' + ', '.join(f'{dim}: {size}' for dim, size in data.sizes.items()) + ')'

41def _format_unit(data: Variable) -> str:

42 if data.unit is None:

43 return '<no unit>'

44 return f'[{data.unit}]'

47def _format_element(elem: Any, *, dtype: DType, spec: str) -> str:

48 if spec:

49 return f'{elem:{spec}}'

50 if dtype in (DType.float64, DType.float32):

51 # Replicate behavior of C++ formatter.

52 return f'{elem:g}'

53 if dtype == DType.string:

54 return f'"{elem}"'

55 return f'{elem}'

58def _format_scalar(data: Any, *, dtype: DType, spec: FormatSpec) -> str:

59 if spec.length == 0:

60 return '...'

61 return _format_element(data, dtype=dtype, spec=spec.nested)

64def _as_flat_array(data: npt.ArrayLike) -> npt.ArrayLike:

65 if isinstance(data, np.ndarray):

66 return data.flat

67 if 'ElementArray' in repr(type(data)):

68 return data

69 return np.array([data])

def _format_array_flat(data: Any, *, dtype: DType, spec: FormatSpec) -> str:
    """Format array data as a flat, bracketed list of elements.

    Elements that are scipp objects, and PyObject data held in an
    ``ElementArray`` type, fall back to ``str``. A ``DataGroup`` with PyObject
    dtype is wrapped in brackets. All other data is flattened and formatted
    element by element according to ``spec``.
    """
    scipp_object_dtypes = (
        DType.Variable,
        DType.DataArray,
        DType.Dataset,
        DType.VariableView,
        DType.DataArrayView,
        DType.DatasetView,
    )
    if dtype in scipp_object_dtypes:
        return _format_array_flat_scipp_objects(data)

    if dtype == DType.PyObject:
        if 'ElementArray' in repr(type(data)):
            # We can handle scalars of PyObject but not arrays.
            return _format_array_flat_scipp_objects(data)
        if isinstance(data, DataGroup):
            return _format_data_group_element(data)
        # Any other PyObject data is handled by the regular path below.

    flat = _as_flat_array(data)
    return _format_array_flat_regular(flat, dtype=dtype, spec=spec)

92def _format_array_flat_scipp_objects(data: npt.ArrayLike) -> str:

93 # Fallback because ElementArrayView does not allow us to

94 # slice and access elements nicely.

95 return str(data)

98def _format_data_group_element(data: scipp.DataGroup) -> str:

99 return f'[{data}]'

100

101

def _element_ranges(spec: FormatSpec) -> tuple[slice, slice]:
    """Return the (leading, trailing) slices of elements to display.

    ``slice(0, 0)`` marks a side from which no elements are shown.
    """
    selection = spec.selection
    count = spec.length
    if selection == Selection.edges:
        # Negate first, then floor-divide: for an odd count the trailing
        # slice receives the extra element (e.g. 3 -> 1 leading, 2 trailing).
        return slice(None, count // 2), slice((-count) // 2, None)
    if selection == Selection.begin:
        return slice(None, count), slice(0, 0)
    if selection == Selection.end:
        return slice(0, 0), slice(-count, None)
    # Any other selection value falls through and implicitly returns None.

110

111

112def _format_array_flat_regular(

113 data: Sequence[Any], *, dtype: DType, spec: FormatSpec

114) -> str:

115 def _format_all_in(d: Sequence[Any]) -> list[str]:

116 return [_format_element(e, dtype=dtype, spec=spec.nested) for e in d]

117

118 if len(data) <= spec.length:

119 elements = _format_all_in(data)

120 elif spec.length == 0:

121 elements = ['...']

122 else:

123 left, right = _element_ranges(spec)

124 elements = []

125 if left != slice(0, 0):

126 elements.extend(_format_all_in(data[left]))

127 elements.append('...')

128 if right != slice(0, 0):

129 elements.extend(_format_all_in(data[right]))

130 return f'[{", ".join(elements)}]'

131

132

def _format_variable_default(var: Variable, spec: FormatSpec) -> str:
    """Format ``var`` in the default one-line layout.

    The result contains the sizes, dtype, unit and values, followed by the
    variances if the variable has any.
    """
    dims = _format_sizes(var)
    dtype = str(var.dtype)
    unit = _format_unit(var)

    if var.ndim == 0:
        # 0-D variables are read through the scalar accessors.
        values = _format_scalar(var.value, dtype=var.dtype, spec=spec)
        if var.variance is None:
            variances = ''
        else:
            variances = _format_scalar(var.variance, dtype=var.dtype, spec=spec)
    else:
        values = _format_array_flat(var.values, dtype=var.dtype, spec=spec)
        if var.variances is None:
            variances = ''
        else:
            variances = _format_array_flat(var.variances, dtype=var.dtype, spec=spec)

    text = f'<scipp.Variable> {dims} {dtype:>9} {unit:>15} {values}'
    if variances:
        text += ' ' + variances
    return text

155

156

def _format_variable_compact(var: Variable, spec: FormatSpec) -> str:
    """Format ``var`` in compact ``value(error) unit`` notation.

    Each element is rendered by ``_format_element_compact``; elements are
    joined with ``', '`` and followed by the unit. The unit is omitted when
    the variable is dimensionless or has no unit at all.

    Raises
    ------
    ValueError
        If ``spec`` carries a nested spec, a length or a selection, or if the
        dtype of ``var`` is not a float or int type.
    """
    if spec.has_nested or spec.has_length or spec.has_selection:
        raise ValueError(f"Invalid format spec for compact formatter: '{spec}'")
    if not _is_numeric(var.dtype):
        raise ValueError(f"Compact formatting is not supported for dtype {var.dtype}")

    # Wrap 0-D data in one-element arrays so that the loops below apply.
    # For a scalar without variance this gives ``[None]``, which ``_round``
    # treats like a missing variance.
    values = var.values if var.shape else np.array((var.value,))
    variances = var.variances if var.shape else np.array((var.variance,))

    # A unit of None used to be rendered as the literal suffix " None";
    # treat it like a dimensionless unit and print no suffix.
    if var.unit is None or var.unit == Unit('dimensionless'):
        unit_suffix = ""
    else:
        unit_suffix = f" {var.unit}"

    # Iterate over array values to handle no- and infinite-precision cases
    if variances is None:
        formatted = [_format_element_compact(v) for v in values]
    else:
        formatted = [
            _format_element_compact(*_round(v, e))
            for v, e in zip(values, variances, strict=True)
        ]
    return f"{', '.join(formatted)}{unit_suffix}"

176

177

178def _is_numeric(dtype: DType) -> bool:

179 return any(x in str(dtype) for x in ('float', 'int'))

180

181

182def _round(

183 value: float, variance: float | None

184) -> tuple[float, float | None, float | None]:

185 from numpy import floor, log10, power, round, sqrt

186

187 # Treat 'infinite' precision the same as no variance

188 if variance is None or variance == 0:

189 return value, None, None

190

191 # The uncertainty is the square root of the variance

192 error = sqrt(variance)

193

194 # Determine how many digits before (+) or after (-) the decimal place

195 # the error allows for one-digit uncertainty of precision

196 precision = floor(log10(error))

197

198 # By convention, if the first digit of the error rounds to 1,

199 # add an extra digit of precision, so there are two-digits of uncertainty

200 if round(error * power(10.0, -precision)) == 1:

201 precision -= 1

202

203 # Build powers of ten to enable rounding to the specified precision

204 negative_power = power(10.0, -precision)

205 positive_power = power(10.0, precision)

206

207 # Round the error, keeping the shifted value for the compact string

208 error = int(round(error * negative_power))

209 # Round the value, shifting back after rounding

210 value = round(value * negative_power) * positive_power

211

212 # If the precision is greater than that of 0.1

213 if precision > -1:

214 # pad the error to have the right number of trailing zeros

215 error *= int(positive_power)

216

217 return value, error, precision

218

219

220def _format_element_compact(

221 value: float, error: float | None = None, precision: float | None = None

222) -> str:

223 # Build the appropriate format string:

224 # No variance (or infinite precision) values take no formatting string

225 # Positive precision implies no decimals, with format '0.0f'

226 format = '' if precision is None else f'0.{max(0, int(-precision)):d}f'

227

228 # Format the value using the generated format string

229 formatted = "{v:{s}}".format(v=value, s=format)

230

231 # Append the error if there is non-infinite-precision variance

232 if error is not None:

233 formatted = f'{formatted}({error})'

234

235 return formatted

236

237

# Dispatch table used by ``format_variable``: maps the format type of a parsed
# spec to the function that renders a variable for it.
_VARIABLE_FORMATTERS = {
    FormatType.default: _format_variable_default,
    FormatType.compact: _format_variable_compact,
}