Coverage for install/scipp/format/formatter.py: 72%

118 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-12-01 01:59 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Gregory Tucker, Jan-Lukas Wynen 

4 

5from collections.abc import Sequence 

6from typing import Any 

7 

8import numpy as np 

9import numpy.typing as npt 

10 

11import scipp 

12 

13from ..core.cpp_classes import DType, Unit, Variable 

14from ..core.data_group import DataGroup 

15from ._parse import FormatSpec, FormatType, Selection, parse 

16 

17 

def format_variable(self: Variable, format_spec: str) -> str:
    """Render a Variable as a string following a format specification.

    The specification is parsed with ``parse`` and the resulting format
    type selects one of the functions registered in
    ``_VARIABLE_FORMATTERS`` (default or compact formatting).

    Parameters
    ----------
    format_spec:
        Format specification string, e.g. ``'c'`` for compact
        error-reporting.

    Returns
    -------
    :
        The formatted string.
    """
    parsed = parse(format_spec, Variable)
    formatter = _VARIABLE_FORMATTERS[parsed.format_type]
    return formatter(self, parsed)

35 

36 

37def _format_sizes(data: Variable) -> str: 

38 return '(' + ', '.join(f'{dim}: {size}' for dim, size in data.sizes.items()) + ')' 

39 

40 

41def _format_unit(data: Variable) -> str: 

42 if data.unit is None: 

43 return '<no unit>' 

44 return f'[{data.unit}]' 

45 

46 

47def _format_element(elem: Any, *, dtype: DType, spec: str) -> str: 

48 if spec: 

49 return f'{elem:{spec}}' 

50 if dtype in (DType.float64, DType.float32): 

51 # Replicate behavior of C++ formatter. 

52 return f'{elem:g}' 

53 if dtype == DType.string: 

54 return f'"{elem}"' 

55 return f'{elem}' 

56 

57 

58def _format_scalar(data: Any, *, dtype: DType, spec: FormatSpec) -> str: 

59 if spec.length == 0: 

60 return '...' 

61 return _format_element(data, dtype=dtype, spec=spec.nested) 

62 

63 

64def _as_flat_array(data: npt.ArrayLike) -> npt.ArrayLike: 

65 if isinstance(data, np.ndarray): 

66 return data.flat 

67 if 'ElementArray' in repr(type(data)): 

68 return data 

69 return np.array([data]) 

70 

71 

def _format_array_flat(data: Any, *, dtype: DType, spec: FormatSpec) -> str:
    """Format the values of an array, choosing a strategy based on ``dtype``.

    Parameters
    ----------
    data:
        The values (or variances) to format.
    dtype:
        The dtype of the variable that ``data`` belongs to.
    spec:
        Parsed format specification, forwarded to the regular formatter.

    Returns
    -------
    :
        The formatted values.
    """
    scipp_object_dtypes = (
        DType.Variable,
        DType.DataArray,
        DType.Dataset,
        DType.VariableView,
        DType.DataArrayView,
        DType.DatasetView,
    )
    if dtype in scipp_object_dtypes:
        return _format_array_flat_scipp_objects(data)

    if dtype == DType.PyObject:
        if 'ElementArray' in repr(type(data)):
            # We can handle scalars of PyObject but not arrays.
            return _format_array_flat_scipp_objects(data)
        if isinstance(data, DataGroup):
            return _format_data_group_element(data)
        # Any other Python object falls through to the regular formatter.

    flat = _as_flat_array(data)
    return _format_array_flat_regular(flat, dtype=dtype, spec=spec)

90 

91 

92def _format_array_flat_scipp_objects(data: npt.ArrayLike) -> str: 

93 # Fallback because ElementArrayView does not allow us to 

94 # slice and access elements nicely. 

95 return str(data) 

96 

97 

98def _format_data_group_element(data: scipp.DataGroup) -> str: 

99 return f'[{data}]' 

100 

101 

def _element_ranges(spec: FormatSpec) -> tuple[slice, slice]:
    """Return the (left, right) slices of the elements to display.

    ``slice(0, 0)`` marks a side on which nothing is shown.

    - ``Selection.edges``: ``spec.length // 2`` elements from the start
      and the remainder of ``spec.length`` from the end.
    - ``Selection.begin``: the first ``spec.length`` elements.
    - ``Selection.end``: the last ``spec.length`` elements.

    For any other selection the function returns None.
    """
    selection = spec.selection
    if selection == Selection.edges:
        half = spec.length // 2
        # Floor division of the negated length gives the right side the
        # extra element when the length is odd.
        return slice(None, half), slice((-spec.length) // 2, None)
    if selection == Selection.begin:
        return slice(None, spec.length), slice(0, 0)
    if selection == Selection.end:
        return slice(0, 0), slice(-spec.length, None)
    return None

110 

111 

112def _format_array_flat_regular( 

113 data: Sequence[Any], *, dtype: DType, spec: FormatSpec 

114) -> str: 

115 def _format_all_in(d: Sequence[Any]) -> list[str]: 

116 return [_format_element(e, dtype=dtype, spec=spec.nested) for e in d] 

117 

118 if len(data) <= spec.length: 

119 elements = _format_all_in(data) 

120 elif spec.length == 0: 

121 elements = ['...'] 

122 else: 

123 left, right = _element_ranges(spec) 

124 elements = [] 

125 if left != slice(0, 0): 

126 elements.extend(_format_all_in(data[left])) 

127 elements.append('...') 

128 if right != slice(0, 0): 

129 elements.extend(_format_all_in(data[right])) 

130 return f'[{", ".join(elements)}]' 

131 

132 

def _format_variable_default(var: Variable, spec: FormatSpec) -> str:
    """Format a variable in the default, repr-like layout.

    The result has the form
    ``<scipp.Variable> (sizes) dtype [unit] values`` followed by the
    variances when the variable has any.

    Parameters
    ----------
    var:
        The variable to format.
    spec:
        Parsed format specification controlling how values are rendered.

    Returns
    -------
    :
        The formatted string.
    """
    dims = _format_sizes(var)
    dtype = str(var.dtype)
    unit = _format_unit(var)

    if var.ndim == 0:
        # Scalars use the singular accessors.
        values = _format_scalar(var.value, dtype=var.dtype, spec=spec)
        if var.variance is not None:
            variances = _format_scalar(var.variance, dtype=var.dtype, spec=spec)
        else:
            variances = ''
    else:
        values = _format_array_flat(var.values, dtype=var.dtype, spec=spec)
        if var.variances is not None:
            variances = _format_array_flat(var.variances, dtype=var.dtype, spec=spec)
        else:
            variances = ''

    text = f'<scipp.Variable> {dims} {dtype:>9} {unit:>15} {values}'
    if variances:
        text += ' ' + variances
    return text

155 

156 

def _format_variable_compact(var: Variable, spec: FormatSpec) -> str:
    """Format a variable in compact ``value(error) unit`` notation.

    Parameters
    ----------
    var:
        The variable to format; its dtype must be numeric.
    spec:
        Parsed format specification. It must not carry a nested spec,
        a length, or a selection.

    Returns
    -------
    :
        Comma-separated formatted elements followed by the unit, which
        is omitted when it equals ``Unit('dimensionless')``.

    Raises
    ------
    ValueError
        If ``spec`` has a nested spec, length, or selection, or if the
        dtype of ``var`` is not numeric.
    """
    if spec.has_nested or spec.has_length or spec.has_selection:
        raise ValueError(f"Invalid format spec for compact formatter: '{spec}'")
    if not _is_numeric(var.dtype):
        raise ValueError(f"Compact formatting is not supported for dtype {var.dtype}")

    # Wrap scalars in one-element arrays so both cases iterate the same way.
    if var.shape:
        values = var.values
    else:
        values = np.array((var.value,))
    if var.shape:
        variances = var.variances
    else:
        variances = np.array((var.variance,))

    if var.unit == Unit('dimensionless'):
        unit_suffix = ""
    else:
        unit_suffix = f" {var.unit}"

    # Iterate over array values to handle no- and infinite-precision cases
    if variances is None:
        pieces = [_format_element_compact(value) for value in values]
    else:
        pieces = []
        for value, variance in zip(values, variances, strict=True):
            rounded = _round(value, variance)
            pieces.append(_format_element_compact(*rounded))
    return ', '.join(pieces) + unit_suffix

176 

177 

178def _is_numeric(dtype: DType) -> bool: 

179 return any(x in str(dtype) for x in ('float', 'int')) 

180 

181 

182def _round( 

183 value: float, variance: float | None 

184) -> tuple[float, float | None, float | None]: 

185 from numpy import floor, log10, power, round, sqrt 

186 

187 # Treat 'infinite' precision the same as no variance 

188 if variance is None or variance == 0: 

189 return value, None, None 

190 

191 # The uncertainty is the square root of the variance 

192 error = sqrt(variance) 

193 

194 # Determine how many digits before (+) or after (-) the decimal place 

195 # the error allows for one-digit uncertainty of precision 

196 precision = floor(log10(error)) 

197 

198 # By convention, if the first digit of the error rounds to 1, 

199 # add an extra digit of precision, so there are two-digits of uncertainty 

200 if round(error * power(10.0, -precision)) == 1: 

201 precision -= 1 

202 

203 # Build powers of ten to enable rounding to the specified precision 

204 negative_power = power(10.0, -precision) 

205 positive_power = power(10.0, precision) 

206 

207 # Round the error, keeping the shifted value for the compact string 

208 error = int(round(error * negative_power)) 

209 # Round the value, shifting back after rounding 

210 value = round(value * negative_power) * positive_power 

211 

212 # If the precision is greater than that of 0.1 

213 if precision > -1: 

214 # pad the error to have the right number of trailing zeros 

215 error *= int(positive_power) 

216 

217 return value, error, precision 

218 

219 

220def _format_element_compact( 

221 value: float, error: float | None = None, precision: float | None = None 

222) -> str: 

223 # Build the appropriate format string: 

224 # No variance (or infinite precision) values take no formatting string 

225 # Positive precision implies no decimals, with format '0.0f' 

226 format = '' if precision is None else f'0.{max(0, int(-precision)):d}f' 

227 

228 # Format the value using the generated format string 

229 formatted = "{v:{s}}".format(v=value, s=format) 

230 

231 # Append the error if there is non-infinite-precision variance 

232 if error is not None: 

233 formatted = f'{formatted}({error})' 

234 

235 return formatted 

236 

237 

# Dispatch table used by ``format_variable``: maps the format type of a
# parsed spec to the function that renders a Variable in that format.
# Each formatter is called as ``formatter(variable, spec)``.
_VARIABLE_FORMATTERS = {
    FormatType.default: _format_variable_default,
    FormatType.compact: _format_variable_compact,
}