Coverage for install/scipp/format/formatter.py: 73%

116 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-04-28 01:28 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Gregory Tucker, Jan-Lukas Wynen 

4 

5from typing import Any, List, Tuple 

6 

7import numpy as np 

8 

9import scipp 

10 

11from ..core.cpp_classes import DType, Unit, Variable 

12from ..core.data_group import DataGroup 

13from ._parse import FormatSpec, FormatType, parse 

14 

15 

def format_variable(self, format_spec: str) -> str:
    """Render the Variable as a string as directed by a format specification.

    Parameters
    ----------
    format_spec:
        Format specification.
        It is parsed with ``parse``; the resulting format type selects
        the formatter that is looked up in ``_VARIABLE_FORMATTERS``.

    Returns
    -------
    :
        The formatted string.
    """
    parsed = parse(format_spec, Variable)
    formatter = _VARIABLE_FORMATTERS[parsed.format_type]
    return formatter(self, parsed)

33 

34 

35def _format_sizes(data: Variable) -> str: 

36 return '(' + ', '.join(f'{dim}: {size}' for dim, size in data.sizes.items()) + ')' 

37 

38 

39def _format_unit(data: Variable) -> str: 

40 if data.unit is None: 

41 return '<no unit>' 

42 return f'[{data.unit}]' 

43 

44 

45def _format_element(elem: Any, *, dtype: DType, spec: str): 

46 if spec: 

47 return f'{elem:{spec}}' 

48 if dtype in (DType.float64, DType.float32): 

49 # Replicate behavior of C++ formatter. 

50 return f'{elem:g}' 

51 if dtype == DType.string: 

52 return f'"{elem}"' 

53 return f'{elem}' 

54 

55 

56def _format_scalar(data: Any, *, dtype: DType, spec: FormatSpec) -> str: 

57 if spec.length == 0: 

58 return '...' 

59 return _format_element(data, dtype=dtype, spec=spec.nested) 

60 

61 

62def _as_flat_array(data): 

63 if isinstance(data, np.ndarray): 

64 return data.flat 

65 if 'ElementArray' in repr(type(data)): 

66 return data 

67 return np.array([data]) 

68 

69 

def _format_array_flat(data, *, dtype: DType, spec: FormatSpec) -> str:
    """Format array data as a flat, bracketed sequence.

    Data whose dtype is a scipp object type, ``PyObject`` data held in an
    'ElementArray' type, and ``PyObject`` data that is a ``DataGroup`` are
    delegated to dedicated fallbacks. Everything else is flattened and
    formatted element by element.
    """
    scipp_object_dtypes = (
        DType.Variable,
        DType.DataArray,
        DType.Dataset,
        DType.VariableView,
        DType.DataArrayView,
        DType.DatasetView,
    )
    if dtype in scipp_object_dtypes:
        return _format_array_flat_scipp_objects(data)

    if dtype == DType.PyObject:
        if 'ElementArray' in repr(type(data)):
            # We can handle scalars of PyObject but not arrays.
            return _format_array_flat_scipp_objects(data)
        if isinstance(data, DataGroup):
            return _format_data_group_element(data)

    flat = _as_flat_array(data)
    return _format_array_flat_regular(flat, dtype=dtype, spec=spec)

88 

89 

90def _format_array_flat_scipp_objects(data) -> str: 

91 # Fallback because ElementArrayView does not allow us to 

92 # slice and access elements nicely. 

93 return str(data) 

94 

95 

96def _format_data_group_element(data: scipp.DataGroup): 

97 return f'[{data}]' 

98 

99 

100def _element_ranges(spec: FormatSpec) -> Tuple[slice, slice]: 

101 if spec.selection == '^': 

102 return slice(None, spec.length // 2), slice(-spec.length // 2, None) 

103 if spec.selection == '<': 

104 return slice(None, spec.length), slice(0, 0) 

105 if spec.selection == '>': 

106 return slice(0, 0), slice(-spec.length, None) 

107 

108 

109def _format_array_flat_regular( 

110 data: np.ndarray, *, dtype: DType, spec: FormatSpec 

111) -> str: 

112 def _format_all_in(d) -> List[str]: 

113 return [_format_element(e, dtype=dtype, spec=spec.nested) for e in d] 

114 

115 if len(data) <= spec.length: 

116 elements = _format_all_in(data) 

117 elif spec.length == 0: 

118 elements = ['...'] 

119 else: 

120 left, right = _element_ranges(spec) 

121 elements = [] 

122 if left != slice(0, 0): 

123 elements.extend(_format_all_in(data[left])) 

124 elements.append('...') 

125 if right != slice(0, 0): 

126 elements.extend(_format_all_in(data[right])) 

127 return f'[{", ".join(elements)}]' 

128 

129 

def _format_variable_default(var: Variable, spec: FormatSpec) -> str:
    """Format a variable in the default layout.

    The result has the form
    ``<scipp.Variable> sizes dtype unit values [variances]``
    where the variances are appended only if the variable has any.
    Zero-dimensional variables are formatted as scalars,
    all others as flat arrays.
    """
    dims = _format_sizes(var)
    dtype_name = str(var.dtype)
    unit = _format_unit(var)

    is_scalar = var.ndim == 0
    render = _format_scalar if is_scalar else _format_array_flat

    values = render(
        var.value if is_scalar else var.values, dtype=var.dtype, spec=spec
    )
    raw_variances = var.variance if is_scalar else var.variances
    if raw_variances is None:
        variances = ''
    else:
        variances = render(raw_variances, dtype=var.dtype, spec=spec)

    head = f'<scipp.Variable> {dims} {dtype_name:>9} {unit:>15} {values}'
    return f'{head} {variances}' if variances else head

152 

153 

154def _format_variable_compact(var: Variable, spec: FormatSpec) -> str: 

155 if spec.has_nested or spec.has_length or spec.has_selection: 

156 raise ValueError(f"Invalid format spec for compact formatter: '{spec}'") 

157 if not _is_numeric(var.dtype): 

158 raise ValueError(f"Compact formatting is not supported for dtype {var.dtype}") 

159 

160 values = var.values if var.shape else np.array((var.value,)) 

161 variances = var.variances if var.shape else np.array((var.variance,)) 

162 unt = "" if var.unit == Unit('dimensionless') else f" {var.unit}" 

163 

164 # Iterate over array values to handle no- and infinite-precision cases 

165 if variances is None: 

166 formatted = [_format_element_compact(v) for v in values] 

167 else: 

168 formatted = [ 

169 _format_element_compact(*_round(v, e)) for v, e in zip(values, variances) 

170 ] 

171 return f"{', '.join(formatted)}{unt}" 

172 

173 

174def _is_numeric(dtype: DType) -> bool: 

175 dtype = str(dtype) 

176 return any(x in dtype for x in ('float', 'int')) 

177 

178 

179def _round(value, variance): 

180 from numpy import floor, log10, power, round, sqrt 

181 

182 # Treat 'infinite' precision the same as no variance 

183 if variance is None or variance == 0: 

184 return value, None, None 

185 

186 # The uncertainty is the square root of the variance 

187 error = sqrt(variance) 

188 

189 # Determine how many digits before (+) or after (-) the decimal place 

190 # the error allows for one-digit uncertainty of precision 

191 precision = floor(log10(error)) 

192 

193 # By convention, if the first digit of the error rounds to 1, 

194 # add an extra digit of precision, so there are two-digits of uncertainty 

195 if round(error * power(10.0, -precision)) == 1: 

196 precision -= 1 

197 

198 # Build powers of ten to enable rounding to the specified precision 

199 negative_power = power(10.0, -precision) 

200 positive_power = power(10.0, precision) 

201 

202 # Round the error, keeping the shifted value for the compact string 

203 error = int(round(error * negative_power)) 

204 # Round the value, shifting back after rounding 

205 value = round(value * negative_power) * positive_power 

206 

207 # If the precision is greater than that of 0.1 

208 if precision > -1: 

209 # pad the error to have the right number of trailing zeros 

210 error *= int(positive_power) 

211 

212 return value, error, precision 

213 

214 

215def _format_element_compact(value, error=None, precision=None): 

216 # Build the appropriate format string: 

217 # No variance (or infinite precision) values take no formatting string 

218 # Positive precision implies no decimals, with format '0.0f' 

219 format = '' if precision is None else f'0.{max(0, int(-precision)):d}f' 

220 

221 # Format the value using the generated format string 

222 formatted = "{v:{s}}".format(v=value, s=format) 

223 

224 # Append the error if there is non-infinite-precision variance 

225 if error is not None: 

226 formatted = f'{formatted}({error})' 

227 

228 return formatted 

229 

230 

# Dispatch table from the parsed format type to the function implementing it.
# Each formatter is called as ``formatter(variable, spec)``.
_VARIABLE_FORMATTERS = {
    FormatType.default: _format_variable_default,
    FormatType.compact: _format_variable_compact,
}