Coverage for install/scipp/visualization/table.py: 71%

125 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-17 01:51 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3 

4from typing import Any 

5 

6import numpy as np 

7 

8from .. import DataArray, Dataset, DType, Variable 

9from ..typing import VariableLike 

10 

# Inline CSS fragments spliced into ``style="..."`` attributes of the table.
CENTER = 'text-align: center;'  # applied to <th> header cells
LEFT_BORDER = 'border-left:1px solid #a9a9a9;'  # separates data columns
BOTTOM_BORDER = 'border-bottom:2px solid #a9a9a9;'  # underlines column heads

14 

15 

16def _string_in_cell(v: Variable) -> str: 

17 if v.bins is not None: 

18 return f'len={v.value.shape}' 

19 if v.dtype not in (DType.float32, DType.float64): 

20 return str(v.value) 

21 if v.variance is None: 

22 return f'{v.value:.3f}' 

23 err = np.sqrt(v.variance) 

24 if err == 0.0: 

25 prec = 3 

26 else: 

27 try: 

28 prec = max(3, -int(np.floor(np.log10(err)))) 

29 except OverflowError: 

30 prec = 3 # happens when err is non-finite 

31 return f'{v.value:.{prec}f}±{err:.{prec}f}' 

32 

33 

34def _var_name_with_unit(name: str, var: Variable) -> str: 

35 out = f'<span style="font-weight: bold;">{name}</span>' 

36 unit = var.bins.unit if var.bins is not None else var.unit 

37 if unit is not None: 

38 out += ' [𝟙]' if unit == 'dimensionless' else f' [{unit}]' # noqa: RUF001 

39 return out 

40 

41 

42def _add_td_tags(cell_list: list[str], border: str = '') -> list[str]: 

43 td = f' style="{border}"' if border else '' 

44 td = f'<td{td}>' 

45 return [f'{td}{cell}</td>' for cell in cell_list] 

46 

47 

def _make_variable_column(
    name: str,
    var: Variable,
    indices: list[int | None] | range,
    need_bin_edge: bool,
    is_bin_edge: bool,
    border: str = '',
) -> list[str]:
    """Render one table column as a list of ``<td>`` elements.

    Parameters
    ----------
    name:
        Column name shown in the head cell.
    var:
        Variable providing the unit and the cell values.
    indices:
        Positions of ``var`` to show. A ``None`` entry becomes a ``...`` cell.
    need_bin_edge:
        If True, one extra row is appended at the bottom of the column.
    is_bin_edge:
        Only used when ``need_bin_edge`` is True: the extra row shows
        ``var[-1]`` if True and is left empty otherwise.
    border:
        CSS applied to every cell; the head cell additionally gets
        ``BOTTOM_BORDER``.

    Returns
    -------
    :
        The head cell followed by the value cells.
    """
    title = _var_name_with_unit(name, var)
    cells = ['...' if idx is None else _string_in_cell(var[idx]) for idx in indices]
    if need_bin_edge:
        # Pad (or fill) the extra row so all columns have the same height.
        cells.append(_string_in_cell(var[-1]) if is_bin_edge else '')
    head_cells = _add_td_tags([title], border=border + BOTTOM_BORDER)
    value_cells = _add_td_tags(cells, border=border)
    return head_cells + value_cells

71 

72 

def _make_data_array_table(
    da: DataArray,
    indices: list[int | None] | range,
    bin_edges: bool,
    no_left_border: bool = False,
) -> list[list[str]]:
    """Render the columns belonging to one data array.

    Parameters
    ----------
    da:
        Data array to render.
    indices:
        Row positions to show, forwarded to every column.
    bin_edges:
        Whether columns need the extra bin-edge row.
    no_left_border:
        If True, the data column is drawn without ``LEFT_BORDER``.

    Returns
    -------
    :
        The data column (with an empty name), then one column per mask and
        one per attribute, each group sorted by name.
    """

    def column(name: str, var: Variable, is_edge: bool, border: str = '') -> list[str]:
        return _make_variable_column(
            name=name,
            var=var,
            indices=indices,
            need_bin_edge=bin_edges,
            is_bin_edge=is_edge,
            border=border,
        )

    data_border = '' if no_left_border else LEFT_BORDER
    columns = [column('', da.data, False, border=data_border)]
    columns.extend(
        column(mask_name, mask, False) for mask_name, mask in sorted(da.masks.items())
    )
    columns.extend(
        column(attr_name, attr, da.deprecated_attrs.is_edges(attr_name))
        for attr_name, attr in sorted(da.deprecated_attrs.items())
    )
    return columns

113 

114 

def _make_entries_header(ds: Dataset) -> str:
    """Build the header row that labels each dataset entry by name.

    Parameters
    ----------
    ds:
        Dataset being rendered.

    Returns
    -------
    :
        A ``<tr>`` with one empty cell spanning the coordinate columns (only
        if there are coordinates), followed by one centered cell per entry,
        sorted by name, spanning that entry's data, mask and attribute
        columns. Entry names are HTML-escaped so that ``<`` or ``&`` in a
        name cannot corrupt the markup.
    """
    from html import escape

    cells = []
    if ds.coords:
        cells.append(f'<th colspan="{len(ds.coords)}"></th>')
    for name, da in sorted(ds.items()):
        ncols = 1 + len(da.masks) + len(da.deprecated_attrs)
        # quote=False: the name is element content, not an attribute value.
        label = escape(str(name), quote=False)
        cells.append(f'<th style="{CENTER}" colspan="{ncols}">{label}</th>')
    return '<tr>' + ''.join(cells) + '</tr>'

124 

125 

def _make_sections_header(ds: Dataset) -> str:
    """Build the header row naming the sections of the table.

    Parameters
    ----------
    ds:
        Dataset being rendered.

    Returns
    -------
    :
        A ``<tr>`` with a "Coordinates" cell (if there are coordinates) and,
        for every entry sorted by name, a "Data" cell plus "Masks" and
        "Attributes" cells when the entry has any. The "Data" cell gets a
        left border unless it is the very first column of the table.
    """
    has_coords = bool(ds.coords)
    parts = ['<tr>']
    if has_coords:
        parts.append(
            f'<th style="{CENTER}" colspan="{len(ds.coords)}">Coordinates</th>'
        )
    for pos, (_, da) in enumerate(sorted(ds.items())):
        border = LEFT_BORDER if (pos != 0 or has_coords) else ''
        parts.append(f'<th style="{CENTER + border}">Data</th>')
        if da.masks:
            parts.append(f'<th style="{CENTER}" colspan="{len(da.masks)}">Masks</th>')
        if da.deprecated_attrs:
            n_attrs = len(da.deprecated_attrs)
            parts.append(f'<th style="{CENTER}" colspan="{n_attrs}">Attributes</th>')
    parts.append('</tr>')
    return ''.join(parts)

142 

143 

144def _to_html_table(header: str, body: list[list[str]]) -> str: 

145 out = '<table>' + header 

146 ncols = len(body) 

147 nrows = len(body[0]) 

148 for i in range(nrows): 

149 out += '<tr>' + ''.join(body[j][i] for j in range(ncols)) + '</tr>' 

150 out += '</table>' 

151 return out 

152 

153 

154def _find_bin_edges(ds: Dataset) -> bool: 

155 for key in ds.coords: 

156 if ds.coords.is_edges(key): 

157 return True 

158 for da in ds.values(): 

159 for key in da.deprecated_attrs: 

160 if da.deprecated_attrs.is_edges(key): 

161 return True 

162 return False 

163 

164 

def _strip_scalars_and_broadcast_masks(ds: Dataset) -> Dataset:
    """Rebuild ``ds`` so that every entry only has table-compatible members.

    For each entry, coordinates and attributes are kept only if their dims
    equal the dims of the entry's data; masks are all kept and broadcast to
    the sizes of the entry.

    Parameters
    ----------
    ds:
        Dataset to convert.

    Returns
    -------
    :
        A new dataset built from the filtered entries.
    """
    entries = {}
    for entry_name, da in ds.items():
        data_dims = da.data.dims
        kept_coords = {
            coord_name: coord
            for coord_name, coord in da.coords.items()
            if coord.dims == data_dims
        }
        kept_attrs = {
            attr_name: attr
            for attr_name, attr in da.deprecated_attrs.items()
            if attr.dims == data_dims
        }
        full_masks = {
            mask_name: mask.broadcast(sizes=da.sizes)
            for mask_name, mask in da.masks.items()
        }
        entries[entry_name] = DataArray(
            data=da.data,
            coords=kept_coords,
            attrs=kept_attrs,
            masks=full_masks,
        )
    return Dataset(entries)

181 

182 

def _to_dataset(obj: VariableLike | dict[str, Variable | DataArray]) -> Dataset:
    """Convert any supported input into a Dataset.

    Parameters
    ----------
    obj:
        A DataArray (stored under its own name), a Variable (stored under
        the empty name), a dict (passed to the Dataset constructor) or a
        Dataset (returned as is).

    Returns
    -------
    :
        The resulting dataset.

    Raises
    ------
    TypeError
        If ``obj`` is of none of the supported types.
    """
    if not isinstance(obj, (DataArray, Variable, dict, Dataset)):
        raise TypeError(f'Unsupported argument type: {type(obj)}')
    if isinstance(obj, DataArray):
        return Dataset({obj.name: obj})
    if isinstance(obj, Variable):
        return Dataset(data={'': obj})
    if isinstance(obj, dict):
        return Dataset(obj)
    # Only Dataset is left at this point.
    return obj

193 

194 

def table(
    obj: Variable | DataArray | Dataset | dict[str, Variable | DataArray],
    max_rows: int = 20,
) -> Any:
    """Create an HTML table from the contents of the supplied object.

    Possible inputs are:
     - Variable
     - DataArray
     - Dataset
     - dict of Variable
     - dict of DataArray

    Inputs must be one-dimensional. Zero-dimensional data members, attributes and
    coordinates are stripped. Zero-dimensional masks are broadcast.

    If the input has more than ``max_rows`` rows, only the first and last
    ``max_rows / 2`` (rounded down) rows are shown, separated by a ``...`` row.

    Parameters
    ----------
    obj:
        Input to be turned into a html table.
    max_rows:
        Maximum number of rows to display.

    Returns
    -------
    :
        An ``IPython.display.HTML`` object wrapping the table.

    Raises
    ------
    ValueError
        If the input is not one-dimensional.
    """
    ds = _to_dataset(obj)
    if ds.ndim != 1:
        raise ValueError("Table can only be generated for one-dimensional objects.")
    ds = _strip_scalars_and_broadcast_masks(ds)

    # Pick the rows to print; None marks the position of the '...' row.
    nrows = ds.shape[0]
    indices: list[int | None] | range = range(nrows)
    if nrows > max_rows:
        half = int(max_rows / 2)
        head_rows = range(half)
        tail_rows = range(nrows - half, nrows)
        indices = [*head_rows, None, *tail_rows]

    has_edges = _find_bin_edges(ds)

    # The row with entry names is only useful when there are several entries.
    header = _make_sections_header(ds)
    if len(ds) > 1:
        header = _make_entries_header(ds) + header

    # Coordinate columns come first ...
    columns = []
    for coord_name, coord in sorted(ds.coords.items()):
        columns.append(
            _make_variable_column(
                name=coord_name,
                var=coord,
                indices=indices,
                need_bin_edge=has_edges,
                is_bin_edge=ds.coords.is_edges(coord_name),
            )
        )

    # ... followed by the columns of every data array.
    for pos, (_, da) in enumerate(sorted(ds.items())):
        is_first_column = (pos == 0) and (not ds.coords)
        columns += _make_data_array_table(
            da=da,
            indices=indices,
            bin_edges=has_edges,
            no_left_border=is_first_column,
        )

    html = _to_html_table(header=header, body=columns)
    from IPython.display import HTML

    return HTML(html)  # type: ignore[no-untyped-call]