Coverage for install/scipp/visualization/table.py: 79%
125 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 01:59 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 01:59 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
import html
from typing import Any

import numpy as np

from .. import DataArray, Dataset, DType, Variable
from ..typing import VariableLike
# Inline CSS fragments that are spliced into the ``style`` attribute of the
# generated ``<th>`` / ``<td>`` elements.
# Centers the text of the header cells.
CENTER = 'text-align: center;'
# Thin vertical rule drawn to the left of a data column and its "Data" header.
LEFT_BORDER = 'border-left:1px solid #a9a9a9;'
# Thicker horizontal rule drawn below the name cell at the top of each column.
BOTTOM_BORDER = 'border-bottom:2px solid #a9a9a9;'
def _string_in_cell(v: Variable) -> str:
    """Format one element of a variable as the text of a table cell.

    Parameters
    ----------
    v:
        A single element of a variable (the callers pass ``var[i]``).

    Returns
    -------
    :
        - ``len=<shape of v.value>`` if ``v`` is binned,
        - the HTML-escaped ``str(v.value)`` if the dtype is not
          float32/float64,
        - the value with 3 decimals if there is no variance,
        - ``value±stddev`` otherwise, with at least 3 decimals and enough
          decimals to show the leading digit of the standard deviation.
    """
    if v.bins is not None:
        return f'len={v.value.shape}'
    if v.dtype not in (DType.float32, DType.float64):
        # The text ends up inside a <td>; without escaping, values such as
        # '<scipp.DataArray>' or 'a<b' would be parsed as markup and vanish.
        # quote=False: only '&', '<' and '>' matter in element content.
        return html.escape(str(v.value), quote=False)
    if v.variance is None:
        return f'{v.value:.3f}'
    err = np.sqrt(v.variance)
    # A zero or non-finite (inf / NaN) uncertainty has no meaningful leading
    # digit: log10 yields -inf/inf/NaN, and converting those to int raises
    # OverflowError (inf) or ValueError (NaN). Fall back to the default.
    if err == 0.0 or not np.isfinite(err):
        prec = 3
    else:
        prec = max(3, -int(np.floor(np.log10(err))))
    return f'{v.value:.{prec}f}±{err:.{prec}f}'
34def _var_name_with_unit(name: str, var: Variable) -> str:
35 out = f'<span style="font-weight: bold;">{name}</span>'
36 unit = var.bins.unit if var.bins is not None else var.unit
37 if unit is not None:
38 out += ' [𝟙]' if unit == 'dimensionless' else f' [{unit}]' # noqa: RUF001
39 return out
42def _add_td_tags(cell_list: list[str], border: str = '') -> list[str]:
43 td = f' style="{border}"' if border else ''
44 td = f'<td{td}>'
45 return [f'{td}{cell}</td>' for cell in cell_list]
def _make_variable_column(
    name: str,
    var: Variable,
    indices: list[int | None] | range,
    need_bin_edge: bool,
    is_bin_edge: bool,
    border: str = '',
) -> list[str]:
    """Render one variable as a table column of ``<td>`` cells.

    Parameters
    ----------
    name:
        Column name shown in the top cell.
    var:
        Variable whose elements fill the column.
    indices:
        Positions of ``var`` to show; ``None`` produces a ``...`` cell.
    need_bin_edge:
        If True, one extra cell is appended at the bottom of the column.
    is_bin_edge:
        If True (and ``need_bin_edge``), the extra cell shows ``var[-1]``;
        otherwise the extra cell is empty.
    border:
        CSS applied to every cell of the column.

    Returns
    -------
    :
        The name cell (with an additional bottom border) followed by the
        value cells.
    """
    title_cells = [_var_name_with_unit(name, var)]
    value_cells = [
        '...' if idx is None else _string_in_cell(var[idx]) for idx in indices
    ]
    if need_bin_edge:
        # Keeps all columns the same height when some variable in the table
        # is a bin edge.
        value_cells.append(_string_in_cell(var[-1]) if is_bin_edge else '')
    tagged_title = _add_td_tags(title_cells, border=border + BOTTOM_BORDER)
    tagged_values = _add_td_tags(value_cells, border=border)
    return tagged_title + tagged_values
def _make_data_array_table(
    da: DataArray,
    indices: list[int | None] | range,
    bin_edges: bool,
    no_left_border: bool = False,
) -> list[list[str]]:
    """Build the columns belonging to one data array.

    The columns are, in order: the data (unnamed), the masks sorted by name,
    and the deprecated attributes sorted by name.

    Parameters
    ----------
    da:
        Data array to render.
    indices:
        Positions to show; ``None`` marks an omitted range of rows.
    bin_edges:
        Whether every column gets the extra bin-edge cell.
    no_left_border:
        If True, the data column is drawn without its left border.

    Returns
    -------
    :
        A list of columns, each a list of ``<td>`` strings.
    """
    data_border = '' if no_left_border else LEFT_BORDER
    columns = [
        _make_variable_column(
            name='',
            var=da.data,
            indices=indices,
            need_bin_edge=bin_edges,
            is_bin_edge=False,
            border=data_border,
        )
    ]

    # Masks are never bin edges, so their extra cell (if any) stays empty.
    columns.extend(
        _make_variable_column(
            name=mask_name,
            var=mask,
            indices=indices,
            need_bin_edge=bin_edges,
            is_bin_edge=False,
        )
        for mask_name, mask in sorted(da.masks.items())
    )

    columns.extend(
        _make_variable_column(
            name=attr_name,
            var=attr,
            indices=indices,
            need_bin_edge=bin_edges,
            is_bin_edge=da.deprecated_attrs.is_edges(attr_name),
        )
        for attr_name, attr in sorted(da.deprecated_attrs.items())
    )

    return columns
def _make_entries_header(ds: Dataset) -> str:
    """Build the header row that names the entries of a dataset.

    Parameters
    ----------
    ds:
        Dataset whose entries are listed, sorted by name.

    Returns
    -------
    :
        A ``<tr>`` holding one empty ``<th>`` spanning the coordinate columns
        (only if there are coordinates) and one centered ``<th>`` per entry
        spanning its data, mask and attribute columns.
    """
    cells = []
    if ds.coords:
        cells.append(f'<th colspan="{len(ds.coords)}"></th>')
    for name, da in sorted(ds.items()):
        ncols = 1 + len(da.masks) + len(da.deprecated_attrs)
        # Entry names are arbitrary text; escape them so that characters
        # such as '<' or '&' are displayed instead of being parsed as markup.
        label = html.escape(f'{name}', quote=False)
        cells.append(f'<th style="{CENTER}" colspan="{ncols}">{label}</th>')
    return '<tr>' + ''.join(cells) + '</tr>'
def _make_sections_header(ds: Dataset) -> str:
    """Build the header row labelling the sections of the table.

    Parameters
    ----------
    ds:
        Dataset being rendered; entries are visited sorted by name.

    Returns
    -------
    :
        A ``<tr>`` with a "Coordinates" cell (only if there are coordinates)
        followed, per entry, by a "Data" cell and, when present, "Masks" and
        "Attributes" cells spanning the respective columns.
    """
    pieces = ['<tr>']
    if ds.coords:
        pieces.append(
            f'<th style="{CENTER}" colspan="{len(ds.coords)}">Coordinates</th>'
        )
    for position, (_, da) in enumerate(sorted(ds.items())):
        # The very first column of the table gets no left border.
        if position == 0 and not ds.coords:
            border = ''
        else:
            border = LEFT_BORDER
        pieces.append(f'<th style="{CENTER + border}">Data</th>')
        if da.masks:
            pieces.append(
                f'<th style="{CENTER}" colspan="{len(da.masks)}">Masks</th>'
            )
        if da.deprecated_attrs:
            pieces.append(
                f'<th style="{CENTER}" '
                f'colspan="{len(da.deprecated_attrs)}">Attributes</th>'
            )
    pieces.append('</tr>')
    return ''.join(pieces)
144def _to_html_table(header: str, body: list[list[str]]) -> str:
145 out = '<table>' + header
146 ncols = len(body)
147 nrows = len(body[0])
148 for i in range(nrows):
149 out += '<tr>' + ''.join(body[j][i] for j in range(ncols)) + '</tr>'
150 out += '</table>'
151 return out
def _find_bin_edges(ds: Dataset) -> bool:
    """Tell whether any coordinate or deprecated attribute is a bin edge.

    Parameters
    ----------
    ds:
        Dataset to inspect.

    Returns
    -------
    :
        True if ``is_edges`` reports True for at least one coordinate of
        ``ds`` or for at least one deprecated attribute of one of its
        entries; False otherwise.
    """
    if any(ds.coords.is_edges(coord_name) for coord_name in ds.coords):
        return True
    return any(
        da.deprecated_attrs.is_edges(attr_name)
        for da in ds.values()
        for attr_name in da.deprecated_attrs
    )
def _strip_scalars_and_broadcast_masks(ds: Dataset) -> Dataset:
    """Rebuild ``ds`` so that every member lines up with the data rows.

    For each entry, coordinates and deprecated attributes are kept only if
    their dims equal the dims of the entry's data; masks are broadcast to the
    sizes of the entry.

    Parameters
    ----------
    ds:
        Dataset to normalise.

    Returns
    -------
    :
        A new dataset built from the rebuilt data arrays.
    """
    rebuilt = {}
    for entry_name, da in ds.items():
        kept_coords = {
            coord_name: coord
            for coord_name, coord in da.coords.items()
            if coord.dims == da.data.dims
        }
        kept_attrs = {
            attr_name: attr
            for attr_name, attr in da.deprecated_attrs.items()
            if attr.dims == da.data.dims
        }
        full_masks = {
            mask_name: mask.broadcast(sizes=da.sizes)
            for mask_name, mask in da.masks.items()
        }
        rebuilt[entry_name] = DataArray(
            data=da.data,
            coords=kept_coords,
            attrs=kept_attrs,
            masks=full_masks,
        )
    return Dataset(rebuilt)
183def _to_dataset(obj: VariableLike | dict[str, Variable | DataArray]) -> Dataset:
184 if isinstance(obj, DataArray):
185 return Dataset({obj.name: obj})
186 if isinstance(obj, Variable):
187 return Dataset(data={"": obj})
188 if isinstance(obj, dict):
189 return Dataset(obj)
190 if isinstance(obj, Dataset):
191 return obj
192 raise TypeError(f'Unsupported argument type: {type(obj)}')
def table(
    obj: Variable | DataArray | Dataset | dict[str, Variable | DataArray],
    max_rows: int = 20,
) -> Any:
    """Render the contents of a one-dimensional object as an HTML table.

    Accepted inputs are:

    - Variable
    - DataArray
    - Dataset
    - dict of Variable
    - dict of DataArray

    The input has to be one-dimensional. Coordinates and attributes whose
    dims differ from those of the data are dropped, and masks are broadcast
    to the size of the data.

    Parameters
    ----------
    obj:
        Object to display.
    max_rows:
        Largest number of rows shown in full. Longer inputs are abbreviated
        to the first and last ``max_rows / 2`` rows (rounded down), separated
        by a row of ``...``.

    Returns
    -------
    :
        An ``IPython.display.HTML`` object wrapping the table markup.

    Raises
    ------
    TypeError
        If ``obj`` is not one of the supported types.
    ValueError
        If the input is not one-dimensional.
    """
    ds = _to_dataset(obj)

    if ds.ndim != 1:
        raise ValueError("Table can only be generated for one-dimensional objects.")

    ds = _strip_scalars_and_broadcast_masks(ds)

    # Pick the rows to print; None stands for the omitted middle part.
    nrows = ds.shape[0]
    row_indices: list[int | None] | range
    if nrows > max_rows:
        half = int(max_rows / 2)
        leading = range(half)
        trailing = range(nrows - half, nrows)
        row_indices = [*leading, None, *trailing]
    else:
        row_indices = range(nrows)

    has_bin_edges = _find_bin_edges(ds)

    header = _make_sections_header(ds)
    if len(ds) > 1:
        # With several entries, an extra row on top names each of them.
        header = _make_entries_header(ds) + header

    # Coordinate columns come first ...
    columns = []
    for coord_name, coord in sorted(ds.coords.items()):
        columns.append(
            _make_variable_column(
                name=coord_name,
                var=coord,
                indices=row_indices,
                need_bin_edge=has_bin_edges,
                is_bin_edge=ds.coords.is_edges(coord_name),
            )
        )

    # ... followed by the columns of every data array.
    for position, (_, da) in enumerate(sorted(ds.items())):
        is_first_column = (position == 0) and (not ds.coords)
        columns += _make_data_array_table(
            da=da,
            indices=row_indices,
            bin_edges=has_bin_edges,
            no_left_border=is_first_column,
        )

    markup = _to_html_table(header=header, body=columns)
    from IPython.display import HTML

    return HTML(markup)  # type: ignore[no-untyped-call]