Coverage for install/scipp/visualization/formatting_html.py: 92%
238 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
4# Original source from
5# https://github.com/jsignell/xarray/blob/1d960933ab252e0f79f7e050e6c9261d55568057/xarray/core/formatting_html.py
7import collections
8import operator
9import uuid
10from functools import partial, reduce
11from html import escape as html_escape
12from re import escape as re_escape
14from .._scipp import core as sc
15from ..core import stddevs
16from ..utils import value_to_string
17from .resources import load_icons, load_style
# Label appended to a dimension name by `_make_dim_labels` when the
# dimension is listed among the bin-edge dimensions.
BIN_EDGE_LABEL = "[bin-edge]"
# Prefix put in front of the inline preview of standard deviations.
STDDEV_PREFIX = "σ = "  # noqa: RUF001
# Symbol shown in the "Variances (...)" heading of the expanded data repr.
VARIANCES_SYMBOL = "σ²"
# Template filled with the extent of an item along the bin dimension
# (see `_repr_item`).
SPARSE_PREFIX = "len={}"
def escape(content: str) -> str:
    """
    Return ``content`` with HTML special characters and dollar signs escaped.

    A bare dollar-sign($) triggers an unexpected MathJax conversion in the
    Jupyter notebook, so every ``$`` left after the HTML escaping is
    prefixed with a backslash.
    """
    escaped_dollar = re_escape('$')
    html_safe = html_escape(content)
    return html_safe.replace('$', escaped_dollar)
def _format_array(data, size, ellipsis_after):
    """
    Build the escaped, comma-separated preview of ``data``.

    When ``size`` exceeds ``2 * ellipsis_after + 1`` only the first and the
    last ``ellipsis_after`` items are shown, separated by ``...``.
    Data-array items are summarized by their dims, unit (unless it equals
    ``sc.units.one``) and coord names; every other item is rendered with
    ``value_to_string``.
    """
    parts = []
    index = 0
    while index < size:
        if index == ellipsis_after and size > 2 * ellipsis_after + 1:
            # Too many items to show in full: jump to the trailing ones.
            parts.append("...")
            index = size - ellipsis_after
        elem = data[index]
        if not isinstance(elem, sc.DataArray):
            parts.append(value_to_string(elem))
        else:
            dims = ', '.join(
                f'{dim}: {extent}' for dim, extent in elem.sizes.items()
            )
            coords = ', '.join(elem.coords)
            unit_part = '' if elem.unit == sc.units.one else f'unit={elem.unit}, '
            parts.append(f'{{dims=[{dims}], {unit_part}coords=[{coords}]}}')
        index += 1
    return escape(", ".join(parts))
def _make_row(data_html):
    """Wrap ``data_html`` in a ``<div>`` element."""
    return "<div>{}</div>".format(data_html)
def _format_non_events(var, has_variances):
    """
    Return the inline preview of a variable that holds no binned data.

    The variable is flattened to a single dimension first. With
    ``has_variances`` the standard deviations are previewed instead of the
    values and the result is prefixed with ``STDDEV_PREFIX``.
    """
    n_elements = reduce(operator.mul, var.shape, 1)
    if len(var.dims):
        var = sc.flatten(var, var.dims, 'ignored')
    data = stddevs(var).values if has_variances else var.values
    # avoid unintentional indexing into value of 0-D data
    if len(var.shape) == 0:
        data = [data]
    preview = _format_array(data, n_elements, ellipsis_after=2)
    return f'{STDDEV_PREFIX}{preview}' if has_variances else preview
def _repr_item(bin_dim, item):
    """
    Return the ``len=N`` label for ``item``, where ``N`` is the extent of
    ``item.shape`` at index ``bin_dim``.
    """
    return SPARSE_PREFIX.format(item.shape[bin_dim])
def _get_events(var, variances, ellipsis_after):
    """
    Return the list of ``len=N`` labels previewing the bins of ``var``.

    If ``var.values`` is itself a scipp object, a single label for
    ``var.value`` is returned. Otherwise one label per item is produced,
    with the middle replaced by ``...`` when there are more than
    ``2 * ellipsis_after + 1`` items.
    """
    constituents = var.bins.constituents
    event_dim = constituents['dim']
    content_dims = constituents['data'].dims
    # Position of the event dimension within the dims of the bin content.
    bin_dim = {name: pos for pos, name in enumerate(content_dims)}[event_dim]

    values = var.values
    if isinstance(values, (sc.Variable, sc.DataArray, sc.Dataset)):
        return [_repr_item(bin_dim, var.value)]

    n_bins = len(values)
    data = retrieve(var, variances=variances)
    labels = []
    pos = 0
    while pos < n_bins:
        if pos == ellipsis_after and n_bins > 2 * ellipsis_after + 1:
            # Skip the middle bins and continue with the trailing ones.
            labels.append("...")
            pos = n_bins - ellipsis_after
        labels.append(_repr_item(bin_dim, data[pos]))
        pos += 1
    return labels
def _format_events(var, has_variances):
    """Return the inline preview ``binned data [...]`` for a binned variable."""
    labels = _get_events(var, has_variances, ellipsis_after=2)
    joined = ", ".join(labels)
    return 'binned data [' + joined + ']'
def _ordered_dict(data):
    """
    Return the items of ``data`` as an ``OrderedDict`` sorted by the string
    form of their keys.
    """
    by_key_text = sorted(data.items(), key=lambda pair: str(pair[0]))
    return collections.OrderedDict(by_key_text)
def inline_variable_repr(var, has_variances=False):
    """
    Return the one-line preview of ``var``.

    Binned input (``var.bins`` is not None) is delegated to
    ``_format_events``; otherwise the data of a data array, or the variable
    itself, goes to ``_format_non_events``.
    """
    if var.bins is not None:
        return _format_events(var, has_variances)
    data = var.data if isinstance(var, sc.DataArray) else var
    return _format_non_events(data, has_variances)
def retrieve(var, variances=False, single=False):
    """
    Return the values or the variances of ``var``.

    ``variances`` selects between the value and the variance properties;
    ``single`` selects the singular accessor (``value`` / ``variance``)
    instead of the plural one (``values`` / ``variances``).
    """
    if variances:
        return var.variance if single else var.variances
    return var.value if single else var.values
def _short_data_repr_html_non_events(var, variances=False):
    """Return ``repr`` of the values (or variances) of a non-binned variable."""
    payload = retrieve(var, variances)
    return repr(payload)
def _short_data_repr_html_events(var):
    """
    Return the text shown in the expanded data section of a binned variable.

    The string form of the variable (or of ``var.data`` for a data array)
    is returned unchanged when the bin content is a dataset. Otherwise
    everything up to and including ``'binned data: '`` is dropped and a
    line break is inserted before ``content=``.
    """
    target = var.data if isinstance(var, sc.DataArray) else var
    text = str(target)
    if isinstance(var.bins.constituents['data'], sc.Dataset):
        return text
    marker = 'binned data: '
    # NOTE(review): if the marker is absent, find() yields -1 and the slice
    # starts at len(marker) - 1.
    offset = text.find(marker) + len(marker)
    tail = text[offset:]
    return tail.replace(', content=', ',\ncontent=')
def short_data_repr_html(var, variances=False):
    """Format "data" for DataArray and Variable and escape the result."""
    if var.bins is None:
        data_repr = _short_data_repr_html_non_events(var, variances)
    else:
        data_repr = _short_data_repr_html_events(var)
    return escape(data_repr)
def format_dims(dims, sizes, coords):
    """
    Return the ``<ul class='sc-dim-list'>`` listing each dimension and size.

    A dimension contained in ``coords`` gets the ``sc-has-index`` class and
    a size of ``None`` is shown as ``Events``. Empty ``dims`` yields an
    empty string.
    """
    if not dims:
        return ""

    items = []
    for dim, size in zip(dims, sizes):
        css = " class='sc-has-index'" if dim in coords else ""
        shown_size = 'Events' if size is None else size
        items.append(
            f"<li><span{css}>{escape(str(dim))}</span>: {shown_size}</li>"
        )

    return "<ul class='sc-dim-list'>" + "".join(items) + "</ul>"
def _icon(icon_name):
    """Return the inline ``<svg>`` element that references ``icon_name``."""
    # icon_name is defined in icon-svg-inline.html
    use_tag = f"<use xlink:href='#{icon_name}'></use>"
    return f"<svg class='icon sc-{icon_name}'>{use_tag}</svg>"
def summarize_coord(dim, var, ds=None):
    """
    Summarize one coordinate; its alignment flag is taken from
    ``var.aligned``.
    """
    label = str(dim)
    return summarize_variable(label, var, is_aligned=var.aligned, embedded_in=ds)
def summarize_mask(dim, var, ds=None):
    """Summarize one mask; masks are always rendered as not aligned."""
    label = str(dim)
    return summarize_variable(label, var, is_aligned=False, embedded_in=ds)
def summarize_coords(coords, ds=None):
    """
    Return the ``<ul class='sc-var-list'>`` with one item per coordinate.

    Coordinates are listed in the order produced by ``_ordered_dict``.
    Each item is closed with a plain ``</li>``: the fragment returned by
    ``summarize_coord`` leaves no ``<span>`` open, so no closing
    ``</span>`` belongs here.
    """
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_coord(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(coords).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"
def summarize_masks(masks, ds=None):
    """
    Return the ``<ul class='sc-var-list'>`` with one item per mask.

    Masks are listed in the order produced by ``_ordered_dict``.
    Each item is closed with a plain ``</li>``: the fragment returned by
    ``summarize_mask`` leaves no ``<span>`` open, so no closing
    ``</span>`` belongs here.
    """
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_mask(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(masks).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"
def summarize_attrs(attrs, embedded_in=None):
    """
    Return the ``<ul class='sc-var-list'>`` with one item per attribute.

    Attributes are listed in the order produced by ``_ordered_dict`` and
    rendered without nested attributes and as not aligned.
    """
    items = []
    for name, var in _ordered_dict(attrs).items():
        summary = summarize_variable(
            name,
            var,
            has_attrs=False,
            embedded_in=embedded_in,
            is_aligned=False,
        )
        items.append(f"<li class='sc-var-item'>{summary}</li>")
    return "<ul class='sc-var-list'>" + "".join(items) + "</ul>"
def _find_bin_edges(var, ds):
    """
    Return the dims of ``var`` whose length is one more than the size of
    the same dim in ``ds`` (a dim missing from ``ds`` counts as size 1).
    """
    edge_dims = []
    for dim, length in var.sizes.items():
        if length == ds.sizes.get(dim, 1) + 1:
            edge_dims.append(dim)
    return edge_dims
def _make_inline_attributes(var, has_attrs, embedded_in):
    """
    Build the collapsible masks/attributes subsections shown below a variable.

    Returns a tuple ``(disabled, attrs_ul)``: ``disabled`` is the string
    ``"disabled"`` when there is nothing to expand and ``""`` otherwise;
    ``attrs_ul`` is the HTML of the subsections or ``""`` when there are
    none. Nothing is rendered unless ``has_attrs`` is true.
    """
    disabled = "disabled"
    attrs_ul = ""
    sections = []

    if has_attrs and hasattr(var, "masks") and len(var.masks) > 0:
        sections.append(mask_section(var.masks))
        disabled = ""

    # The emptiness check reads the same ``deprecated_attrs`` mapping that
    # is guarded by hasattr() and rendered below, matching ``dataset_repr``.
    if (
        has_attrs
        and hasattr(var, "deprecated_attrs")
        and len(var.deprecated_attrs) > 0
    ):
        sections.append(attr_section(var.deprecated_attrs, embedded_in))
        disabled = ""

    if sections:
        section_items = "".join(
            f"<li class='sc-section-item sc-subsection'>{s}</li>" for s in sections
        )
        attrs_ul = (
            "<div class='sc-wrap'>"
            f"<ul class='sc-sections'>{section_items}</ul>"
            "</div>"
        )

    return disabled, attrs_ul
def _make_dim_labels(dim, bin_edges=None):
    """
    Return the bin-edge label for ``dim`` if it is listed in ``bin_edges``,
    else an empty string.
    """
    # Note: the leading space is part of the label on purpose, so that
    # no trailing whitespace appears when no dimension label is added.
    is_bin_edge = bool(bin_edges) and dim in bin_edges
    return f" {BIN_EDGE_LABEL}" if is_bin_edge else ""
def _make_dim_str(var, bin_edges, add_dim_size=False):
    """
    Return the comma-separated dimension labels of ``var``.

    Each entry is the dim name followed by its bin-edge label (if any)
    and, when ``add_dim_size`` is set and the size is not None, by
    ``: <size>``.
    """
    entries = []
    for dim, size in zip(var.dims, var.shape):
        name = str(dim)
        label = _make_dim_labels(dim, bin_edges)
        size_text = f': {size}' if add_dim_size and size is not None else ''
        entries.append(f'{name}{label}{size_text}')
    return ', '.join(entries)
def _format_common(is_index):
    """
    Return the pieces shared by every variable row: the alignment class
    attribute, fresh element ids, and the icons for the attributes and
    data toggles.
    """
    cssclass_aligned = " class='sc-aligned'" if is_index else ""

    # "unique" ids required to expand/collapse subsections
    attrs_id = f"attrs-{uuid.uuid4()}"
    data_id = f"data-{uuid.uuid4()}"
    attrs_icon = _icon("icon-file-text2")
    data_icon = _icon("icon-database")

    return cssclass_aligned, attrs_id, attrs_icon, data_id, data_icon
def summarize_variable(
    name, var, is_aligned=False, has_attrs=False, embedded_in=None, add_dim_size=False
):
    """
    Render one variable as the HTML fragment used both for standalone
    display (a single variable or data array) and for entries of a dataset.

    With ``name=None`` the dimension string takes the place of the name;
    otherwise the name and the dimensions get separate cells.
    """
    bin_edges = None if embedded_in is None else _find_bin_edges(var, embedded_in)
    dims_str = f"({_make_dim_str(var, bin_edges, add_dim_size)})"

    if var.unit is None:
        unit = ''
    elif var.unit == sc.units.dimensionless:
        unit = '𝟙'  # noqa: RUF001
    else:
        unit = str(var.unit)

    disabled, attrs_ul = _make_inline_attributes(var, has_attrs, embedded_in)

    preview = _make_row(inline_variable_repr(var))
    data_repr = short_data_repr_html(var)
    if var.bins is None:
        data_repr = "Values:<br>" + data_repr

    # Second preview cell and second data section, only with variances.
    variances_cell = ''
    if var.variances is not None:
        variances_preview = _make_row(inline_variable_repr(var, has_variances=True))
        variances_cell = f'<span>{variances_preview}</span>'
        data_repr += (
            f"<br><br>Variances ({VARIANCES_SYMBOL}):<br>"
            + short_data_repr_html(var, variances=True)
        )

    cssclass_aligned, attrs_id, attrs_icon, data_id, data_icon = _format_common(
        is_aligned
    )

    parts = []
    if name is None:
        parts.append(
            f"<div class='sc-standalone-var-name'><span{cssclass_aligned}>"
            f"{escape(dims_str)}</span></div>"
        )
    else:
        parts.append(
            f"<div class='sc-var-name'><span{cssclass_aligned}>"
            f"{escape(str(name))}</span></div>"
        )
        parts.append(f"<div class='sc-var-dims'>{escape(dims_str)}</div>")

    label_class = '' if has_attrs else 'sc-hide-icon'
    attrs_div = f"<div class='sc-var-attrs'>{attrs_ul}</div>" if attrs_ul else ""
    parts.extend(
        [
            f"<div class='sc-var-dtype'>{escape(str(var.dtype))}</div>",
            f"<div class='sc-var-unit'>{escape(unit)}</div>",
            f"<div class='sc-value-preview sc-preview'><span>{preview}</span>"
            f"{variances_cell}</div>",
            f"<input id='{attrs_id}' class='sc-var-attrs-in' "
            f"type='checkbox' {disabled}>",
            f"<label for='{attrs_id}' class='{label_class}'"
            f" title='Show/Hide attributes'>{attrs_icon}</label>",
            f"<input id='{data_id}' class='sc-var-data-in' type='checkbox'>",
            f"<label for='{data_id}' title='Show/Hide data repr'>"
            f"{data_icon}</label>",
            attrs_div,
            f"<pre class='sc-var-data'>{data_repr}</pre>",
        ]
    )
    return "".join(parts)
def summarize_data(dataset):
    """
    Return the ``<ul class='sc-var-list'>`` with one item per data entry.

    Nested masks/attributes are rendered, and ``dataset`` is passed on as
    the embedding object, only when ``dataset`` is a ``sc.Dataset``.
    """
    has_attrs = isinstance(dataset, sc.Dataset)
    parent = dataset if has_attrs else None
    items = []
    for name, var in _ordered_dict(dataset).items():
        summary = summarize_variable(
            name, var.data, has_attrs=has_attrs, embedded_in=parent
        )
        items.append(f"<li class='sc-var-item'>{summary}</li>")
    return "<ul class='sc-var-list'>" + "".join(items) + "</ul>"
def collapsible_section(
    name, inline_details="", details="", n_items=None, enabled=True, collapsed=False
):
    """
    Return the HTML of one expandable section: checkbox, label and details.

    The checkbox is disabled unless ``enabled`` is true and ``n_items`` is
    a non-zero count. It is pre-checked (expanded) unless ``collapsed`` is
    true or there are no items. The expand/collapse tooltip is attached
    only to sections that can actually be toggled.
    """
    # "unique" id to expand/collapse the section
    data_id = "section-" + str(uuid.uuid4())

    has_items = n_items is not None and n_items
    n_items_span = "" if n_items is None else f" <span>({n_items})</span>"
    # Keep the boolean separate from the attribute text: testing the
    # attribute string for truth would invert the tooltip condition.
    is_enabled = bool(enabled and has_items)
    enabled_attr = "" if is_enabled else "disabled"
    checked_attr = "" if collapsed or not has_items else "checked"
    tip = " title='Expand/collapse section'" if is_enabled else ""

    return (
        f"<input id='{data_id}' class='sc-section-summary-in' "
        f"type='checkbox' {enabled_attr} {checked_attr}>"
        f"<label for='{data_id}' class='sc-section-summary' {tip}>"
        f"{name}:{n_items_span}</label>"
        f"<div class='sc-section-inline-details'>{inline_details}</div>"
        f"<div class='sc-section-details'>{details}</div>"
    )
def _mapping_section(
    mapping,
    *extra_details_func_args,
    name=None,
    details_func=None,
    max_items_collapse=None,
    enabled=True,
):
    """
    Render ``mapping`` as a collapsible section titled ``name``.

    ``details_func`` is called with ``mapping`` followed by
    ``extra_details_func_args`` to produce the section body. A data array
    counts as a single item; otherwise the item count is ``len(mapping)``.
    The section starts collapsed once the count reaches
    ``max_items_collapse``; with ``max_items_collapse=None`` it never does.
    """
    n_items = 1 if isinstance(mapping, sc.DataArray) else len(mapping)
    # Guard the default: comparing an int with None would raise TypeError.
    collapsed = max_items_collapse is not None and n_items >= max_items_collapse

    return collapsible_section(
        name,
        details=details_func(mapping, *extra_details_func_args),
        n_items=n_items,
        enabled=enabled,
        collapsed=collapsed,
    )
def dim_section(dataset):
    """
    Return the always-disabled "Dimensions" section listing the dims and
    shape of ``dataset`` inline.
    """
    coords = getattr(dataset, "coords", {})
    inline = format_dims(dataset.dims, dataset.shape, coords)

    return collapsible_section(
        "Dimensions", inline_details=inline, enabled=False, collapsed=True
    )
def summarize_array(var, is_variable=False):
    """
    Return a one-item ``<ul class='sc-var-list'>`` holding the nameless
    summary of ``var``; ``is_variable`` adds the sizes to the dim labels.
    """
    summary = summarize_variable(None, var, add_dim_size=is_variable)
    item = f"<li class='sc-var-item'>{summary}</li>"
    return f"<ul class='sc-var-list'>{item}</ul>"
def variable_section(var):
    """Return the section body for a standalone variable, with dim sizes."""
    section_html = summarize_array(var, is_variable=True)
    return section_html
# Section builders: each one is `_mapping_section` pre-configured with the
# section title, the function that renders the entries, and the item count
# from which the section starts out collapsed.
coord_section = partial(
    _mapping_section,
    name="Coordinates",
    details_func=summarize_coords,
    max_items_collapse=25,
)

mask_section = partial(
    _mapping_section, name="Masks", details_func=summarize_masks, max_items_collapse=10
)

data_section = partial(
    _mapping_section,
    name="Data",
    details_func=summarize_data,
    max_items_collapse=15,
)

attr_section = partial(
    _mapping_section,
    name="Attributes",
    details_func=summarize_attrs,
    max_items_collapse=10,
)
def _obj_repr(header_components, sections):
    """
    Assemble the complete HTML repr: icons, style sheet, header and the
    list of sections, all inside one wrapping ``<div>``.
    """
    header = "<div class='sc-header'>" + "".join(header_components) + "</div>"
    section_items = "".join(
        f"<li class='sc-section-item'>{section}</li>" for section in sections
    )

    pieces = [
        "<div>",
        f"{load_icons()}",
        f"{load_style()}",
        "<div class='sc-wrap sc-root'>",
        header,
        f"<ul class='sc-sections'>{section_items}</ul>",
        "</div>",
        "</div>",
    ]
    return "".join(pieces)
def _format_size(obj):
    """
    Return the parenthesized size of ``obj``.

    The size reported by ``__sizeof__`` is shown first; when it differs
    from ``underlying_size()`` the latter is appended as "out of ...".
    """
    view_size = obj.__sizeof__()
    underlying_size = obj.underlying_size()
    text = human_readable_size(view_size)
    if view_size != underlying_size:
        text += (
            " <span class='sc-underlying-size'>out of "
            f"{human_readable_size(underlying_size)}</span>"
        )
    return f"({text})"
def dataset_repr(ds):
    """
    Return the HTML repr of a dataset or a data array.

    The sections are dimensions, coordinates (if any) and data; for
    anything that is not a ``sc.Dataset`` the non-empty masks and
    attributes follow.
    """
    is_dataset = isinstance(ds, sc.Dataset)
    obj_type = f"scipp.{type(ds).__name__}"
    header_components = [
        f"<div class='sc-obj-type'>{escape(obj_type)} {_format_size(ds)}</div>"
    ]

    sections = [dim_section(ds)]

    if len(ds.coords) > 0:
        sections.append(coord_section(ds.coords, ds))

    # A lone data array is shown as a single unnamed data entry.
    sections.append(data_section(ds if is_dataset else {'': ds}))

    if not is_dataset:
        if len(ds.masks) > 0:
            sections.append(mask_section(ds.masks, ds))
        if len(ds.deprecated_attrs) > 0:
            sections.append(attr_section(ds.deprecated_attrs, ds))

    return _obj_repr(header_components, sections)
def variable_repr(var):
    """Return the HTML repr of a standalone variable."""
    obj_type = f"scipp.{type(var).__name__}"

    header_components = [
        f"<div class='sc-obj-type'>{escape(obj_type)} {_format_size(var)}</div>"
    ]

    return _obj_repr(header_components, [variable_section(var)])
def human_readable_size(size_in_bytes):
    """
    Format a byte count using the largest binary unit it reaches.

    Sizes of at least 1024**3, 1024**2 or 1024 bytes are shown in GB, MB
    or KB with two decimals; anything smaller is shown as plain bytes.
    A size exactly on a unit boundary uses that unit (1024 -> "1.00 KB").
    """
    units = (('GB', 1024 * 1024 * 1024), ('MB', 1024 * 1024), ('KB', 1024))
    for unit, factor in units:
        # ">=" so an exact multiple is reported in the larger unit.
        if size_in_bytes >= factor:
            return f'{size_in_bytes / factor:.2f} {unit}'

    return f'{size_in_bytes} Bytes'