Coverage for install/scipp/visualization/formatting_html.py: 92%
246 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-17 01:51 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-17 01:51 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
4# Original source from
5# https://github.com/jsignell/xarray/blob/1d960933ab252e0f79f7e050e6c9261d55568057/xarray/core/formatting_html.py
7import collections
8import operator
9import uuid
10from collections.abc import Callable, Iterable, Mapping, Sequence
11from functools import partial, reduce
12from html import escape as html_escape
13from re import escape as re_escape
14from typing import Any, TypeVar
16from .._scipp import core as sc
17from ..core import Coords, DataArray, Dataset, Masks, Variable, stddevs
18from ..utils import value_to_string
19from .resources import load_icons, load_style
# Generic type variable used for the value type of mappings handled below.
_T = TypeVar('_T')

# Text fragments inserted into the generated HTML / inline previews.
BIN_EDGE_LABEL = "[bin-edge]"  # appended to a dimension name by _make_dim_labels
STDDEV_PREFIX = "σ = "  # noqa: RUF001
VARIANCES_SYMBOL = "σ²"
SPARSE_PREFIX = "len={}"  # format template; filled with a length by _repr_item
def escape(content: str) -> str:
    """
    Return ``content`` with html special characters and dollar-signs escaped.

    Every ``$`` is prefixed with a backslash after the html escaping; an
    unescaped dollar-sign causes unexpected MathJax conversion in the
    Jupyter notebook.
    """
    html_safe = html_escape(content)
    escaped_dollar = re_escape('$')
    return html_safe.replace('$', escaped_dollar)
def _format_array(data: Sequence[Any], size: int, ellipsis_after: int) -> str:
    """
    Build an escaped, comma-separated preview of the first ``size`` items of ``data``.

    When ``size`` exceeds ``2 * ellipsis_after + 1`` only the leading and
    trailing ``ellipsis_after`` items are rendered, with ``"..."`` in between.
    ``DataArray`` items are summarized by their dims, unit (omitted when it
    equals ``sc.units.one``) and coord names; anything else goes through
    ``value_to_string``.
    """
    abbreviate = size > 2 * ellipsis_after + 1
    parts: list[str] = []
    index = 0
    while index < size:
        if abbreviate and index == ellipsis_after:
            # Skip the middle of the array and resume near the end.
            parts.append("...")
            index = size - ellipsis_after
        elem = data[index]
        index += 1
        if not isinstance(elem, DataArray):
            parts.append(value_to_string(elem))
            continue
        dims = ', '.join(f'{dim}: {extent}' for dim, extent in elem.sizes.items())
        coords = ', '.join(elem.coords)
        unit_part = '' if elem.unit == sc.units.one else f'unit={elem.unit}, '
        parts.append(f'{{dims=[{dims}], {unit_part}coords=[{coords}]}}')
    return escape(", ".join(parts))
59def _make_row(data_html: object) -> str:
60 return f"<div>{data_html}</div>"
def _format_non_events(var: Variable, has_variances: bool) -> str:
    """
    Build the inline preview string for a variable without bins.

    The variable is flattened to one dimension (if it has any dims) and its
    values - or, when ``has_variances`` is set, its standard deviations
    prefixed with ``STDDEV_PREFIX`` - are rendered by ``_format_array`` with
    two leading and two trailing elements.
    """
    n_elements = reduce(operator.mul, var.shape, 1)
    if len(var.dims) > 0:
        var = sc.flatten(var, var.dims, 'ignored')
    source = stddevs(var) if has_variances else var
    data = source.values
    if len(var.shape) == 0:
        # avoid unintentional indexing into value of 0-D data
        data = [data]
    preview = _format_array(data, n_elements, ellipsis_after=2)
    return f'{STDDEV_PREFIX}{preview}' if has_variances else preview
def _repr_item(bin_dim: int, item: Variable) -> str:
    """Format the extent of ``item`` along axis ``bin_dim`` using ``SPARSE_PREFIX``."""
    return SPARSE_PREFIX.format(item.shape[bin_dim])
def _get_events(
    var: Variable | DataArray, has_variances: bool, ellipsis_after: int
) -> list[str]:
    """
    Return one length label per bin of ``var``, abbreviated with ``"..."``.

    The axis that is measured is the position of ``constituents['dim']``
    within the dims of ``constituents['data']``. If ``var.values`` is itself a
    Variable, DataArray or Dataset, a single label for ``var.value`` is
    returned. Otherwise the items from ``retrieve`` are labelled one by one,
    skipping the middle when there are more than ``2 * ellipsis_after + 1``.
    """
    constituents = var.bins.constituents  # type: ignore[union-attr]
    content_dims = constituents['data'].dims
    axis_of = dict(zip(content_dims, range(len(content_dims)), strict=True))
    bin_dim = axis_of[constituents['dim']]

    if isinstance(var.values, Variable | DataArray | Dataset):
        return [_repr_item(bin_dim, var.value)]

    labels = []
    n_bins = len(var.values)
    abbreviate = n_bins > 2 * ellipsis_after + 1
    data = retrieve(var, has_variances=has_variances)
    index = 0
    while index < n_bins:
        if abbreviate and index == ellipsis_after:
            # Skip the middle bins and resume near the end.
            labels.append("...")
            index = n_bins - ellipsis_after
        labels.append(_repr_item(bin_dim, data[index]))
        index += 1
    return labels
def _format_events(var: Variable | DataArray, has_variances: bool) -> str:
    """Build the inline preview ``binned data [...]`` from the labels of ``_get_events``."""
    labels = _get_events(var, has_variances, ellipsis_after=2)
    joined = ", ".join(labels)
    return f'binned data [{joined}]'
120def _ordered_dict(
121 data: Mapping[str, _T],
122) -> collections.OrderedDict[str, _T]:
123 data_ordered = collections.OrderedDict(
124 sorted(data.items(), key=lambda t: str(t[0]))
125 )
126 return data_ordered
def inline_variable_repr(var: Variable | DataArray, has_variances: bool = False) -> str:
    """
    Return the one-line preview of ``var``.

    Objects whose ``bins`` is not ``None`` are formatted by ``_format_events``;
    everything else by ``_format_non_events`` (using ``.data`` of a DataArray).
    """
    if var.bins is not None:
        return _format_events(var, has_variances)
    data = var.data if isinstance(var, DataArray) else var
    return _format_non_events(data, has_variances)
def retrieve(
    var: Variable | DataArray, has_variances: bool = False, single: bool = False
) -> Any:
    """
    Read values or variances from ``var``.

    ``has_variances`` selects the variance attributes over the value ones;
    ``single`` selects the singular attribute (``value`` / ``variance``) over
    the plural one (``values`` / ``variances``).
    """
    if single:
        return var.variance if has_variances else var.value
    return var.variances if has_variances else var.values
def _short_data_repr_html_non_events(
    var: Variable | DataArray, has_variances: bool = False
) -> str:
    """Return ``repr`` of the values (or variances) obtained via ``retrieve``."""
    payload = retrieve(var, has_variances)
    return repr(payload)
def _short_data_repr_html_events(var: Variable | DataArray) -> str:
    """
    Return a text representation of binned ``var`` for the data section.

    The ``str`` of the variable (or of ``.data`` for a DataArray) is returned
    unchanged when the bin content is a Dataset. Otherwise the text is cut just
    after the ``'binned data: '`` marker and a line break is inserted before
    ``content=``.
    """
    target = var.data if isinstance(var, DataArray) else var
    text = str(target)
    if isinstance(var.bins.constituents['data'], Dataset):  # type: ignore[union-attr]
        return text
    marker = 'binned data: '
    offset = text.find(marker) + len(marker)
    tail = text[offset:]
    return tail.replace(', content=', ',\ncontent=')
def short_data_repr_html(var: Variable | DataArray, has_variances: bool = False) -> str:
    """Format "data" for DataArray and Variable and escape it for html output."""
    if var.bins is None:
        return escape(_short_data_repr_html_non_events(var, has_variances))
    return escape(_short_data_repr_html_events(var))
172def format_dims(
173 dims: Iterable[str] | None,
174 sizes: Iterable[int | None],
175 coords: Mapping[str, Variable],
176) -> str:
177 if dims is None:
178 return ""
180 dim_css_map = {
181 dim: " class='sc-has-index'" if dim in coords else "" for dim in dims
182 }
184 dims_li = "".join(
185 f"<li><span{dim_css_map[dim]}>"
186 f"{escape(str(dim))}</span>: "
187 f"{size if size is not None else 'Events' }</li>"
188 for dim, size in zip(dims, sizes, strict=True)
189 )
191 return f"<ul class='sc-dim-list'>{dims_li}</ul>"
194def _icon(icon_name: str) -> str:
195 # icon_name is defined in icon-svg-inline.html
196 return (
197 f"<svg class='icon sc-{icon_name}'><use xlink:href='#{icon_name}'></use></svg>"
198 )
def summarize_coord(
    dim: str, var: Variable, ds: DataArray | Dataset | None = None
) -> str:
    """Summarize a single coordinate; its alignment flag is taken from ``var.aligned``."""
    aligned = var.aligned
    return summarize_variable(dim, var, embedded_in=ds, is_aligned=aligned)
def summarize_mask(
    dim: str, var: Variable, ds: DataArray | Dataset | None = None
) -> str:
    """Summarize a single mask; masks are always rendered as not aligned."""
    return summarize_variable(dim, var, embedded_in=ds, is_aligned=False)
def summarize_coords(coords: Coords, ds: DataArray | Dataset | None = None) -> str:
    """
    Render all coordinates as an html ``<ul class='sc-var-list'>``.

    Items are ordered by name (see ``_ordered_dict``) and each one is the
    output of ``summarize_coord`` wrapped in ``<li class='sc-var-item'>``.
    """
    # The markup from summarize_coord is already balanced, so the list item is
    # closed with a plain </li> (no stray </span>), matching summarize_attrs
    # and summarize_data.
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_coord(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(coords).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"
def summarize_masks(masks: Masks, ds: DataArray | Dataset | None = None) -> str:
    """
    Render all masks as an html ``<ul class='sc-var-list'>``.

    Items are ordered by name (see ``_ordered_dict``) and each one is the
    output of ``summarize_mask`` wrapped in ``<li class='sc-var-item'>``.
    """
    # The markup from summarize_mask is already balanced, so the list item is
    # closed with a plain </li> (no stray </span>), matching summarize_attrs
    # and summarize_data.
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_mask(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(masks).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"
def summarize_attrs(
    attrs: Coords, embedded_in: DataArray | Dataset | None = None
) -> str:
    """
    Render all attributes as an html ``<ul class='sc-var-list'>``.

    Items are ordered by name and summarized without nested attributes and as
    not aligned.
    """
    items = []
    for name, var in _ordered_dict(attrs).items():
        summary = summarize_variable(
            name,
            var,
            has_attrs=False,
            embedded_in=embedded_in,
            is_aligned=False,
        )
        items.append(f"<li class='sc-var-item'>{summary}</li>")
    attrs_li = "".join(items)
    return f"<ul class='sc-var-list'>{attrs_li}</ul>"
247def _find_bin_edges(var: Variable | DataArray, ds: DataArray | Dataset) -> list[str]:
248 """
249 Checks if the coordinate contains bin-edges.
250 """
251 return [
252 dim for dim, length in var.sizes.items() if ds.sizes.get(dim, 1) + 1 == length
253 ]
256def _make_inline_attributes(
257 var: Variable | DataArray, has_attrs: bool, embedded_in: DataArray | Dataset | None
258) -> tuple[str, str]:
259 disabled = "disabled"
260 attrs_ul = ""
261 attrs_sections = []
263 if has_attrs and hasattr(var, "masks"):
264 if len(var.masks) > 0:
265 attrs_sections.append(mask_section(var.masks))
266 disabled = ""
268 if has_attrs and hasattr(var, "deprecated_attrs"):
269 if len(var.deprecated_attrs) > 0:
270 attrs_sections.append(attr_section(var.deprecated_attrs, embedded_in))
271 disabled = ""
273 if len(attrs_sections) > 0:
274 attrs_sections_str = "".join(
275 f"<li class='sc-section-item sc-subsection'>{s}</li>"
276 for s in attrs_sections
277 )
278 attrs_ul = (
279 "<div class='sc-wrap'>"
280 f"<ul class='sc-sections'>{attrs_sections_str}</ul>"
281 "</div>"
282 )
284 return disabled, attrs_ul
287def _make_dim_labels(dim: str, bin_edges: Sequence[str] | None = None) -> str:
288 # Note: the space needs to be here, otherwise
289 # there is a trailing whitespace when no dimension
290 # label has been added
291 if bin_edges and dim in bin_edges:
292 return f" {BIN_EDGE_LABEL}"
293 else:
294 return ""
def _make_dim_str(
    var: Variable | DataArray,
    bin_edges: Sequence[str] | None,
    add_dim_size: bool = False,
) -> str:
    """
    Return the comma-separated dimension labels of ``var``.

    Each entry is the dim name, followed by the bin-edge label where
    applicable and, if ``add_dim_size`` is set and the size is not ``None``,
    by ``': <size>'``.
    """
    entries = []
    for dim, size in zip(var.dims, var.shape, strict=True):
        dim_name = str(dim)
        edge_label = _make_dim_labels(dim, bin_edges)
        size_suffix = f': {size}' if add_dim_size and size is not None else ''
        entries.append(f'{dim_name}{edge_label}{size_suffix}')
    return ', '.join(entries)
def _format_common(is_index: bool) -> tuple[str, str, str, str, str]:
    """
    Create the pieces shared by every variable row.

    Returns ``(cssclass_aligned, attrs_id, attrs_icon, data_id, data_icon)``,
    where the css class attribute is only non-empty when ``is_index`` is set.
    """
    # "unique" ids required to expand/collapse subsections
    attrs_id = f"attrs-{uuid.uuid4()}"
    data_id = f"data-{uuid.uuid4()}"

    cssclass_aligned = " class='sc-aligned'" if is_index else ""
    return (
        cssclass_aligned,
        attrs_id,
        _icon("icon-file-text2"),
        data_id,
        _icon("icon-database"),
    )
def summarize_variable(
    name: str | None,
    var: Variable | DataArray,
    is_aligned: bool = False,
    has_attrs: bool = False,
    embedded_in: DataArray | Dataset | None = None,
    add_dim_size: bool = False,
) -> str:
    """
    Build the html row for one variable, either standalone (a single variable
    or data array is displayed) or as an entry of a dataset.

    With ``name=None`` the dims string takes the place of the name cell;
    otherwise separate name and dims cells are emitted. ``embedded_in`` is
    used to detect bin-edge dims, ``has_attrs`` controls whether nested
    masks/attributes are rendered and whether their icon is visible, and
    ``add_dim_size`` appends sizes to the dims string.
    """
    bin_edges = None if embedded_in is None else _find_bin_edges(var, embedded_in)
    dims_str = f"({_make_dim_str(var, bin_edges, add_dim_size)})"

    if var.unit is None:
        unit = ''
    elif var.unit == sc.units.dimensionless:
        unit = '𝟙'  # noqa: RUF001
    else:
        unit = str(var.unit)

    disabled, attrs_ul = _make_inline_attributes(var, has_attrs, embedded_in)

    preview = _make_row(inline_variable_repr(var))
    data_repr = short_data_repr_html(var)
    if var.bins is None:
        data_repr = f"Values:<br>{data_repr}"

    variances_span = ''
    if var.variances is not None:
        variances_row = _make_row(inline_variable_repr(var, has_variances=True))
        variances_span = f'<span>{variances_row}</span>'
        variances_repr = short_data_repr_html(var, has_variances=True)
        data_repr += f"<br><br>Variances ({VARIANCES_SYMBOL}):<br>{variances_repr}"

    cssclass_aligned, attrs_id, attrs_icon, data_id, data_icon = _format_common(
        is_aligned
    )

    html = []
    if name is not None:
        html.append(
            f"<div class='sc-var-name'><span{cssclass_aligned}>{escape(str(name))}"
            "</span></div>"
        )
        html.append(f"<div class='sc-var-dims'>{escape(dims_str)}</div>")
    else:
        html.append(
            f"<div class='sc-standalone-var-name'><span{cssclass_aligned}>"
            f"{escape(dims_str)}</span></div>"
        )

    html.append(f"<div class='sc-var-dtype'>{escape(str(var.dtype))}</div>")
    html.append(f"<div class='sc-var-unit'>{escape(unit)}</div>")
    html.append(f"<div class='sc-value-preview sc-preview'><span>{preview}</span>")
    html.append(f"{variances_span}</div>")

    # Checkbox + label toggling the nested masks/attributes.
    label_class = '' if has_attrs else 'sc-hide-icon'
    html.append(f"<input id='{attrs_id}' class='sc-var-attrs-in' ")
    html.append(f"type='checkbox' {disabled}>")
    html.append(
        f"<label for='{attrs_id}' class='{label_class}' title='Show/Hide attributes'>"
    )
    html.append(f"{attrs_icon}</label>")

    # Checkbox + label toggling the full data representation.
    html.append(f"<input id='{data_id}' class='sc-var-data-in' type='checkbox'>")
    html.append(f"<label for='{data_id}' title='Show/Hide data repr'>")
    html.append(f"{data_icon}</label>")

    if attrs_ul:
        html.append(f"<div class='sc-var-attrs'>{attrs_ul}</div>")
    html.append(f"<pre class='sc-var-data'>{data_repr}</pre>")
    return "".join(html)
def summarize_data(dataset: Mapping[str, DataArray] | Dataset) -> str:
    """
    Render the data items of ``dataset`` as an html ``<ul class='sc-var-list'>``.

    For a ``Dataset`` the items are summarized with ``has_attrs=True`` and with
    the dataset as ``embedded_in``; for any other mapping both are disabled.
    Items are ordered by name and only their ``.data`` is summarized.
    """
    is_dataset = isinstance(dataset, Dataset)
    embedded_in: Dataset | None = dataset if is_dataset else None

    items = []
    for name, var in _ordered_dict(dataset).items():
        summary = summarize_variable(
            name,
            var.data,
            has_attrs=is_dataset,
            embedded_in=embedded_in,
        )
        items.append(f"<li class='sc-var-item'>{summary}</li>")
    vars_li = "".join(items)
    return f"<ul class='sc-var-list'>{vars_li}</ul>"
420def collapsible_section(
421 name: str,
422 inline_details: str = "",
423 details: str = "",
424 n_items: int | None = None,
425 enabled: bool = True,
426 collapsed: bool = False,
427) -> str:
428 # "unique" id to expand/collapse the section
429 data_id = "section-" + str(uuid.uuid4())
431 has_items = n_items is not None and n_items
432 n_items_span = "" if n_items is None else f" <span>({n_items})</span>"
433 disabled = "" if enabled and has_items else "disabled"
434 checked = "" if collapsed or not has_items else "checked"
435 tip = " title='Expand/collapse section'" if enabled else ""
437 return (
438 f"<input id='{data_id}' class='sc-section-summary-in' "
439 f"type='checkbox' {disabled} {checked}>"
440 f"<label for='{data_id}' class='sc-section-summary' {tip}>"
441 f"{name}:{n_items_span}</label>"
442 f"<div class='sc-section-inline-details'>{inline_details}</div>"
443 f"<div class='sc-section-details'>{details}</div>"
444 )
def _mapping_section(
    mapping: DataArray | Mapping[str, Variable],
    *extra_details_func_args: Any,
    name: str,
    max_items_collapse: int,
    details_func: Callable[..., str],
    enabled: bool = True,
) -> str:
    """
    Build a collapsible section titled ``name`` for ``mapping``.

    The body comes from ``details_func(mapping, *extra_details_func_args)``.
    A ``DataArray`` counts as one item, any other mapping as ``len(mapping)``
    items; the section starts collapsed once the item count reaches
    ``max_items_collapse``.
    """
    if isinstance(mapping, DataArray):
        n_items = 1
    else:
        n_items = len(mapping)
    start_collapsed = n_items >= max_items_collapse
    details_html = details_func(mapping, *extra_details_func_args)

    return collapsible_section(
        name,
        details=details_html,
        n_items=n_items,
        enabled=enabled,
        collapsed=start_collapsed,
    )
def dim_section(ds: DataArray | Dataset) -> str:
    """
    Build the always-disabled "Dimensions" section for ``ds``.

    The dimension list is shown as the inline details; an object without a
    ``coords`` attribute is treated as having no coordinates.
    """
    coords = getattr(ds, "coords", {})
    dims_html = format_dims(ds.dims, ds.shape, coords)

    return collapsible_section(
        "Dimensions", inline_details=dims_html, enabled=False, collapsed=True
    )
def summarize_array(var: Variable, is_variable: bool = False) -> str:
    """
    Render a single variable as a one-item html ``<ul class='sc-var-list'>``.

    The variable is summarized without a name; ``is_variable`` is forwarded as
    ``add_dim_size`` so that dimension sizes are included in the dims string.
    """
    # There is exactly one item, so the markup is built directly instead of
    # passing a single string through "".join (which would iterate over its
    # characters only to glue them back together).
    item = summarize_variable(None, var, add_dim_size=is_variable)
    return f"<ul class='sc-var-list'><li class='sc-var-item'>{item}</li></ul>"
def variable_section(var: Variable) -> str:
    """Return the section for a standalone variable (dimension sizes included)."""
    section_html = summarize_array(var, is_variable=True)
    return section_html
# Section builders: `_mapping_section` with the section title, the function
# rendering the section body and the item count from which the section starts
# collapsed already bound.
coord_section = partial(
    _mapping_section,
    name="Coordinates",
    max_items_collapse=25,
    details_func=summarize_coords,
)

mask_section = partial(
    _mapping_section,
    name="Masks",
    max_items_collapse=10,
    details_func=summarize_masks,
)

data_section = partial(
    _mapping_section,
    name="Data",
    max_items_collapse=15,
    details_func=summarize_data,
)

attr_section = partial(
    _mapping_section,
    name="Attributes",
    max_items_collapse=10,
    details_func=summarize_attrs,
)
def _obj_repr(header_components: Iterable[str], sections: Iterable[str]) -> str:
    """
    Assemble the complete html representation of an object.

    The output consists of the icons and style returned by ``load_icons`` and
    ``load_style``, a header built from ``header_components`` and one list
    item per entry of ``sections``.
    """
    header_html = "".join(header_components)
    header = f"<div class='sc-header'>{header_html}</div>"
    section_items = "".join(
        f"<li class='sc-section-item'>{section}</li>" for section in sections
    )

    parts = [
        "<div>",
        f"{load_icons()}",
        f"{load_style()}",
        "<div class='sc-wrap sc-root'>",
        header,
        f"<ul class='sc-sections'>{section_items}</ul>",
        "</div>",
        "</div>",
    ]
    return "".join(parts)
def _format_size(obj: Variable | DataArray | Dataset) -> str:
    """
    Return the parenthesized, human readable size of ``obj``.

    When ``obj.__sizeof__()`` differs from ``obj.underlying_size()`` the
    latter is appended as "out of ..." in a dedicated ``<span>``.
    """
    view_size = obj.__sizeof__()
    underlying_size = obj.underlying_size()
    view_text = human_readable_size(view_size)
    if view_size == underlying_size:
        return f"({view_text})"
    underlying_text = human_readable_size(underlying_size)
    return (
        f"({view_text}"
        f" <span class='sc-underlying-size'>out of {underlying_text}</span>"
        ")"
    )
def data_array_dataset_repr(ds: DataArray | Dataset) -> str:
    """
    Return the html representation of a data array or dataset.

    Sections appear in the order: dimensions, coordinates (if any), data and -
    for anything that is not a ``Dataset`` - masks and attributes (each only
    when non-empty).
    """
    is_dataset = isinstance(ds, Dataset)
    type_name = f"scipp.{type(ds).__name__}"
    size_text = _format_size(ds)
    header_components = [
        f"<div class='sc-obj-type'>{escape(type_name)} {size_text}</div>"
    ]

    sections = [dim_section(ds)]
    if len(ds.coords) > 0:
        sections.append(coord_section(ds.coords, ds))

    # A single data array is shown as a one-entry mapping with an empty name.
    data_items = ds if is_dataset else {'': ds}
    sections.append(data_section(data_items))

    if not is_dataset:
        if len(ds.masks) > 0:
            sections.append(mask_section(ds.masks, ds))
        if len(ds.deprecated_attrs) > 0:
            sections.append(attr_section(ds.deprecated_attrs, ds))

    return _obj_repr(header_components, sections)
def variable_repr(var: Variable) -> str:
    """Return the html representation of a variable: a type/size header and one section."""
    type_name = f"scipp.{type(var).__name__}"
    size_text = _format_size(var)
    header_components = [
        f"<div class='sc-obj-type'>{escape(type_name)} {size_text}</div>"
    ]
    return _obj_repr(header_components, [variable_section(var)])
def human_readable_size(size_in_bytes: int) -> str:
    """
    Format a byte count using the largest fitting unit (1024-based).

    Sizes of at least one KB, MB or GB are shown with two decimals in that
    unit; anything smaller is shown as a plain number of ``Bytes``.
    """
    for exponent, suffix in ((3, 'GB'), (2, 'MB'), (1, 'KB')):
        threshold = 1024**exponent
        # `>=` so that an exact unit boundary is reported in the larger unit
        # (1024 -> '1.00 KB' rather than '1024 Bytes').
        if size_in_bytes >= threshold:
            return f'{size_in_bytes / threshold:.2f} {suffix}'
    return f'{size_in_bytes} Bytes'