Coverage for install/scipp/visualization/formatting_html.py: 92%

246 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-17 01:51 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3 

4# Original source from 

5# https://github.com/jsignell/xarray/blob/1d960933ab252e0f79f7e050e6c9261d55568057/xarray/core/formatting_html.py 

6 

7import collections 

8import operator 

9import uuid 

10from collections.abc import Callable, Iterable, Mapping, Sequence 

11from functools import partial, reduce 

12from html import escape as html_escape 

13from re import escape as re_escape 

14from typing import Any, TypeVar 

15 

16from .._scipp import core as sc 

17from ..core import Coords, DataArray, Dataset, Masks, Variable, stddevs 

18from ..utils import value_to_string 

19from .resources import load_icons, load_style 

20 

# Label appended to a dimension name when the dimension holds bin edges.
BIN_EDGE_LABEL = '[bin-edge]'
# Prefix put in front of the inline preview of standard deviations.
STDDEV_PREFIX = 'σ = '  # noqa: RUF001
# Symbol shown in the heading of the variances part of the data repr.
VARIANCES_SYMBOL = 'σ²'
# Template used to display the length of a single bin of binned data.
SPARSE_PREFIX = 'len={}'

# Value type of the mappings handled by ``_ordered_dict``.
_T = TypeVar("_T")

27 

28 

def escape(content: str) -> str:
    """
    Escape html special characters and dollar-signs($) in ``content``.

    An unescaped dollar-sign($) triggers unexpected MathJax conversion
    in the Jupyter notebook, so each one is prefixed with a backslash
    after the regular html escaping has been applied.
    """
    escaped_dollar = re_escape('$')
    html_safe = html_escape(content)
    return html_safe.replace('$', escaped_dollar)

36 

37 

def _format_array(data: Sequence[Any], size: int, ellipsis_after: int) -> str:
    """
    Render the first ``size`` elements of ``data`` as an escaped,
    comma-separated string.

    When ``size`` exceeds ``2 * ellipsis_after + 1`` only the leading and
    trailing ``ellipsis_after`` elements are shown, separated by "...".
    Elements that are data arrays are summarized by their dims, unit (unless
    it equals ``sc.units.one``) and coord names; everything else is rendered
    with ``value_to_string``.
    """

    def _render(elem: Any) -> str:
        # Summary of a single element.
        if not isinstance(elem, DataArray):
            return value_to_string(elem)
        dims = ', '.join(f'{dim}: {extent}' for dim, extent in elem.sizes.items())
        coords = ', '.join(elem.coords)
        unit_part = '' if elem.unit == sc.units.one else f'unit={elem.unit}, '
        return f'{{dims=[{dims}], {unit_part}coords=[{coords}]}}'

    pieces: list[str] = []
    truncate = size > 2 * ellipsis_after + 1
    index = 0
    while index < size:
        if truncate and index == ellipsis_after:
            # Skip the middle part and continue with the trailing elements.
            pieces.append("...")
            index = size - ellipsis_after
        pieces.append(_render(data[index]))
        index += 1
    return escape(", ".join(pieces))

57 

58 

def _make_row(data_html: object) -> str:
    """Wrap the formatted ``data_html`` in a ``<div>`` element."""
    return "<div>{}</div>".format(data_html)

61 

62 

def _format_non_events(var: Variable, has_variances: bool) -> str:
    """
    Build the inline preview of the elements of a non-binned variable.

    With ``has_variances`` the standard deviations are shown instead of the
    values and the result is prefixed with ``STDDEV_PREFIX``.
    """
    size = reduce(operator.mul, var.shape, 1)
    if len(var.dims) > 0:
        # Merge all dims into one ('ignored' is only a placeholder name).
        var = sc.flatten(var, var.dims, 'ignored')
    data = stddevs(var).values if has_variances else var.values
    if len(var.shape) == 0:
        # avoid unintentional indexing into value of 0-D data
        data = [data]
    formatted = _format_array(data, size, ellipsis_after=2)
    return f'{STDDEV_PREFIX}{formatted}' if has_variances else formatted

78 

79 

def _repr_item(bin_dim: int, item: Variable) -> str:
    """Return the ``len=N`` label for ``item``, N being its extent along axis ``bin_dim``."""
    return SPARSE_PREFIX.format(item.shape[bin_dim])

83 

84 

def _get_events(
    var: Variable | DataArray, has_variances: bool, ellipsis_after: int
) -> list[str]:
    """
    Collect one ``len=N`` label per bin of the binned object ``var``.

    If there are more than ``2 * ellipsis_after + 1`` bins, only the leading
    and trailing ``ellipsis_after`` bins are listed with "..." in between.
    When ``var.values`` is itself a single Variable, DataArray or Dataset,
    one label for ``var.value`` is returned.
    """
    constituents = var.bins.constituents  # type: ignore[union-attr]
    content_dims = constituents['data'].dims
    # Position of the bin dimension within the dims of the bin contents.
    bin_dim = {name: pos for pos, name in enumerate(content_dims)}[
        constituents['dim']
    ]

    values = var.values
    if isinstance(values, Variable | DataArray | Dataset):
        return [_repr_item(bin_dim, var.value)]

    labels: list[str] = []
    size = len(values)
    data = retrieve(var, has_variances=has_variances)
    truncate = size > 2 * ellipsis_after + 1
    index = 0
    while index < size:
        if truncate and index == ellipsis_after:
            # Skip the middle part and continue with the trailing bins.
            labels.append("...")
            index = size - ellipsis_after
        labels.append(_repr_item(bin_dim, data[index]))
        index += 1
    return labels

113 

114 

def _format_events(var: Variable | DataArray, has_variances: bool) -> str:
    """Build the inline preview of binned data: a list of per-bin length labels."""
    labels = _get_events(var, has_variances, ellipsis_after=2)
    return 'binned data [{}]'.format(", ".join(labels))

118 

119 

def _ordered_dict(
    data: Mapping[str, _T],
) -> collections.OrderedDict[str, _T]:
    """Return the items of ``data`` in an ordered dict sorted by ``str(key)``."""

    def _key_text(entry: tuple[str, _T]) -> str:
        return str(entry[0])

    ordered: collections.OrderedDict[str, _T] = collections.OrderedDict()
    for key, value in sorted(data.items(), key=_key_text):
        ordered[key] = value
    return ordered

127 

128 

def inline_variable_repr(var: Variable | DataArray, has_variances: bool = False) -> str:
    """
    Return the one-line preview of ``var``.

    Binned objects are previewed by their bin lengths; for everything else
    the elements of the data (of ``var.data`` for a data array) are shown.
    """
    if var.bins is not None:
        return _format_events(var, has_variances)
    data = var.data if isinstance(var, DataArray) else var
    return _format_non_events(data, has_variances)

137 

138 

def retrieve(
    var: Variable | DataArray, has_variances: bool = False, single: bool = False
) -> Any:
    """
    Return the values of ``var``, or its variances if ``has_variances`` is set.

    With ``single`` the singular property (``value`` / ``variance``) is read
    instead of the plural one (``values`` / ``variances``).
    """
    if has_variances:
        return var.variance if single else var.variances
    return var.value if single else var.values

146 

147 

def _short_data_repr_html_non_events(
    var: Variable | DataArray, has_variances: bool = False
) -> str:
    """Return ``repr`` of the values (or variances) of the non-binned ``var``."""
    payload = retrieve(var, has_variances)
    return repr(payload)

152 

153 

def _short_data_repr_html_events(var: Variable | DataArray) -> str:
    """
    Return the textual representation of the binned object ``var``.

    The string form of the data (``var.data`` for a data array, ``var``
    otherwise) is used. If the bin contents are a Dataset it is returned
    unchanged. Otherwise everything up to and including the first
    'binned data: ' marker is removed and a line break is inserted in front
    of 'content='. If the marker is absent, the full text is kept rather
    than cutting off its leading characters.
    """
    string = str(var.data) if isinstance(var, DataArray) else str(var)
    if isinstance(var.bins.constituents['data'], Dataset):  # type: ignore[union-attr]
        return string
    # partition() reports a missing marker through an empty separator, unlike
    # find(), whose -1 would turn into a bogus slice offset.
    _, marker, remainder = string.partition('binned data: ')
    body = remainder if marker else string
    return body.replace(', content=', ',\ncontent=')

161 

162 

def short_data_repr_html(var: Variable | DataArray, has_variances: bool = False) -> str:
    """
    Format "data" for DataArray and Variable.

    Returns the escaped representation of the binned content or, for
    non-binned input, of the values (variances if ``has_variances``).
    """
    data_repr = (
        _short_data_repr_html_non_events(var, has_variances)
        if var.bins is None
        else _short_data_repr_html_events(var)
    )
    return escape(data_repr)

170 

171 

def format_dims(
    dims: Iterable[str] | None,
    sizes: Iterable[int | None],
    coords: Mapping[str, Variable],
) -> str:
    """
    Render the dimensions and their sizes as an html list.

    Parameters
    ----------
    dims:
        Dimension labels. ``None`` yields an empty string.
    sizes:
        Size of each dimension, in the same order as ``dims``. A size of
        ``None`` is displayed as 'Events'.
    coords:
        Mapping used to mark dimensions that have an entry of the same name
        with the 'sc-has-index' css class.

    Raises
    ------
    ValueError
        If ``dims`` and ``sizes`` differ in length (strict ``zip``).
    """
    if dims is None:
        return ""

    # ``dims`` is consumed in a single pass so that one-shot iterators work
    # as well as re-iterable sequences.
    items = []
    for dim, size in zip(dims, sizes, strict=True):
        css_class = " class='sc-has-index'" if dim in coords else ""
        extent = 'Events' if size is None else size
        items.append(
            f"<li><span{css_class}>{escape(str(dim))}</span>: {extent}</li>"
        )

    return f"<ul class='sc-dim-list'>{''.join(items)}</ul>"

192 

193 

def _icon(icon_name: str) -> str:
    """Return the ``<svg>`` markup referencing the icon ``icon_name``."""
    # icon_name is defined in icon-svg-inline.html
    template = "<svg class='icon sc-{0}'><use xlink:href='#{0}'></use></svg>"
    return template.format(icon_name)

199 

200 

def summarize_coord(
    dim: str, var: Variable, ds: DataArray | Dataset | None = None
) -> str:
    """Summarize the coord ``var`` named ``dim``, marking it aligned if ``var.aligned``."""
    aligned = var.aligned
    return summarize_variable(dim, var, is_aligned=aligned, embedded_in=ds)

205 

206 

def summarize_mask(
    dim: str, var: Variable, ds: DataArray | Dataset | None = None
) -> str:
    """Summarize the mask ``var`` named ``dim``; masks are never marked aligned."""
    return summarize_variable(name=dim, var=var, is_aligned=False, embedded_in=ds)

211 

212 

def summarize_coords(coords: Coords, ds: DataArray | Dataset | None = None) -> str:
    """
    Render all coords, sorted by name, as an html list.

    ``ds`` is the object the coords belong to, if any; it is forwarded to
    ``summarize_coord``.
    """
    # Each item is closed with </li> only: the markup produced for a single
    # variable is self-contained, so no additional closing tag is emitted.
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_coord(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(coords).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"

219 

220 

def summarize_masks(masks: Masks, ds: DataArray | Dataset | None = None) -> str:
    """
    Render all masks, sorted by name, as an html list.

    ``ds`` is the object the masks belong to, if any; it is forwarded to
    ``summarize_mask``.
    """
    # Each item is closed with </li> only: the markup produced for a single
    # variable is self-contained, so no additional closing tag is emitted.
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_mask(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(masks).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"

227 

228 

def summarize_attrs(
    attrs: Coords, embedded_in: DataArray | Dataset | None = None
) -> str:
    """Render all attributes, sorted by name, as an html list."""
    items = []
    for name, var in _ordered_dict(attrs).items():
        summary = summarize_variable(
            name,
            var,
            has_attrs=False,
            embedded_in=embedded_in,
            is_aligned=False,
        )
        items.append(f"<li class='sc-var-item'>{summary}</li>")
    attrs_li = "".join(items)
    return f"<ul class='sc-var-list'>{attrs_li}</ul>"

245 

246 

def _find_bin_edges(var: Variable | DataArray, ds: DataArray | Dataset) -> list[str]:
    """
    Return the dims along which ``var`` contains bin-edges.

    A dim qualifies when ``var`` is one element longer along it than ``ds``;
    dims missing from ``ds`` are treated as having size 1.
    """
    edges = []
    for dim, length in var.sizes.items():
        if ds.sizes.get(dim, 1) + 1 == length:
            edges.append(dim)
    return edges

254 

255 

def _make_inline_attributes(
    var: Variable | DataArray, has_attrs: bool, embedded_in: DataArray | Dataset | None
) -> tuple[str, str]:
    """
    Build the expandable masks/attributes part of a variable row.

    Returns a tuple ``(disabled, attrs_ul)``: ``disabled`` is the html
    attribute for the toggle checkbox ("" when there is something to show,
    "disabled" otherwise) and ``attrs_ul`` is the markup of the sections
    ("" when there are none). Nothing is collected unless ``has_attrs``.
    """
    sections = []
    if has_attrs:
        if hasattr(var, "masks") and len(var.masks) > 0:
            sections.append(mask_section(var.masks))
        if hasattr(var, "deprecated_attrs") and len(var.deprecated_attrs) > 0:
            sections.append(attr_section(var.deprecated_attrs, embedded_in))

    if len(sections) == 0:
        return "disabled", ""

    items = "".join(
        f"<li class='sc-section-item sc-subsection'>{section}</li>"
        for section in sections
    )
    attrs_ul = f"<div class='sc-wrap'><ul class='sc-sections'>{items}</ul></div>"
    return "", attrs_ul

285 

286 

def _make_dim_labels(dim: str, bin_edges: Sequence[str] | None = None) -> str:
    """Return the bin-edge label for ``dim`` if it is listed in ``bin_edges``, else ""."""
    # Note: the separating space is part of the returned label. Adding it at
    # the call site instead would leave a trailing whitespace whenever no
    # label is added.
    is_bin_edge = bool(bin_edges) and dim in bin_edges
    return f" {BIN_EDGE_LABEL}" if is_bin_edge else ""

295 

296 

def _make_dim_str(
    var: Variable | DataArray,
    bin_edges: Sequence[str] | None,
    add_dim_size: bool = False,
) -> str:
    """
    Return the comma-separated dims of ``var``.

    Each dim is followed by the bin-edge label where applicable and, if
    ``add_dim_size`` is set, by ``: <size>``.
    """
    parts = []
    for dim, size in zip(var.dims, var.shape, strict=True):
        label = _make_dim_labels(dim, bin_edges)
        size_text = f': {size}' if add_dim_size and size is not None else ''
        parts.append(f'{dim!s}{label}{size_text}')
    return ', '.join(parts)

311 

312 

def _format_common(is_index: bool) -> tuple[str, str, str, str, str]:
    """
    Return the pieces shared by all variable rows.

    The tuple holds, in order: the css class attribute for aligned entries
    ("" unless ``is_index``), the id and icon of the attributes toggle, and
    the id and icon of the data toggle.
    """
    cssclass_aligned = " class='sc-aligned'" if is_index else ""

    # "unique" ids required to expand/collapse subsections
    attrs_id = f"attrs-{uuid.uuid4()}"
    data_id = f"data-{uuid.uuid4()}"

    return (
        cssclass_aligned,
        attrs_id,
        _icon("icon-file-text2"),
        data_id,
        _icon("icon-database"),
    )

323 

324 

def summarize_variable(
    name: str | None,
    var: Variable | DataArray,
    is_aligned: bool = False,
    has_attrs: bool = False,
    embedded_in: DataArray | Dataset | None = None,
    add_dim_size: bool = False,
) -> str:
    """
    Formats the variable data into the format expected when displaying
    as a standalone variable (when a single variable or data array is
    displayed) or as part of a dataset.

    With ``name=None`` only the dims are shown in the leading cell;
    otherwise the escaped name is followed by a separate dims cell. The
    remaining cells hold dtype, unit, the inline preview(s), the toggles for
    attributes and data, and the expandable attribute and data blocks.
    """
    bin_edges = None if embedded_in is None else _find_bin_edges(var, embedded_in)
    dims_str = f"({_make_dim_str(var, bin_edges, add_dim_size)})"

    if var.unit is None:
        unit = ''
    elif var.unit == sc.units.dimensionless:
        unit = '𝟙'  # noqa: RUF001
    else:
        unit = str(var.unit)

    disabled, attrs_ul = _make_inline_attributes(var, has_attrs, embedded_in)

    preview = _make_row(inline_variable_repr(var))
    data_repr = short_data_repr_html(var)
    if var.bins is None:
        data_repr = f"Values:<br>{data_repr}"
    variances_span = ''
    if var.variances is not None:
        variances_preview = _make_row(inline_variable_repr(var, has_variances=True))
        variances_span = f'<span>{variances_preview}</span>'
        variances_repr = short_data_repr_html(var, has_variances=True)
        data_repr += f"<br><br>Variances ({VARIANCES_SYMBOL}):<br>{variances_repr}"

    cssclass_aligned, attrs_id, attrs_icon, data_id, data_icon = _format_common(
        is_aligned
    )

    if name is None:
        header = (
            f"<div class='sc-standalone-var-name'><span{cssclass_aligned}>"
            f"{escape(dims_str)}</span></div>"
        )
    else:
        header = (
            f"<div class='sc-var-name'><span{cssclass_aligned}>{escape(str(name))}"
            "</span></div>"
            f"<div class='sc-var-dims'>{escape(dims_str)}</div>"
        )

    # The attributes icon is hidden when attributes are not displayed at all.
    label_class = '' if has_attrs else 'sc-hide-icon'
    attrs_block = f"<div class='sc-var-attrs'>{attrs_ul}</div>" if attrs_ul else ""

    return (
        f"{header}"
        f"<div class='sc-var-dtype'>{escape(str(var.dtype))}</div>"
        f"<div class='sc-var-unit'>{escape(unit)}</div>"
        f"<div class='sc-value-preview sc-preview'><span>{preview}</span>"
        f"{variances_span}</div>"
        f"<input id='{attrs_id}' class='sc-var-attrs-in' "
        f"type='checkbox' {disabled}>"
        f"<label for='{attrs_id}' "
        f"class='{label_class}'"
        " title='Show/Hide attributes'>"
        f"{attrs_icon}</label>"
        f"<input id='{data_id}' class='sc-var-data-in' type='checkbox'>"
        f"<label for='{data_id}' title='Show/Hide data repr'>"
        f"{data_icon}</label>"
        f"{attrs_block}"
        f"<pre class='sc-var-data'>{data_repr}</pre>"
    )

397 

398 

def summarize_data(dataset: Mapping[str, DataArray] | Dataset) -> str:
    """
    Render the data of every item of ``dataset``, sorted by name, as an html list.

    For a ``Dataset`` the items are summarized with attributes enabled and
    with the dataset as the embedding object; for any other mapping both
    are disabled.
    """
    is_dataset = isinstance(dataset, Dataset)
    embedded_in: Dataset | None = dataset if is_dataset else None

    items = []
    for name, var in _ordered_dict(dataset).items():
        summary = summarize_variable(
            name,
            var.data,
            has_attrs=is_dataset,
            embedded_in=embedded_in,
        )
        items.append(f"<li class='sc-var-item'>{summary}</li>")
    vars_li = "".join(items)
    return f"<ul class='sc-var-list'>{vars_li}</ul>"

418 

419 

420def collapsible_section( 

421 name: str, 

422 inline_details: str = "", 

423 details: str = "", 

424 n_items: int | None = None, 

425 enabled: bool = True, 

426 collapsed: bool = False, 

427) -> str: 

428 # "unique" id to expand/collapse the section 

429 data_id = "section-" + str(uuid.uuid4()) 

430 

431 has_items = n_items is not None and n_items 

432 n_items_span = "" if n_items is None else f" <span>({n_items})</span>" 

433 disabled = "" if enabled and has_items else "disabled" 

434 checked = "" if collapsed or not has_items else "checked" 

435 tip = " title='Expand/collapse section'" if enabled else "" 

436 

437 return ( 

438 f"<input id='{data_id}' class='sc-section-summary-in' " 

439 f"type='checkbox' {disabled} {checked}>" 

440 f"<label for='{data_id}' class='sc-section-summary' {tip}>" 

441 f"{name}:{n_items_span}</label>" 

442 f"<div class='sc-section-inline-details'>{inline_details}</div>" 

443 f"<div class='sc-section-details'>{details}</div>" 

444 ) 

445 

446 

def _mapping_section(
    mapping: DataArray | Mapping[str, Variable],
    *extra_details_func_args: Any,
    name: str,
    max_items_collapse: int,
    details_func: Callable[..., str],
    enabled: bool = True,
) -> str:
    """
    Build a collapsible section named ``name`` for ``mapping``.

    The details are produced by ``details_func(mapping, *extra_details_func_args)``.
    A data array counts as a single item; the section starts collapsed once
    the number of items reaches ``max_items_collapse``.
    """
    if isinstance(mapping, DataArray):
        n_items = 1
    else:
        n_items = len(mapping)

    details = details_func(mapping, *extra_details_func_args)
    return collapsible_section(
        name,
        details=details,
        n_items=n_items,
        enabled=enabled,
        collapsed=n_items >= max_items_collapse,
    )

465 

466 

def dim_section(ds: DataArray | Dataset) -> str:
    """Return the non-expandable "Dimensions" section listing the dims and shape of ``ds``."""
    # Objects without a ``coords`` attribute get no 'has-index' markers.
    coords = getattr(ds, "coords", {})
    inline = format_dims(ds.dims, ds.shape, coords)
    return collapsible_section(
        "Dimensions", inline_details=inline, enabled=False, collapsed=True
    )

474 

475 

def summarize_array(var: Variable, is_variable: bool = False) -> str:
    """
    Render ``var`` as a single-item html list without a name cell.

    ``is_variable`` controls whether the dimension sizes are shown next to
    the dimension labels.
    """
    summary = summarize_variable(None, var, add_dim_size=is_variable)
    return f"<ul class='sc-var-list'><li class='sc-var-item'>{summary}</li></ul>"

482 

483 

def variable_section(var: Variable) -> str:
    """Return the section for a standalone variable, with dimension sizes shown."""
    section = summarize_array(var, is_variable=True)
    return section

486 

487 

# Pre-configured section builders. ``max_items_collapse`` is the number of
# items from which on the section is initially rendered collapsed.
coord_section = partial(
    _mapping_section,
    name="Coordinates",
    max_items_collapse=25,
    details_func=summarize_coords,
)

mask_section = partial(
    _mapping_section,
    name="Masks",
    max_items_collapse=10,
    details_func=summarize_masks,
)

data_section = partial(
    _mapping_section,
    name="Data",
    max_items_collapse=15,
    details_func=summarize_data,
)

attr_section = partial(
    _mapping_section,
    name="Attributes",
    max_items_collapse=10,
    details_func=summarize_attrs,
)

512 

513 

def _obj_repr(header_components: Iterable[str], sections: Iterable[str]) -> str:
    """
    Assemble the complete html representation of an object.

    The result embeds the icons and the style sheet, followed by the
    concatenated ``header_components`` and one list item per entry of
    ``sections``.
    """
    header = "<div class='sc-header'>" + "".join(header_components) + "</div>"
    section_items = "".join(
        f"<li class='sc-section-item'>{section}</li>" for section in sections
    )

    icons = load_icons()
    style = load_style()
    return (
        f"<div>{icons}{style}"
        f"<div class='sc-wrap sc-root'>{header}"
        f"<ul class='sc-sections'>{section_items}</ul>"
        "</div></div>"
    )

528 

529 

def _format_size(obj: Variable | DataArray | Dataset) -> str:
    """
    Return the size of ``obj`` in parentheses, in human readable form.

    If ``obj.underlying_size()`` differs from ``obj.__sizeof__()``, the
    underlying size is appended as "out of ...".
    """
    view_size = obj.__sizeof__()
    underlying_size = obj.underlying_size()
    text = human_readable_size(view_size)
    if view_size != underlying_size:
        underlying_text = human_readable_size(underlying_size)
        text += f" <span class='sc-underlying-size'>out of {underlying_text}</span>"
    return f"({text})"

540 

541 

def data_array_dataset_repr(ds: DataArray | Dataset) -> str:
    """
    Return the html representation of a data array or dataset.

    The sections are, in order: dimensions, coordinates (if any), data and,
    for anything that is not a ``Dataset``, masks and attributes (each only
    if non-empty).
    """
    is_dataset = isinstance(ds, Dataset)
    type_name = escape(f"scipp.{type(ds).__name__}")
    header_components = [
        f"<div class='sc-obj-type'>{type_name} {_format_size(ds)}</div>"
    ]

    sections = [dim_section(ds)]
    if len(ds.coords) > 0:
        sections.append(coord_section(ds.coords, ds))

    # A data array is presented as a one-item mapping with an empty name.
    sections.append(data_section(ds if is_dataset else {'': ds}))

    if not is_dataset:
        if len(ds.masks) > 0:
            sections.append(mask_section(ds.masks, ds))
        if len(ds.deprecated_attrs) > 0:
            sections.append(attr_section(ds.deprecated_attrs, ds))

    return _obj_repr(header_components, sections)

562 

563 

def variable_repr(var: Variable) -> str:
    """Return the html representation of a variable: a type/size header and one section."""
    type_name = escape(f"scipp.{type(var).__name__}")
    header = f"<div class='sc-obj-type'>{type_name} {_format_size(var)}</div>"
    return _obj_repr([header], [variable_section(var)])

574 

575 

def human_readable_size(size_in_bytes: int) -> str:
    """
    Format a size in bytes using the largest fitting binary unit.

    Sizes of at least 1024**3, 1024**2 or 1024 bytes are shown with two
    decimals in GB, MB or KB respectively; smaller sizes are shown as a
    plain number of bytes. The thresholds are inclusive, so exactly
    1024 bytes is reported as '1.00 KB'.
    """
    units = (
        ('GB', 1024 * 1024 * 1024),
        ('MB', 1024 * 1024),
        ('KB', 1024),
    )
    for unit, factor in units:
        if size_in_bytes >= factor:
            return f'{size_in_bytes / factor:.2f} {unit}'

    return f'{size_in_bytes} Bytes'