Coverage for install/scipp/visualization/formatting_html.py: 92%

238 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-04-28 01:28 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3 

4# Original source from 

5# https://github.com/jsignell/xarray/blob/1d960933ab252e0f79f7e050e6c9261d55568057/xarray/core/formatting_html.py 

6 

7import collections 

8import operator 

9import uuid 

10from functools import partial, reduce 

11from html import escape as html_escape 

12from re import escape as re_escape 

13 

14from .._scipp import core as sc 

15from ..core import stddevs 

16from ..utils import value_to_string 

17from .resources import load_icons, load_style 

18 

# Suffix appended to a dimension label when that dimension holds bin edges.
BIN_EDGE_LABEL = "[bin-edge]"
# Prefix of the inline preview row that lists standard deviations.
STDDEV_PREFIX = "σ = "  # noqa: RUF001
# Symbol shown in the "Variances (...)" heading of the expanded data view.
VARIANCES_SYMBOL = "σ²"
# Template for the per-bin length label in binned-data previews.
SPARSE_PREFIX = "len={}"

23 

24 

def escape(content: str) -> str:
    """
    Return ``content`` with HTML special characters and dollar signs escaped.

    A bare dollar sign ($) causes unexpected MathJax conversion in the
    Jupyter notebook, so every ``$`` is replaced by its backslash-escaped form.
    """
    html_safe = html_escape(content)
    escaped_dollar = re_escape('$')
    return html_safe.replace('$', escaped_dollar)

32 

33 

def _format_array(data, size, ellipsis_after):
    """
    Build an escaped, comma-separated preview of the first ``size`` items of ``data``.

    When ``size`` exceeds ``2 * ellipsis_after + 1`` only the leading and
    trailing ``ellipsis_after`` items are listed, separated by ``"..."``.
    Items that are data arrays are summarised by their dims, their unit
    (omitted when it equals ``sc.units.one``) and their coord names; every
    other item is rendered with ``value_to_string``.
    """
    abbreviate = size > 2 * ellipsis_after + 1
    parts = []
    index = 0
    while index < size:
        if abbreviate and index == ellipsis_after:
            # Skip the middle of the array and resume with the trailing items.
            parts.append("...")
            index = size - ellipsis_after
        elem = data[index]
        if not isinstance(elem, sc.DataArray):
            parts.append(value_to_string(elem))
        else:
            dims = ', '.join(
                f'{dim}: {extent}' for dim, extent in elem.sizes.items()
            )
            coords = ', '.join(elem.coords)
            unit = '' if elem.unit == sc.units.one else f'unit={elem.unit}, '
            parts.append(f'{{dims=[{dims}], {unit}coords=[{coords}]}}')
        index += 1
    return escape(", ".join(parts))

53 

54 

55def _make_row(data_html): 

56 return f"<div>{data_html}</div>" 

57 

58 

def _format_non_events(var, has_variances):
    """
    Build the inline preview text for dense (non-binned) data.

    A variable with dims is flattened into a single dimension first. With
    ``has_variances`` the standard deviations (from ``stddevs``) are listed
    and the result is prefixed with ``STDDEV_PREFIX``; otherwise the values
    are listed.
    """
    n_elements = 1
    for extent in var.shape:
        n_elements *= extent

    flat = sc.flatten(var, var.dims, 'ignored') if len(var.dims) else var
    source = stddevs(flat) if has_variances else flat
    values = source.values
    # avoid unintentional indexing into value of 0-D data
    if len(flat.shape) == 0:
        values = [values]

    text = _format_array(values, n_elements, ellipsis_after=2)
    return f'{STDDEV_PREFIX}{text}' if has_variances else text

74 

75 

def _repr_item(bin_dim, item):
    """Return the ``len=N`` label for ``item``, N being its extent along axis ``bin_dim``."""
    length = item.shape[bin_dim]
    return SPARSE_PREFIX.format(length)

79 

80 

def _get_events(var, variances, ellipsis_after):
    """
    Return the list of ``len=N`` labels describing the bins of ``var``.

    N is the extent of each bin's content along the binned dimension. When
    there are more than ``2 * ellipsis_after + 1`` bins, only the leading
    and trailing ``ellipsis_after`` bins are listed, separated by ``"..."``.
    """
    constituents = var.bins.constituents
    dim = constituents['dim']
    axis_of = {name: axis for axis, name in enumerate(constituents['data'].dims)}
    bin_dim = axis_of[dim]

    values = var.values
    if isinstance(values, (sc.Variable, sc.DataArray, sc.Dataset)):
        # ``var.values`` is a single scipp object rather than a sequence
        # (presumably 0-D input), so there is exactly one entry to report.
        return [_repr_item(bin_dim, var.value)]

    size = len(values)
    data = retrieve(var, variances=variances)
    abbreviate = size > 2 * ellipsis_after + 1
    labels = []
    index = 0
    while index < size:
        if abbreviate and index == ellipsis_after:
            # Skip the middle bins and resume with the trailing ones.
            labels.append("...")
            index = size - ellipsis_after
        labels.append(_repr_item(bin_dim, data[index]))
        index += 1
    return labels

106 

107 

def _format_events(var, has_variances):
    """Return the one-line ``binned data [...]`` preview for binned data."""
    bin_labels = _get_events(var, has_variances, ellipsis_after=2)
    joined = ", ".join(bin_labels)
    return f'binned data [{joined}]'

111 

112 

113def _ordered_dict(data): 

114 data_ordered = collections.OrderedDict( 

115 sorted(data.items(), key=lambda t: str(t[0])) 

116 ) 

117 return data_ordered 

118 

119 

def inline_variable_repr(var, has_variances=False):
    """
    Return the one-line preview of ``var``.

    Binned input (``var.bins`` is not ``None``) is summarised per bin;
    otherwise the values are previewed, using ``var.data`` when ``var``
    is a data array.
    """
    if var.bins is not None:
        return _format_events(var, has_variances)
    data = var.data if isinstance(var, sc.DataArray) else var
    return _format_non_events(data, has_variances)

128 

129 

def retrieve(var, variances=False, single=False):
    """
    Fetch values or variances from ``var``.

    ``variances`` selects the variance accessors instead of the value
    accessors; ``single`` selects the singular accessor (``value`` /
    ``variance``) instead of the plural one (``values`` / ``variances``).
    """
    if variances:
        return var.variance if single else var.variances
    return var.value if single else var.values

135 

136 

def _short_data_repr_html_non_events(var, variances=False):
    """Return ``repr`` of the values (or of the variances, if requested) of ``var``."""
    payload = retrieve(var, variances)
    return repr(payload)

139 

140 

def _short_data_repr_html_events(var):
    """
    Return the text shown in the expanded data view of binned data.

    The string form of the variable (of ``var.data`` for a data array) is
    the starting point. If the bin contents are a dataset it is returned
    unchanged. Otherwise everything up to and including the
    ``'binned data: '`` marker is dropped and ``', content='`` is moved
    onto its own line.
    """
    string = str(var.data) if isinstance(var, sc.DataArray) else str(var)
    if isinstance(var.bins.constituents['data'], sc.Dataset):
        return string
    # ``partition`` leaves the text intact when the marker is absent, whereas
    # ``find`` would return -1 and silently chop off the leading characters.
    _, marker, remainder = string.partition('binned data: ')
    body = remainder if marker else string
    return body.replace(', content=', ',\ncontent=')

148 

149 

def short_data_repr_html(var, variances=False):
    """Format "data" for DataArray and Variable and escape it for HTML output."""
    if var.bins is None:
        text = _short_data_repr_html_non_events(var, variances)
    else:
        text = _short_data_repr_html_events(var)
    return escape(text)

157 

158 

def format_dims(dims, sizes, coords):
    """
    Render the dimensions as an HTML ``<ul>`` of ``name: size`` entries.

    Returns an empty string when there are no dims. A dim that is also a key
    of ``coords`` gets the ``sc-has-index`` CSS class, and a size of ``None``
    is displayed as ``Events``.
    """
    if not dims:
        return ""

    items = []
    for dim, size in zip(dims, sizes):
        css = " class='sc-has-index'" if dim in coords else ""
        extent = 'Events' if size is None else size
        items.append(f"<li><span{css}>{escape(str(dim))}</span>: {extent}</li>")

    return "<ul class='sc-dim-list'>" + "".join(items) + "</ul>"

175 

176 

177def _icon(icon_name): 

178 # icon_name is defined in icon-svg-inline.html 

179 return ( 

180 "<svg class='icon sc-{0}'>" "<use xlink:href='#{0}'>" "</use>" "</svg>".format( 

181 icon_name 

182 ) 

183 ) 

184 

185 

def summarize_coord(dim, var, ds=None):
    """Render one coordinate; its ``aligned`` flag is passed on as ``is_aligned``."""
    label = str(dim)
    return summarize_variable(label, var, is_aligned=var.aligned, embedded_in=ds)

188 

189 

def summarize_mask(dim, var, ds=None):
    """Render one mask; masks are always rendered with ``is_aligned=False``."""
    label = str(dim)
    return summarize_variable(label, var, is_aligned=False, embedded_in=ds)

192 

193 

def summarize_coords(coords, ds=None):
    """
    Render all coordinates as an HTML list.

    Entries are ordered by the string form of their names. ``ds`` is the
    object the coordinates belong to and is forwarded to each entry.
    """
    # Each item is closed with ``</li>`` only: no ``<span>`` is opened around
    # an entry, so emitting ``</span>`` here would produce an unmatched end tag.
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_coord(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(coords).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"

200 

201 

def summarize_masks(masks, ds=None):
    """
    Render all masks as an HTML list.

    Entries are ordered by the string form of their names. ``ds`` is the
    object the masks belong to and is forwarded to each entry.
    """
    # Each item is closed with ``</li>`` only: no ``<span>`` is opened around
    # an entry, so emitting ``</span>`` here would produce an unmatched end tag.
    vars_li = "".join(
        f"<li class='sc-var-item'>{summarize_mask(dim, var, ds)}</li>"
        for dim, var in _ordered_dict(masks).items()
    )
    return f"<ul class='sc-var-list'>{vars_li}</ul>"

208 

209 

def summarize_attrs(attrs, embedded_in=None):
    """
    Render all attributes as an HTML list, ordered by the string form of their names.

    Each attribute is rendered without nested attributes and as unaligned.
    """
    items = []
    for name, var in _ordered_dict(attrs).items():
        body = summarize_variable(
            name, var, has_attrs=False, embedded_in=embedded_in, is_aligned=False
        )
        items.append(f"<li class='sc-var-item'>{body}</li>")
    return "<ul class='sc-var-list'>" + "".join(items) + "</ul>"

224 

225 

226def _find_bin_edges(var, ds): 

227 """ 

228 Checks if the coordinate contains bin-edges. 

229 """ 

230 return [ 

231 dim for dim, length in var.sizes.items() if ds.sizes.get(dim, 1) + 1 == length 

232 ] 

233 

234 

235def _make_inline_attributes(var, has_attrs, embedded_in): 

236 disabled = "disabled" 

237 attrs_ul = "" 

238 attrs_sections = [] 

239 

240 if has_attrs and hasattr(var, "masks"): 

241 if len(var.masks) > 0: 

242 attrs_sections.append(mask_section(var.masks)) 

243 disabled = "" 

244 

245 if has_attrs and hasattr(var, "deprecated_attrs"): 

246 if len(var.attrs) > 0: 

247 attrs_sections.append(attr_section(var.deprecated_attrs, embedded_in)) 

248 disabled = "" 

249 

250 if len(attrs_sections) > 0: 

251 attrs_sections = "".join( 

252 f"<li class='sc-section-item sc-subsection'>{s}</li>" 

253 for s in attrs_sections 

254 ) 

255 attrs_ul = ( 

256 "<div class='sc-wrap'>" 

257 f"<ul class='sc-sections'>{attrs_sections}</ul>" 

258 "</div>" 

259 ) 

260 

261 return disabled, attrs_ul 

262 

263 

264def _make_dim_labels(dim, bin_edges=None): 

265 # Note: the space needs to be here, otherwise 

266 # there is a trailing whitespace when no dimension 

267 # label has been added 

268 if bin_edges and dim in bin_edges: 

269 return f" {BIN_EDGE_LABEL}" 

270 else: 

271 return "" 

272 

273 

def _make_dim_str(var, bin_edges, add_dim_size=False):
    """
    Return the comma-separated dimension labels of ``var``.

    Each label is the dim name, followed by the bin-edge label when the dim
    is listed in ``bin_edges`` and, if ``add_dim_size`` is set and the size
    is not ``None``, by ``": <size>"``.
    """
    labels = []
    for dim, size in zip(var.dims, var.shape):
        size_text = f': {size}' if add_dim_size and size is not None else ''
        labels.append(f'{dim!s}{_make_dim_labels(dim, bin_edges)}{size_text}')
    return ', '.join(labels)

284 

285 

def _format_common(is_index):
    """
    Return the pieces shared by every variable row.

    The result is the tuple ``(cssclass_aligned, attrs_id, attrs_icon,
    data_id, data_icon)``: the CSS class attribute for aligned names (empty
    unless ``is_index``), two freshly generated element ids, and the icons
    of the attributes and data toggles.
    """
    # "unique" ids required to expand/collapse subsections
    attrs_id = f"attrs-{uuid.uuid4()}"
    data_id = f"data-{uuid.uuid4()}"
    cssclass_aligned = " class='sc-aligned'" if is_index else ""
    return (
        cssclass_aligned,
        attrs_id,
        _icon("icon-file-text2"),
        data_id,
        _icon("icon-database"),
    )

296 

297 

def summarize_variable(
    name, var, is_aligned=False, has_attrs=False, embedded_in=None, add_dim_size=False
):
    """
    Formats the variable data into the format expected when displaying
    as a standalone variable (when a single variable or data array is
    displayed) or as part of a dataset.

    With ``name=None`` the dims take the place of the name cell; otherwise
    name and dims are separate cells. ``embedded_in`` is the containing
    object used to detect bin-edge dims, and ``add_dim_size`` appends the
    size to each dim label.
    """
    bin_edges = None if embedded_in is None else _find_bin_edges(var, embedded_in)
    dims_str = f"({_make_dim_str(var, bin_edges, add_dim_size)})"

    if var.unit is None:
        unit = ''
    elif var.unit == sc.units.dimensionless:
        unit = '𝟙'  # noqa: RUF001
    else:
        unit = str(var.unit)

    disabled, attrs_ul = _make_inline_attributes(var, has_attrs, embedded_in)

    preview = _make_row(inline_variable_repr(var))
    data_repr = short_data_repr_html(var)
    if var.bins is None:
        data_repr = f"Values:<br>{data_repr}"

    # Second preview row and second data listing, present only with variances.
    variances_span = ''
    if var.variances is not None:
        variances_row = _make_row(inline_variable_repr(var, has_variances=True))
        variances_span = f'<span>{variances_row}</span>'
        variances_repr = short_data_repr_html(var, variances=True)
        data_repr += f"<br><br>Variances ({VARIANCES_SYMBOL}):<br>{variances_repr}"

    cssclass_aligned, attrs_id, attrs_icon, data_id, data_icon = _format_common(
        is_aligned
    )

    if name is None:
        parts = [
            "<div class='sc-standalone-var-name'>"
            f"<span{cssclass_aligned}>{escape(dims_str)}</span></div>"
        ]
    else:
        parts = [
            "<div class='sc-var-name'>"
            f"<span{cssclass_aligned}>{escape(str(name))}</span></div>",
            f"<div class='sc-var-dims'>{escape(dims_str)}</div>",
        ]

    label_class = '' if has_attrs else 'sc-hide-icon'
    parts.extend(
        (
            f"<div class='sc-var-dtype'>{escape(str(var.dtype))}</div>",
            f"<div class='sc-var-unit'>{escape(unit)}</div>",
            "<div class='sc-value-preview sc-preview'>"
            f"<span>{preview}</span>{variances_span}</div>",
            f"<input id='{attrs_id}' class='sc-var-attrs-in' "
            f"type='checkbox' {disabled}>",
            f"<label for='{attrs_id}' class='{label_class}'"
            f" title='Show/Hide attributes'>{attrs_icon}</label>",
            f"<input id='{data_id}' class='sc-var-data-in' type='checkbox'>",
            f"<label for='{data_id}' title='Show/Hide data repr'>{data_icon}</label>",
            f"<div class='sc-var-attrs'>{attrs_ul}</div>" if attrs_ul else "",
            f"<pre class='sc-var-data'>{data_repr}</pre>",
        )
    )
    return "".join(parts)

365 

366 

def summarize_data(dataset):
    """
    Render the data items of ``dataset`` as an HTML list, ordered by name.

    Only when ``dataset`` is a ``sc.Dataset`` are the items rendered with
    their nested attributes and with ``dataset`` as their container.
    """
    has_attrs = isinstance(dataset, sc.Dataset)
    container = dataset if has_attrs else None
    items = []
    for name, var in _ordered_dict(dataset).items():
        body = summarize_variable(
            name, var.data, has_attrs=has_attrs, embedded_in=container
        )
        items.append(f"<li class='sc-var-item'>{body}</li>")
    return "<ul class='sc-var-list'>" + "".join(items) + "</ul>"

381 

382 

def collapsible_section(
    name, inline_details="", details="", n_items=None, enabled=True, collapsed=False
):
    """
    Return the HTML of a section that a checkbox can expand or collapse.

    ``name`` is the section title, ``inline_details`` is shown next to the
    title and ``details`` is the collapsible body. ``n_items`` is displayed
    in parentheses after the title unless it is ``None``. The checkbox is
    disabled unless ``enabled`` is true and ``n_items`` is non-zero; it
    starts checked unless ``collapsed`` is true or there are no items. The
    expand/collapse tooltip is attached only when the checkbox is enabled.
    """
    # "unique" id to expand/collapse the section
    data_id = "section-" + str(uuid.uuid4())

    has_items = n_items is not None and n_items
    n_items_span = "" if n_items is None else f" <span>({n_items})</span>"
    # Keep the boolean separate from the attribute text: testing the text
    # ("" when enabled, "disabled" otherwise) would invert the tooltip logic.
    is_enabled = bool(enabled and has_items)
    enabled_attr = "" if is_enabled else "disabled"
    collapsed_attr = "" if collapsed or not has_items else "checked"
    tip = " title='Expand/collapse section'" if is_enabled else ""

    return (
        f"<input id='{data_id}' class='sc-section-summary-in' "
        f"type='checkbox' {enabled_attr} {collapsed_attr}>"
        f"<label for='{data_id}' class='sc-section-summary' {tip}>"
        f"{name}:{n_items_span}</label>"
        f"<div class='sc-section-inline-details'>{inline_details}</div>"
        f"<div class='sc-section-details'>{details}</div>"
    )

403 

404 

def _mapping_section(
    mapping,
    *extra_details_func_args,
    name=None,
    details_func=None,
    max_items_collapse=None,
    enabled=True,
):
    """
    Render ``mapping`` as a collapsible section.

    ``details_func`` is called with ``mapping`` and any extra positional
    arguments to produce the section body. A data array counts as one item;
    anything else contributes ``len(mapping)`` items. The section starts
    collapsed once the item count reaches ``max_items_collapse``; with the
    default of ``None`` it is never collapsed on account of its size.
    """
    n_items = 1 if isinstance(mapping, sc.DataArray) else len(mapping)
    # Guard the comparison: ``n_items >= None`` would raise TypeError.
    collapsed = max_items_collapse is not None and n_items >= max_items_collapse

    return collapsible_section(
        name,
        details=details_func(mapping, *extra_details_func_args),
        n_items=n_items,
        enabled=enabled,
        collapsed=collapsed,
    )

423 

424 

def dim_section(dataset):
    """Return the always-disabled "Dimensions" section listing dims and sizes inline."""
    coords = getattr(dataset, "coords", {})
    dim_list = format_dims(dataset.dims, dataset.shape, coords)
    return collapsible_section(
        "Dimensions",
        inline_details=dim_list,
        enabled=False,
        collapsed=True,
    )

432 

433 

def summarize_array(var, is_variable=False):
    """
    Render ``var`` as a one-item HTML list without a name cell.

    ``is_variable`` is forwarded as ``add_dim_size``, so dim sizes are
    shown next to the dim labels when it is set.
    """
    item = summarize_variable(None, var, add_dim_size=is_variable)
    return f"<ul class='sc-var-list'><li class='sc-var-item'>{item}</li></ul>"

440 

441 

def variable_section(var):
    """Return the section for a standalone variable, with dim sizes shown."""
    section_html = summarize_array(var, is_variable=True)
    return section_html

444 

445 

# Section builders: each one binds ``_mapping_section`` to a title, a function
# rendering the section body, and the item count from which the section
# starts out collapsed.
coord_section = partial(
    _mapping_section,
    details_func=summarize_coords,
    name="Coordinates",
    max_items_collapse=25,
)

mask_section = partial(
    _mapping_section,
    details_func=summarize_masks,
    name="Masks",
    max_items_collapse=10,
)

data_section = partial(
    _mapping_section,
    details_func=summarize_data,
    name="Data",
    max_items_collapse=15,
)

attr_section = partial(
    _mapping_section,
    details_func=summarize_attrs,
    name="Attributes",
    max_items_collapse=10,
)

470 

471 

def _obj_repr(header_components, sections):
    """
    Assemble the complete HTML representation of an object.

    The loaded icons and style come first, followed by the header built
    from ``header_components`` and one list item per entry of ``sections``.
    """
    header = "<div class='sc-header'>" + "".join(header_components) + "</div>"
    section_items = "".join(
        [f"<li class='sc-section-item'>{section}</li>" for section in sections]
    )

    pieces = [
        "<div>",
        f"{load_icons()}",
        f"{load_style()}",
        "<div class='sc-wrap sc-root'>",
        header,
        f"<ul class='sc-sections'>{section_items}</ul>",
        "</div>",
        "</div>",
    ]
    return "".join(pieces)

486 

487 

def _format_size(obj):
    """
    Return the parenthesised, human-readable size of ``obj``.

    The size reported by ``obj.__sizeof__()`` is always shown; the value of
    ``obj.underlying_size()`` is added as "out of ..." when the two differ.
    """
    view_size = obj.__sizeof__()
    underlying_size = obj.underlying_size()

    pieces = ["(", human_readable_size(view_size)]
    if view_size != underlying_size:
        pieces.append(
            " <span class='sc-underlying-size'>out of "
            + human_readable_size(underlying_size)
            + "</span>"
        )
    pieces.append(")")
    return "".join(pieces)

498 

499 

def dataset_repr(ds):
    """
    Return the HTML representation of a dataset or data array.

    The sections are, in order: dimensions, coordinates (if any), data and,
    for input that is not a ``sc.Dataset``, masks and attributes (each only
    when non-empty).
    """
    is_dataset = isinstance(ds, sc.Dataset)
    obj_type = f"scipp.{type(ds).__name__}"
    header_components = [
        f"<div class='sc-obj-type'>{escape(obj_type)} {_format_size(ds)}</div>"
    ]

    sections = [dim_section(ds)]
    if len(ds.coords) > 0:
        sections.append(coord_section(ds.coords, ds))

    # A lone data array is shown as a single, unnamed data item.
    sections.append(data_section(ds if is_dataset else {'': ds}))

    if not is_dataset:
        if len(ds.masks) > 0:
            sections.append(mask_section(ds.masks, ds))
        if len(ds.deprecated_attrs) > 0:
            sections.append(attr_section(ds.deprecated_attrs, ds))

    return _obj_repr(header_components, sections)

520 

521 

def variable_repr(var):
    """Return the HTML representation of a variable: a type/size header plus one section."""
    obj_type = f"scipp.{type(var).__name__}"
    header = f"<div class='sc-obj-type'>{escape(obj_type)} {_format_size(var)}</div>"
    return _obj_repr([header], [variable_section(var)])

532 

533 

def human_readable_size(size_in_bytes):
    """
    Format a byte count with the largest fitting binary unit.

    Sizes of at least 1024**3, 1024**2 or 1024 bytes are shown with two
    decimals as ``GB``, ``MB`` or ``KB`` respectively; anything smaller is
    shown as a plain number of ``Bytes``. The thresholds are inclusive, so
    exactly 1024 bytes is reported as ``1.00 KB``.
    """
    units = ((1024**3, 'GB'), (1024**2, 'MB'), (1024, 'KB'))
    for threshold, suffix in units:
        if size_in_bytes >= threshold:
            return f'{size_in_bytes / threshold:.2f} {suffix}'

    return f'{size_in_bytes} Bytes'