Coverage for install/scipp/compat/dict.py: 82%

84 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-12-01 01:59 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Neil Vaytet 

4 

5from __future__ import annotations 

6 

7from collections import defaultdict 

8 

9import numpy as np 

10 

11from ..core import ( 

12 DataArray, 

13 Dataset, 

14 DType, 

15 Variable, 

16 vector, 

17 vectors, 

18) 

19from ..spatial import linear_transform, linear_transforms 

20from ..typing import VariableLike 

21 

22 

def to_dict(scipp_obj: VariableLike) -> dict:
    """Serialize a Scipp object into a plain Python :class:`dict`.

    A Variable or DataArray is converted directly. A Dataset is turned into
    a dict that maps every item name to the dict form of that item.

    Parameters
    ----------
    scipp_obj:
        The Variable, DataArray or Dataset to convert.

    Returns
    -------
    :
        A dict containing all the information necessary to fully define
        the supplied Scipp object. Input of any other type matches none of
        the branches and yields ``None``.

    See Also
    --------
    scipp.from_dict
    """
    if isinstance(scipp_obj, Variable):
        return _variable_to_dict(scipp_obj)
    if isinstance(scipp_obj, DataArray):
        return _data_array_to_dict(scipp_obj)
    if not isinstance(scipp_obj, Dataset):
        # Not a supported Scipp type: nothing to convert.
        return None
    # TODO: Coordinates that would otherwise live at the Dataset level are
    # currently duplicated onto every individual DataArray. Attributes are
    # duplicated manually as well, since they are not carried along when
    # accessing items of a Dataset.
    return {name: _data_array_to_dict(item) for name, item in scipp_obj.items()}

55 

56 

57def _vec_parser(x, shp): 

58 """Parse vector_3_float to 2D NumPy array.""" 

59 return np.array(x) 

60 

61 

def _variable_to_dict(v):
    """Convert a Scipp Variable to a python dict.

    The resulting dict holds ``dims`` (as strings), ``shape``, ``unit`` and
    ``dtype``, followed by ``aligned`` (only present, and ``False``, when the
    variable is not aligned), and finally ``values`` and ``variances``.
    ``variances`` is ``None`` when the variable has no variances.
    """
    dim_names = _dims_to_strings(v.dims)
    result = {
        "dims": dim_names,
        "shape": v.shape,
        "unit": v.unit,
        "dtype": v.dtype,
    }
    if not v.aligned:
        result["aligned"] = False

    # Dtypes whose values/variances are wrapped in a NumPy array. Using raw
    # dtypes as keys doesn't appear to work, so they are compared as strings.
    array_dtypes = {
        str(DType.vector3),
        str(DType.linear_transform3),
        str(DType.string),
    }

    def _identity(values, shape):
        # Every other dtype returns the raw values/variances unchanged.
        return values

    parse = _vec_parser if str(v.dtype) in array_dtypes else _identity

    # A variable without dims (0-D) is read through `value`/`variance`,
    # any other through `values`/`variances`.
    if dim_names:
        values_attr, variances_attr = "values", "variances"
    else:
        values_attr, variances_attr = "value", "variance"

    result["values"] = parse(getattr(v, values_attr), v.shape)
    raw_variances = getattr(v, variances_attr)
    if raw_variances is None:
        result["variances"] = None
    else:
        result["variances"] = parse(raw_variances, v.shape)
    return result

95 

96 

def _data_array_to_dict(da):
    """Convert a Scipp DataArray to a python dict.

    The ``coords``, ``masks`` and ``attrs`` of the data array are each turned
    into a dict mapping the stringified entry name to the dict form of the
    entry. The data itself and the name are stored under ``data`` and
    ``name``.

    The keys of the result are inserted in the order ``masks``, ``attrs``,
    ``coords``, ``data``, ``name``.
    """
    sections = {}
    # Read the sections in the order coords, masks, attrs.
    for section in ("coords", "masks", "attrs"):
        sections[section] = {
            str(name): _variable_to_dict(item)
            for name, item in getattr(da, section).items()
        }

    result = {
        "masks": sections["masks"],
        "attrs": sections["attrs"],
        "coords": sections["coords"],
    }
    result["data"] = _variable_to_dict(da.data)
    result["name"] = da.name
    return result

107 

108 

109def _dims_to_strings(dims): 

110 """Convert dims that may or may not be strings to strings.""" 

111 return tuple(str(dim) for dim in dims) 

112 

113 

def from_dict(dict_obj: dict) -> VariableLike:
    """Convert a Python dict to a Scipp Variable, DataArray or Dataset.

    The kind of object is chosen from the keys of the input:

    - If the keys contain both `'coords'` and `'data'`, a DataArray is
      returned.
    - Otherwise, if every key is one of `'dims'`, `'values'`, `'variances'`,
      `'unit'`, `'dtype'`, `'shape'` or `'aligned'`, a Variable is returned.
    - Otherwise, every value is converted to a DataArray and a Dataset
      holding them under the original keys is returned.

    Parameters
    ----------
    dict_obj:
        A python dict to be converted to a scipp object.

    Returns
    -------
    :
        A Scipp Variable, DataArray or Dataset.

    See Also
    --------
    scipp.to_dict
    """
    variable_keys = {
        "dims",
        "values",
        "variances",
        "unit",
        "dtype",
        "shape",
        "aligned",
    }
    present = set(dict_obj.keys())

    if "coords" in present and "data" in present:
        # Case of a DataArray-like dict (most-likely)
        return _dict_to_data_array(dict_obj)
    if present <= variable_keys:
        # Case of a Variable-like dict (most-likely)
        return _dict_to_variable(dict_obj)

    # Case of a Dataset-like dict
    items = {}
    for key, item in dict_obj.items():
        items[key] = _dict_to_data_array(item)
    return Dataset(items)

152 

153 

154def _dict_to_variable(d): 

155 """Convert a Python dict to a Scipp Variable.""" 

156 d = dict(d) 

157 # The Variable constructor does not accept both `shape` and `values`. If 

158 # `values` is present, remove `shape` from the list. 

159 keylist = set(d.keys()) 

160 if "values" in keylist and "shape" in keylist: 

161 keylist.remove("shape") 

162 out = {} 

163 

164 for key in keylist: 

165 if key == "dtype" and isinstance(d[key], str): 

166 out[key] = getattr(DType, d[key]) 

167 else: 

168 out[key] = d[key] 

169 # Hack for types that cannot be directly constructed using Variable() 

170 if out['dims']: 

171 init = {'vector3': vectors, 'linear_transform3': linear_transforms} 

172 else: 

173 init = {'vector3': vector, 'linear_transform3': linear_transform} 

174 make_var = init.get(str(out.get('dtype', None)), Variable) 

175 if make_var != Variable: 

176 if not out['dims']: 

177 out['value'] = out['values'] 

178 del out['values'] 

179 del out['dims'] 

180 for key in ['dtype', 'variance', 'variances']: 

181 if key in out: 

182 del out[key] 

183 var = make_var(**out) 

184 return var 

185 

186 

187def _dict_to_data_array(d): 

188 """Convert a Python dict to a Scipp DataArray.""" 

189 d = dict(d) 

190 if "data" not in d: 

191 raise KeyError( 

192 "To create a DataArray, the supplied dict must contain " 

193 f"'data'. Got {d.keys()}." 

194 ) 

195 out = {"coords": {}, "masks": {}, "attrs": {}} 

196 for key in out.keys(): 

197 if key in d: 

198 for name, item in d[key].items(): 

199 out[key][name] = _dict_to_variable(item) 

200 out["data"] = _dict_to_variable(d["data"]) 

201 return DataArray(**out)