Coverage for install/scipp/testing/strategies.py: 100%

99 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-04-28 01:28 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Jan-Lukas Wynen 

4""" 

5Search strategies for hypothesis to generate inputs for tests. 

6""" 

7 

8from functools import partial 

9from typing import Any, Callable, Dict, Optional, Sequence, Union 

10 

11import numpy as np 

12from hypothesis import strategies as st 

13from hypothesis.core import Ex 

14from hypothesis.errors import InvalidArgument 

15from hypothesis.extra import numpy as npst 

16 

17from ..core import DataArray, DType, Unit, Variable 

18from ..core import variable as creation 

19 

20 

def dims() -> st.SearchStrategy:
    """Return a strategy that generates dimension labels.

    Labels are strings of 0 to 50 characters taken from the Unicode
    categories letter, mark, number, punctuation, symbol, space separator
    and control, with the NULL character excluded.
    """
    # All graphic and control characters are allowed except NULL,
    # which causes problems in C and C++ code (e.g. HDF5).
    alphabet = st.characters(
        whitelist_categories=['L', 'M', 'N', 'P', 'S', 'Zs', 'Cc'],
        blacklist_characters='\0',
    )
    return st.text(alphabet, min_size=0, max_size=50)

32 

33 

def sizes_dicts(
    ndim: Optional[Union[int, st.SearchStrategy]] = None,
) -> st.SearchStrategy:
    """Return a strategy that generates dicts mapping dim labels to sizes.

    Parameters
    ----------
    ndim:
        Number of entries of the generated dicts.
        If a strategy, the number is drawn from it for each example.
        If ``None``, dicts with 0 to 4 entries are generated.

    Returns
    -------
    :
        Strategy for dicts whose keys come from ``dims()`` and whose
        values are integers between 1 and 10 (inclusive).
    """
    if isinstance(ndim, st.SearchStrategy):
        # Draw the number of dimensions first, then recurse with the result.
        return ndim.flatmap(lambda drawn: sizes_dicts(ndim=drawn))

    if ndim is None:
        # The constructor of sc.Variable in Python only supports
        # arrays with <= 4 dimensions.
        fewest, most = 0, 4
    else:
        fewest = most = ndim

    return st.dictionaries(
        keys=dims(),
        values=st.integers(min_value=1, max_value=10),
        min_size=fewest,
        max_size=most,
    )

46 

47 

def units() -> st.SearchStrategy:
    """Return a strategy that samples from a fixed set of unit names."""
    choices = ('one', 'm', 'kg', 's', 'A', 'K', 'count')
    return st.sampled_from(choices)

50 

51 

def integer_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy that samples integer dtype names.

    Parameters
    ----------
    sizes:
        Bit widths to choose from; each width ``w`` yields the name ``'int<w>'``.
    """
    names = [f'int{bits}' for bits in sizes]
    return st.sampled_from(names)

54 

55 

def floating_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy that samples floating point dtype names.

    Parameters
    ----------
    sizes:
        Bit widths to choose from; each width ``w`` yields the name ``'float<w>'``.
    """
    names = [f'float{bits}' for bits in sizes]
    return st.sampled_from(names)

58 

59 

def scalar_numeric_dtypes() -> st.SearchStrategy:
    """Return a strategy that generates integer or floating point dtype names.

    First one of ``integer_dtypes`` and ``floating_dtypes`` is selected,
    then a dtype name is drawn from the strategy it returns.
    """

    def instantiate(factory):
        # Call the selected factory with its default arguments.
        return factory()

    factories = (integer_dtypes, floating_dtypes)
    return st.sampled_from(factories).flatmap(instantiate)

62 

63 

def _variables_from_fixed_args(args: dict) -> st.SearchStrategy:
    """Build a strategy for variables from fully resolved arguments.

    Parameters
    ----------
    args:
        Dict with the keys ``'sizes'``, ``'unit'``, ``'dtype'``,
        ``'with_variances'``, ``'elements'``, ``'fill'`` and ``'unique'``.

    Returns
    -------
    :
        Strategy that draws a values array (and, if requested, a variances
        array) and passes them to ``creation.array``.
    """

    def array_strategy(*, for_variances: bool):
        element_strategy = args['elements']
        if for_variances and element_strategy is None:
            # Make sure that variances are non-negative unless the user
            # explicitly supplied their own element strategy.
            float_width = 32 if np.dtype(args['dtype']) == np.float32 else 64
            element_strategy = st.floats(min_value=0.0, width=float_width)

        return npst.arrays(
            args['dtype'],
            tuple(args['sizes'].values()),
            elements=element_strategy,
            fill=args['fill'],
            unique=args['unique'],
        )

    make_variable = partial(
        creation.array, dims=list(args['sizes'].keys()), unit=args['unit']
    )
    values = array_strategy(for_variances=False)
    if args['with_variances']:
        variances = array_strategy(for_variances=True)
    else:
        variances = st.none()
    return st.builds(make_variable, values=values, variances=variances)

87 

88 

class _ConditionallyWithVariances:
    """Callable that decides whether a variable gets variances.

    For ``DType.float32`` and ``DType.float64`` the decision is drawn from a
    boolean strategy; for every other dtype the answer is ``False``.
    """

    def __init__(self):
        # Created once and reused for every call.
        self._strategy = st.booleans()

    def __call__(self, draw, dtype) -> bool:
        floating = (DType.float32, DType.float64)
        if dtype not in floating:
            return False
        return draw(self._strategy)

97 

98 

@st.composite
def _concrete_args(draw, args: dict) -> dict:
    """Replace every strategy in ``args`` by a value drawn from it.

    Entries that are not strategies are passed through unchanged.
    A ``_ConditionallyWithVariances`` placeholder under ``'with_variances'``
    is resolved last, using the already resolved ``'dtype'``.
    """
    concrete = {}
    for key, val in args.items():
        if isinstance(val, st.SearchStrategy):
            concrete[key] = draw(val)
        else:
            concrete[key] = val

    chooser = concrete['with_variances']
    if isinstance(chooser, _ConditionallyWithVariances):
        concrete['with_variances'] = chooser(draw, concrete['dtype'])
    return concrete

108 

109 

def _variable_arg_strategies(
    *,
    ndim: Union[int, st.SearchStrategy, None] = None,
    sizes: Union[Dict[str, int], st.SearchStrategy, None] = None,
    unit: Union[str, Unit, st.SearchStrategy, None] = None,
    dtype: Union[str, DType, type, st.SearchStrategy, None] = None,
    with_variances: Union[bool, st.SearchStrategy, None] = None,
    elements: Union[float, st.SearchStrategy, None] = None,
    fill: Union[float, st.SearchStrategy, None] = None,
    unique: Union[bool, st.SearchStrategy, None] = None,
):
    """Fill in default strategies for arguments that were not provided.

    Returns
    -------
    :
        Dict with the keys ``'sizes'``, ``'unit'``, ``'dtype'``,
        ``'with_variances'``, ``'elements'``, ``'fill'`` and ``'unique'``.
        ``elements``, ``fill`` and ``unique`` are passed through as given.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    if ndim is not None and sizes is not None:
        raise InvalidArgument(
            'Arguments `ndim` and `sizes` cannot both be used. '
            f'Got {ndim=}, {sizes=}.'
        )

    return {
        'sizes': sizes_dicts(ndim) if sizes is None else sizes,
        'unit': units() if unit is None else unit,
        # TODO other dtypes?
        'dtype': scalar_numeric_dtypes() if dtype is None else dtype,
        'with_variances': (
            _ConditionallyWithVariances() if with_variances is None else with_variances
        ),
        'elements': elements,
        'fill': fill,
        'unique': unique,
    }

145 

146 

# Design note: the component strategies for default arguments are built
# exactly once, when `variables` is called, and `_concrete_args` reuses
# them for every example.
# An earlier implementation built the component strategies inside an
# `st.composite` function for each drawn example, which led to high memory
# consumption by hypothesis and failed
# `hypothesis.HealthCheck.data_too_large`.
def variables(
    *,
    ndim: Union[int, st.SearchStrategy, None] = None,
    sizes: Union[Dict[str, int], st.SearchStrategy, None] = None,
    unit: Union[str, Unit, st.SearchStrategy, None] = None,
    dtype: Union[str, DType, type, st.SearchStrategy, None] = None,
    with_variances: Union[bool, st.SearchStrategy, None] = None,
    elements: Union[float, st.SearchStrategy, None] = None,
    fill: Union[float, st.SearchStrategy, None] = None,
    unique: Union[bool, st.SearchStrategy, None] = None,
) -> st.SearchStrategy[Variable]:
    """Return a strategy that generates variables.

    Every argument may be a fixed value, a strategy to draw the value from,
    or ``None`` to use a default. ``ndim`` and ``sizes`` are mutually
    exclusive. ``elements``, ``fill`` and ``unique`` are forwarded to
    ``hypothesis.extra.numpy.arrays``.
    """
    resolved = _concrete_args(
        _variable_arg_strategies(
            ndim=ndim,
            sizes=sizes,
            unit=unit,
            dtype=dtype,
            with_variances=with_variances,
            elements=elements,
            fill=fill,
            unique=unique,
        )
    )
    return resolved.flatmap(_variables_from_fixed_args)

177 

178 

def n_variables(
    n: int,
    *,
    ndim: Union[int, st.SearchStrategy, None] = None,
    sizes: Union[Dict[str, int], st.SearchStrategy, None] = None,
    unit: Union[str, Unit, st.SearchStrategy, None] = None,
    dtype: Union[str, DType, type, st.SearchStrategy, None] = None,
    with_variances: Union[bool, st.SearchStrategy, None] = None,
    elements: Union[float, st.SearchStrategy, None] = None,
    fill: Union[float, st.SearchStrategy, None] = None,
    unique: Union[bool, st.SearchStrategy, None] = None,
) -> st.SearchStrategy[tuple[Variable, ...]]:
    """Return a strategy that generates tuples of ``n`` variables.

    The arguments are resolved to concrete values once per example, so all
    variables in one tuple are generated from the same sizes, unit, dtype,
    etc. Only the array contents are drawn independently.

    Parameters
    ----------
    n:
        Number of variables per generated tuple.
    ndim, sizes, unit, dtype, with_variances, elements, fill, unique:
        Same meaning as in ``variables``. Each may be a fixed value, a
        strategy, or ``None`` for a default.

    Returns
    -------
    :
        Strategy for tuples of length ``n``.
    """
    args = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )

    def tuples_from(fixed_args: dict) -> st.SearchStrategy:
        # The element strategy depends only on the fixed arguments, so it is
        # built once and reused for all n positions of the tuple.
        element = _variables_from_fixed_args(fixed_args)
        return st.tuples(*(element for _ in range(n)))

    return _concrete_args(args).flatmap(tuples_from)

204 

205 

@st.composite
def coord_dicts(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    sizes: dict[str, int],
    args: Optional[dict[str, Any]] = None,
    bin_edges: bool = True,
) -> dict[str, Variable]:
    """Generate a dict of one-dimensional variables along the given dims.

    Each generated variable has exactly one dimension taken from ``sizes``.
    Its name is either that dimension label or an arbitrary label drawn from
    ``dims()``. Up to 6 name/variable pairs are drawn; pairs with a repeated
    name overwrite earlier ones.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    sizes:
        Maps dimension labels to lengths. If empty, an empty dict is returned.
    args:
        Extra keyword arguments for ``variables``. Entries for ``'ndim'`` and
        ``'sizes'`` are ignored because the sizes are dictated by ``sizes``.
        The dict is not modified.
    bin_edges:
        If ``True``, the length of a variable may exceed the dimension
        length by one.

    Returns
    -------
    :
        Dict mapping names to variables.
    """
    # Work on a filtered copy so that the caller's dict is never mutated;
    # the same dict may be reused for many examples or other strategies.
    base_args = {
        key: val
        for key, val in (args or {}).items()
        if key not in ('ndim', 'sizes')
    }

    if not sizes:
        return {}

    def size_increment() -> int:
        # Bin-edge coordinates are one element longer than the dimension.
        if bin_edges:
            return draw(st.integers(min_value=0, max_value=1))
        return 0

    def with_length(dim):
        return dim, sizes[dim] + size_increment()

    def with_name(item):
        # Use either the dimension label itself or an arbitrary label as name.
        return (st.just(item[0]) | dims()).map(lambda name: (name, item))

    names_and_sizes = draw(
        st.lists(
            st.sampled_from(list(sizes)).map(with_length).flatmap(with_name),
            min_size=0,
            max_size=6,
        )
    )
    return {
        name: draw(variables(**{**base_args, 'sizes': {dim: size}}))
        for name, (dim, size) in names_and_sizes
    }

249 

250 

@st.composite
def dataarrays(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    data_args: Optional[dict[str, Any]] = None,
    coords: bool = True,
    coord_args: Optional[dict[str, Any]] = None,
    masks: bool = True,
    mask_args: Optional[dict[str, Any]] = None,
    bin_edges: bool = True,
) -> DataArray:
    """Generate data arrays with coords and masks.

    The data variable can be any variable supported by
    ``scipp.testing.strategies.variables``.
    The coordinates and masks are constrained to be one-dimensional where the
    dimension is one of the dims of the data.
    The name of a coordinate or mask may be,
    but is not required to be, a dimension name.

    None of the ``*_args`` dicts are modified, so the same dict can safely
    be passed for several of them.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    data_args:
        Arguments for creating the data variable.
    coords:
        Selects whether coords are generated.
    coord_args:
        Arguments for creating the coordinate variable.
    masks:
        Selects whether masks are generated.
    mask_args:
        Arguments for creating the mask variable.
        The ``'dtype'`` entry is always overridden with ``bool``.
    bin_edges:
        If ``True``, coords may be bin edges.

    See Also
    --------
    scipp.testing.strategies.variables:
        For allowed items in ``*_args`` dicts.
    """
    data = draw(variables(**(data_args or {})))

    coords_dict = {}
    if coords:
        coords_dict = draw(
            coord_dicts(
                # Pass a copy so the caller's dict cannot be altered downstream.
                sizes=data.sizes,
                args=dict(coord_args or {}),
                bin_edges=bin_edges,
            )
        )

    masks_dict = {}
    if masks:
        # Build a new dict instead of writing 'dtype' into the caller's dict.
        bool_mask_args = {**(mask_args or {}), 'dtype': bool}
        masks_dict = draw(
            coord_dicts(sizes=data.sizes, args=bool_mask_args, bin_edges=False)
        )

    return DataArray(
        data,
        coords=coords_dict,
        masks=masks_dict,
    )

315 

316 

# Names exported by this module.
__all__ = [
    # Labels, shapes and units
    'dims',
    'sizes_dicts',
    'units',
    # dtypes
    'integer_dtypes',
    'floating_dtypes',
    'scalar_numeric_dtypes',
    # Variables and data arrays
    'variables',
    'n_variables',
    'coord_dicts',
    'dataarrays',
]