Coverage for install/scipp/testing/strategies.py: 100%

97 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-12-01 01:59 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Jan-Lukas Wynen 

4""" 

5Search strategies for hypothesis to generate inputs for tests. 

6""" 

7 

8from collections.abc import Callable, Sequence 

9from functools import partial 

10from typing import Any 

11 

12import numpy as np 

13from hypothesis import strategies as st 

14from hypothesis.core import Ex # type: ignore[attr-defined] 

15from hypothesis.errors import InvalidArgument 

16from hypothesis.extra import numpy as npst 

17 

18from ..core import DataArray, DType, Unit, Variable 

19from ..core import variable as creation 

20 

21 

def dims() -> st.SearchStrategy:
    """Return a strategy that generates dimension labels.

    Labels are strings with a length between 0 and 50 characters.
    """
    # Allow all graphic utf-8 characters and control characters.
    # NULL is excluded because it causes problems in C and C++ code (e.g. HDF5).
    label_characters = st.characters(
        whitelist_categories=['L', 'M', 'N', 'P', 'S', 'Zs', 'Cc'],
        blacklist_characters='\0',
    )
    return st.text(label_characters, min_size=0, max_size=50)

33 

34 

def sizes_dicts(
    ndim: int | st.SearchStrategy | None = None,
) -> st.SearchStrategy:
    """Return a strategy that generates dicts mapping dim labels to lengths.

    Keys are drawn from ``dims()`` and lengths are integers from 1 to 10.

    Parameters
    ----------
    ndim:
        Number of entries of the generated dicts.
        If a strategy, the number is drawn from it first.
        If ``None``, dicts with 0 to 4 entries are generated.
    """
    if isinstance(ndim, st.SearchStrategy):
        # Resolve the strategy to a concrete number, then build the real strategy.
        return ndim.flatmap(lambda n: sizes_dicts(ndim=n))

    if ndim is None:
        # The constructor of sc.Variable in Python only supports
        # arrays with <= 4 dimensions.
        fewest, most = 0, 4
    else:
        fewest = most = ndim

    return st.dictionaries(
        keys=dims(),
        values=st.integers(min_value=1, max_value=10),
        min_size=fewest,
        max_size=most,
    )

47 

48 

def units() -> st.SearchStrategy:
    """Return a strategy that picks one unit string from a fixed selection."""
    candidates = ('one', 'm', 'kg', 's', 'A', 'K', 'count')
    return st.sampled_from(candidates)

51 

52 

def integer_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy that picks an integer dtype name, e.g. ``'int64'``.

    Parameters
    ----------
    sizes:
        Bit widths to choose from. A width ``w`` produces the name ``'int<w>'``.
    """
    names = [f'int{bits}' for bits in sizes]
    return st.sampled_from(names)

55 

56 

def floating_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy that picks a floating point dtype name, e.g. ``'float64'``.

    Parameters
    ----------
    sizes:
        Bit widths to choose from. A width ``w`` produces the name ``'float<w>'``.
    """
    names = [f'float{bits}' for bits in sizes]
    return st.sampled_from(names)

59 

60 

def scalar_numeric_dtypes() -> st.SearchStrategy:
    """Return a strategy that picks an integer or floating point dtype name.

    First one of ``integer_dtypes`` and ``floating_dtypes`` is selected,
    then a dtype name is drawn from the selected strategy with its default sizes.
    """
    factories = (integer_dtypes, floating_dtypes)

    def instantiate(factory: Callable[[], st.SearchStrategy]) -> st.SearchStrategy:
        return factory()

    return st.sampled_from(factories).flatmap(instantiate)

63 

64 

def _variables_from_fixed_args(args: dict[str, Any]) -> st.SearchStrategy:
    """Return a strategy that builds variables from already drawn arguments.

    Reads the keys ``'sizes'``, ``'unit'``, ``'dtype'``, ``'with_variances'``,
    ``'elements'``, ``'fill'``, and ``'unique'`` of ``args``.
    Values, and variances if requested, are generated as NumPy arrays
    with the shape given by ``args['sizes']``.
    """
    sizes = args['sizes']

    def make_array(variances: bool) -> st.SearchStrategy:
        elements = args['elements']
        if variances and elements is None:
            # Make sure that variances are non-negative by default.
            # If the user provided `elements`, those are used as-is,
            # letting the user decide otherwise.
            float_width = 32 if np.dtype(args['dtype']) == np.float32 else 64
            elements = st.floats(min_value=0.0, width=float_width)

        shape = tuple(sizes.values())
        return npst.arrays(
            args['dtype'],
            shape,
            elements=elements,
            fill=args['fill'],
            unique=args['unique'],
        )

    # dims and unit are fixed; only the array contents vary between examples.
    build = partial(creation.array, dims=list(sizes.keys()), unit=args['unit'])
    values = make_array(False)
    variances = make_array(True) if args['with_variances'] else st.none()
    return st.builds(build, values=values, variances=variances)

88 

89 

class _ConditionallyWithVariances:
    """Decide whether a variable gets variances based on its dtype.

    Used as the default for ``with_variances``: for ``float32`` and ``float64``
    a boolean is drawn, for every other dtype the result is ``False``.
    """

    def __init__(self) -> None:
        # Constructed once and reused for every draw.
        self._strategy = st.booleans()

    def __call__(self, draw: st.DrawFn, dtype: DType) -> bool:
        """Return whether to generate variances for the given ``dtype``."""
        supports_variances = dtype in (DType.float32, DType.float64)
        if not supports_variances:
            return False
        return draw(self._strategy)

98 

99 

@st.composite
def _concrete_args(
    draw: st.DrawFn, args: dict[str, st.SearchStrategy | Any]
) -> dict[str, Any]:
    """Draw concrete values for all arguments that are given as strategies.

    Entries of ``args`` that are not strategies are passed through unchanged.
    A ``_ConditionallyWithVariances`` under ``'with_variances'`` is resolved
    to a bool using the concrete ``'dtype'``.
    """
    # NOTE(review): this also draws from strategies passed as `elements` or `fill`,
    # which are later handed to `npst.arrays` -- confirm that this is intended.

    def _draw(x: st.SearchStrategy[Ex] | Ex) -> Ex:
        if isinstance(x, st.SearchStrategy):
            return draw(x)  # type:ignore[no-any-return]
        return x

    concrete: dict[str, Any] = {}
    for key, val in args.items():
        concrete[key] = _draw(val)

    with_variances = concrete['with_variances']
    if isinstance(with_variances, _ConditionallyWithVariances):
        # Can only be decided now that the dtype is known.
        concrete['with_variances'] = with_variances(draw, concrete['dtype'])
    return concrete

111 

112 

def _variable_arg_strategies(
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool
    | st.SearchStrategy
    | _ConditionallyWithVariances
    | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> dict[str, st.SearchStrategy | Any]:
    """Collect the arguments for generating variables and fill in defaults.

    ``sizes``, ``unit``, ``dtype``, and ``with_variances`` are replaced by
    default strategies if they are ``None``.
    ``elements``, ``fill``, and ``unique`` are passed through as given.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    if ndim is not None and sizes is not None:
        raise InvalidArgument(
            'Arguments `ndim` and `sizes` cannot both be used. '
            f'Got {ndim=}, {sizes=}.'
        )

    default_with_variances = (
        _ConditionallyWithVariances() if with_variances is None else with_variances
    )
    return {
        'sizes': sizes_dicts(ndim) if sizes is None else sizes,
        'unit': units() if unit is None else unit,
        # TODO other dtypes?
        'dtype': scalar_numeric_dtypes() if dtype is None else dtype,
        'with_variances': default_with_variances,
        'elements': elements,
        'fill': fill,
        'unique': unique,
    }

151 

152 

# Implementation note:
# The component strategies for default arguments are constructed only once,
# namely when `variables` is called. Sampling via `_concrete_args` then
# reuses those strategies.
# A previous implementation constructed the component strategies inside
# an `st.composite` function for every example drawn. This led to high
# memory consumption by hypothesis and failed
# `hypothesis.HealthCheck.data_too_large`.
def variables(
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool | st.SearchStrategy | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> st.SearchStrategy[Variable]:
    """Return a strategy that generates variables.

    Arguments given as strategies are drawn from for every example.

    Parameters
    ----------
    ndim:
        Number of dimensions. Cannot be combined with ``sizes``.
    sizes:
        Dict mapping dimension labels to lengths.
        If ``None``, generated by ``sizes_dicts(ndim)``.
    unit:
        Unit of the variables. If ``None``, drawn from ``units()``.
    dtype:
        Element type. If ``None``, drawn from ``scalar_numeric_dtypes()``.
    with_variances:
        Selects whether the variables have variances.
        If ``None``, variances are added at random,
        but only for dtypes float32 and float64.
    elements, fill, unique:
        Forwarded to ``hypothesis.extra.numpy.arrays``.
    """
    arg_strategies = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )
    fixed_args = _concrete_args(arg_strategies)
    return fixed_args.flatmap(_variables_from_fixed_args)

183 

184 

def n_variables(
    n: int,
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool | st.SearchStrategy | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> st.SearchStrategy[tuple[Variable, ...]]:
    """Return a strategy that generates tuples of ``n`` variables.

    The arguments are resolved to concrete values once per example and
    shared by all variables in the tuple. The array contents are generated
    separately for each variable.

    Parameters
    ----------
    n:
        Number of variables per tuple.
    ndim:
        Number of dimensions. Cannot be combined with ``sizes``.
    sizes:
        Dict mapping dimension labels to lengths.
        If ``None``, generated by ``sizes_dicts(ndim)``.
    unit:
        Unit of the variables. If ``None``, drawn from ``units()``.
    dtype:
        Element type. If ``None``, drawn from ``scalar_numeric_dtypes()``.
    with_variances:
        Selects whether the variables have variances.
        If ``None``, variances are added at random,
        but only for dtypes float32 and float64.
    elements, fill, unique:
        Forwarded to ``hypothesis.extra.numpy.arrays``.
    """
    args = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )
    return _concrete_args(args).flatmap(
        lambda a: st.tuples(*(_variables_from_fixed_args(a) for _ in range(n)))
    )

210 

211 

@st.composite
def coord_dicts(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    sizes: dict[str, int],
    args: dict[str, Any] | None = None,
    bin_edges: bool = True,
) -> dict[str, Variable]:
    """Generate a dict of one-dimensional variables, e.g. for coords or masks.

    Between 0 and 6 items are drawn. Each is a one-dimensional variable over
    one of the dims in ``sizes``. Its name is either that dim or a label
    drawn from ``dims()``. If a name is drawn more than once,
    the item drawn last is kept.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    sizes:
        Maps dimension labels to lengths.
        If empty, the result is an empty dict.
    args:
        Keyword arguments for ``variables``.
        Items ``'ndim'`` and ``'sizes'`` are ignored because the sizes are
        determined by ``sizes``. The dict is not modified.
    bin_edges:
        If ``True``, a variable may be one element longer than its dim.
    """
    # Build a filtered copy instead of editing `args` in place: the dict belongs
    # to the caller and this function runs once per generated example.
    # 'ndim' would conflict with the explicit sizes passed to `variables` below.
    var_args = {
        key: val for key, val in (args or {}).items() if key not in ('ndim', 'sizes')
    }

    if bin_edges:

        def size_increment() -> int:
            return draw(st.integers(min_value=0, max_value=1))  # type:ignore[arg-type, return-value]

    else:

        def size_increment() -> int:
            return 0

    if not sizes:
        return {}

    # Items are (name, (dim, size)).
    names_and_sizes: list[tuple[Any, tuple[str, int]]] = draw(  # type: ignore[assignment]
        st.lists(  # type: ignore[arg-type]
            st.sampled_from(list(sizes))
            .map(lambda dim: (dim, sizes[dim] + size_increment()))
            .flatmap(
                lambda item: (st.just(item[0]) | dims()).map(lambda name: (name, item))
            ),
            min_size=0,
            max_size=6,
        )
    )
    return {
        name: draw(variables(sizes={dim: size}, **var_args))  # type: ignore[arg-type, misc]
        for name, (dim, size) in names_and_sizes
    }

255 

256 

@st.composite
def dataarrays(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    data_args: dict[str, Any] | None = None,
    coords: bool = True,
    coord_args: dict[str, Any] | None = None,
    masks: bool = True,
    mask_args: dict[str, Any] | None = None,
    bin_edges: bool = True,
) -> DataArray:
    """Generate data arrays with coords and masks.

    The data variable can be any variable supported by
    ``scipp.testing.strategies.variables``.
    The coordinates and masks are constrained to be one-dimensional where the
    dimension is one of the dims of the data.
    The name of a coordinate or mask may be,
    but is not required to be, a dimension name.

    None of the ``*_args`` dicts is modified.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    data_args:
        Arguments for creating the data variable.
    coords:
        Selects whether coords are generated.
    coord_args:
        Arguments for creating the coordinate variable.
    masks:
        Selects whether masks are generated.
    mask_args:
        Arguments for creating the mask variable.
        The dtype is always set to ``bool``.
    bin_edges:
        If ``True``, coords may be bin edges.

    See Also
    --------
    scipp.testing.strategies.variables:
        For allowed items in ``*_args`` dicts.
    """
    data: Variable = draw(variables(**(data_args or {})))  # type: ignore[arg-type, assignment]

    if coords:
        # Hand over a copy so that the caller's dict cannot be modified.
        coords_dict: dict[str, Variable] = draw(  # type: ignore[assignment]
            coord_dicts(  # type: ignore[arg-type]
                sizes=data.sizes, args=dict(coord_args or {}), bin_edges=bin_edges
            )
        )
    else:
        coords_dict = {}

    if masks:
        # Merge into a new dict instead of assigning into `mask_args`;
        # the dict belongs to the caller and is reused for every example.
        bool_mask_args = {**(mask_args or {}), 'dtype': bool}
        masks_dict: dict[str, Variable] = draw(  # type: ignore[assignment]
            coord_dicts(sizes=data.sizes, args=bool_mask_args, bin_edges=False)  # type: ignore[arg-type]
        )
    else:
        masks_dict = {}

    return DataArray(
        data,
        coords=coords_dict,
        masks=masks_dict,
    )

323 

324 

# Public names of this module.
__all__ = [
    # Building blocks
    'dims',
    'sizes_dicts',
    'units',
    'integer_dtypes',
    'floating_dtypes',
    'scalar_numeric_dtypes',
    # Variables
    'variables',
    'n_variables',
    # Data arrays
    'coord_dicts',
    'dataarrays',
]