Coverage for install/scipp/testing/strategies.py: 100%

1# SPDX-License-Identifier: BSD-3-Clause

3# @author Jan-Lukas Wynen

4"""

5Search strategies for hypothesis to generate inputs for tests.

6"""

8from collections.abc import Callable, Sequence

9from functools import partial

10from typing import Any

12import numpy as np

13from hypothesis import strategies as st

14from hypothesis.core import Ex # type: ignore[attr-defined]

15from hypothesis.errors import InvalidArgument

16from hypothesis.extra import numpy as npst

18from ..core import DataArray, DType, Unit, Variable

19from ..core import variable as creation

def dims() -> st.SearchStrategy:
    """Return a strategy that generates dimension labels.

    Labels are strings of 0 to 50 characters. All graphic characters and
    control characters are allowed, except NULL, which causes problems
    in C and C++ code (e.g. HDF5).
    """
    label_alphabet = st.characters(
        whitelist_categories=['L', 'M', 'N', 'P', 'S', 'Zs', 'Cc'],
        blacklist_characters='\0',
    )
    return st.text(label_alphabet, min_size=0, max_size=50)

def sizes_dicts(
    ndim: int | st.SearchStrategy | None = None,
) -> st.SearchStrategy:
    """Return a strategy that generates dicts mapping dim labels to sizes.

    Keys are drawn from ``dims()`` and values are integers in ``[1, 10]``.

    Parameters
    ----------
    ndim:
        Number of entries of the generated dicts.
        If a strategy, the number is drawn from it first.
        If ``None``, dicts with 0 to 4 entries are generated.
    """
    if isinstance(ndim, st.SearchStrategy):
        return ndim.flatmap(lambda n: sizes_dicts(ndim=n))

    if ndim is None:
        # The constructor of sc.Variable in Python only supports
        # arrays with <= 4 dimensions.
        fewest, most = 0, 4
    else:
        fewest = most = ndim

    return st.dictionaries(
        keys=dims(),
        values=st.integers(min_value=1, max_value=10),
        min_size=fewest,
        max_size=most,
    )

def units() -> st.SearchStrategy:
    """Return a strategy that samples from a fixed set of unit strings."""
    candidates = ('one', 'm', 'kg', 's', 'A', 'K', 'count')
    return st.sampled_from(candidates)

def integer_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy that samples integer dtype names such as ``'int64'``.

    Parameters
    ----------
    sizes:
        Bit widths to choose from; each one is appended to ``'int'``.
    """
    dtype_names = [f'int{bits}' for bits in sizes]
    return st.sampled_from(dtype_names)

def floating_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy that samples float dtype names such as ``'float64'``.

    Parameters
    ----------
    sizes:
        Bit widths to choose from; each one is appended to ``'float'``.
    """
    dtype_names = [f'float{bits}' for bits in sizes]
    return st.sampled_from(dtype_names)

def scalar_numeric_dtypes() -> st.SearchStrategy:
    """Return a strategy that generates integer or floating dtype names.

    First picks one of ``integer_dtypes`` and ``floating_dtypes``, then draws
    from the strategy returned by calling it with its default arguments.
    """
    dtype_factories = (integer_dtypes, floating_dtypes)
    return st.sampled_from(dtype_factories).flatmap(lambda factory: factory())

def _variables_from_fixed_args(args: dict[str, Any]) -> st.SearchStrategy:
    """Return a strategy for variables built from already concrete arguments.

    Parameters
    ----------
    args:
        Dict with the keys ``'sizes'``, ``'unit'``, ``'dtype'``,
        ``'with_variances'``, ``'elements'``, ``'fill'``, and ``'unique'``.
        The sizes determine both the dims and the shape of the variables.
    """

    def array_strategy(*, for_variances: bool) -> st.SearchStrategy:
        element_strategy = args['elements']
        if for_variances and element_strategy is None:
            # Make sure that variances are non-negative and
            # let the user decide otherwise.
            float_width = 32 if np.dtype(args['dtype']) == np.float32 else 64
            element_strategy = st.floats(min_value=0.0, width=float_width)

        return npst.arrays(
            args['dtype'],
            tuple(args['sizes'].values()),
            elements=element_strategy,
            fill=args['fill'],
            unique=args['unique'],
        )

    make_variable = partial(
        creation.array, dims=list(args['sizes'].keys()), unit=args['unit']
    )
    values = array_strategy(for_variances=False)
    if args['with_variances']:
        variances = array_strategy(for_variances=True)
    else:
        variances = st.none()
    return st.builds(make_variable, values=values, variances=variances)

class _ConditionallyWithVariances:
    """Marker for 'decide about variances once the dtype is known'.

    Calling an instance draws a boolean if the dtype is float32 or float64
    and returns ``False`` for every other dtype.
    """

    def __init__(self) -> None:
        self._strategy = st.booleans()

    def __call__(self, draw: st.DrawFn, dtype: DType) -> bool:
        supports_variances = dtype in (DType.float32, DType.float64)
        if not supports_variances:
            return False
        return draw(self._strategy)

@st.composite
def _concrete_args(
    draw: st.DrawFn, args: dict[str, st.SearchStrategy | Any]
) -> dict[str, Any]:
    """Replace every strategy in ``args`` by a value drawn from it.

    Entries that are not strategies are passed through unchanged.
    A ``_ConditionallyWithVariances`` marker under ``'with_variances'`` is
    resolved last, using the concrete dtype.
    """
    # NOTE(review): strategy-valued 'elements' and 'fill' are drawn here as
    # well, so the array strategy receives a single drawn value for them —
    # confirm this is intended.
    resolved: dict[str, Any] = {}
    for name, candidate in args.items():
        if isinstance(candidate, st.SearchStrategy):
            candidate = draw(candidate)
        resolved[name] = candidate

    decide_variances = resolved['with_variances']
    if isinstance(decide_variances, _ConditionallyWithVariances):
        resolved['with_variances'] = decide_variances(draw, resolved['dtype'])
    return resolved

111

112

def _variable_arg_strategies(
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool
    | st.SearchStrategy
    | _ConditionallyWithVariances
    | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> dict[str, st.SearchStrategy | Any]:
    """Assemble the argument dict for variables, filling in defaults.

    ``sizes``, ``unit``, ``dtype``, and ``with_variances`` are replaced by
    default strategies when they are ``None``; ``elements``, ``fill``, and
    ``unique`` are passed through as given. ``ndim`` is only used to build
    the default for ``sizes``.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    if ndim is not None and sizes is not None:
        raise InvalidArgument(
            'Arguments `ndim` and `sizes` cannot both be used. '
            f'Got {ndim=}, {sizes=}.'
        )

    return {
        'sizes': sizes_dicts(ndim) if sizes is None else sizes,
        'unit': units() if unit is None else unit,
        # TODO other dtypes?
        'dtype': scalar_numeric_dtypes() if dtype is None else dtype,
        'with_variances': (
            _ConditionallyWithVariances()
            if with_variances is None
            else with_variances
        ),
        'elements': elements,
        'fill': fill,
        'unique': unique,
    }

151

152

# Design note: the component strategies for default arguments are built
# exactly once, when `variables` is called. Drawing examples through
# `_concrete_args` then reuses those strategies.
# A previous implementation constructed the component strategies inside
# an `st.composite` function for every example drawn. This led to high
# memory consumption by hypothesis and failed
# `hypothesis.HealthCheck.data_too_large`.
def variables(
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool | st.SearchStrategy | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> st.SearchStrategy[Variable]:
    """Return a strategy that generates variables.

    Arguments that are strategies are drawn from once per generated
    variable; all other values are used as given.

    Parameters
    ----------
    ndim:
        Number of dimensions. Cannot be combined with ``sizes``.
    sizes:
        Dict mapping dimension labels to lengths.
        By default, drawn from ``sizes_dicts(ndim)``.
    unit:
        Unit of the variable. By default, drawn from ``units()``.
    dtype:
        Dtype of the variable.
        By default, drawn from ``scalar_numeric_dtypes()``.
    with_variances:
        Selects whether the variable has variances.
        By default, a boolean is drawn for float32 and float64 dtypes
        and no variances are generated for other dtypes.
    elements:
        Forwarded as ``elements`` to ``hypothesis.extra.numpy.arrays``.
        If ``None``, variances are generated as non-negative floats.
    fill:
        Forwarded as ``fill`` to ``hypothesis.extra.numpy.arrays``.
    unique:
        Forwarded as ``unique`` to ``hypothesis.extra.numpy.arrays``.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    arg_strategies = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )
    concrete = _concrete_args(arg_strategies)
    return concrete.flatmap(_variables_from_fixed_args)

183

184

def n_variables(
    n: int,
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool | st.SearchStrategy | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> st.SearchStrategy[tuple[Variable, ...]]:
    """Return a strategy that generates tuples of ``n`` variables.

    The arguments are made concrete once per tuple, so all variables in a
    tuple share the same sizes, unit, dtype, and presence of variances.
    Their values (and variances) are generated independently.

    Parameters
    ----------
    n:
        Number of variables per generated tuple.
    ndim, sizes, unit, dtype, with_variances, elements, fill, unique:
        Handled in the same way as by ``variables``.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    arg_strategies = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )

    def n_tuple(concrete: dict[str, Any]) -> st.SearchStrategy:
        # One independent variable strategy per tuple element,
        # all built from the same concrete arguments.
        return st.tuples(*(_variables_from_fixed_args(concrete) for _ in range(n)))

    return _concrete_args(arg_strategies).flatmap(n_tuple)

210

211

@st.composite
def coord_dicts(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    sizes: dict[str, int],
    args: dict[str, Any] | None = None,
    bin_edges: bool = True,
) -> dict[str, Variable]:
    """Generate dicts of one-dimensional variables usable as coords or masks.

    Each variable has a single dim sampled from ``sizes``. Its name is
    either that dim or an arbitrary label drawn from ``dims()``.
    Between 0 and 6 candidates are drawn; candidates with equal names
    collapse into one entry.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    sizes:
        Dims and lengths to pick from. If empty, an empty dict is generated.
    args:
        Arguments forwarded to ``variables``. Entries for ``'ndim'`` and
        ``'sizes'`` are ignored because the sizes of each variable are
        determined by ``sizes``. The dict is not modified.
    bin_edges:
        If ``True``, the length of a variable may exceed the
        corresponding entry of ``sizes`` by one.
    """
    # Work on a private copy so that the caller's dict is never modified.
    # 'ndim' must be dropped because `variables` rejects it in combination
    # with the explicit 'sizes' passed for every variable below.
    variable_args = {
        key: val for key, val in (args or {}).items() if key != 'ndim'
    }

    if not sizes:
        return {}

    if bin_edges:

        def size_increment() -> int:
            return draw(st.integers(min_value=0, max_value=1))  # type:ignore[arg-type, return-value]

    else:

        def size_increment() -> int:
            return 0

    # Each item is (name, (dim, size)).
    names_and_sizes: list[tuple[Any, tuple[str, int]]] = draw(  # type: ignore[assignment]
        st.lists(  # type: ignore[arg-type]
            st.sampled_from(list(sizes))
            .map(lambda dim: (dim, sizes[dim] + size_increment()))
            .flatmap(
                lambda item: (st.just(item[0]) | dims()).map(lambda name: (name, item))
            ),
            min_size=0,
            max_size=6,
        )
    )
    return {
        name: draw(variables(**{**variable_args, 'sizes': {dim: size}}))  # type: ignore[arg-type, misc]
        for name, (dim, size) in names_and_sizes
    }

255

256

@st.composite
def dataarrays(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    data_args: dict[str, Any] | None = None,
    coords: bool = True,
    coord_args: dict[str, Any] | None = None,
    masks: bool = True,
    mask_args: dict[str, Any] | None = None,
    bin_edges: bool = True,
) -> DataArray:
    """Generate data arrays with coords and masks.

    The data variable can be any variable supported by
    ``scipp.testing.strategies.variables``.
    The coordinates and masks are constrained to be one-dimensional where the
    dimension is one of the dims of the data.
    The name of a coordinate or mask may be,
    but is not required to be, a dimension name.
    Masks always have dtype ``bool`` and are never bin edges.

    None of the ``*_args`` dicts is modified.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    data_args:
        Arguments for creating the data variable.
    coords:
        Selects whether coords are generated.
    coord_args:
        Arguments for creating the coordinate variable.
    masks:
        Selects whether masks are generated.
    mask_args:
        Arguments for creating the mask variable.
        A ``'dtype'`` entry is overridden with ``bool``.
    bin_edges:
        If ``True``, coords may be bin edges.

    See Also
    --------
    scipp.testing.strategies.variables:
        For allowed items in ``*_args`` dicts.
    """
    data: Variable = draw(variables(**(data_args or {})))  # type: ignore[arg-type, assignment]

    coords_dict: dict[str, Variable]
    if coords:
        # Pass a copy so that the caller's dict cannot be modified downstream.
        coords_dict = draw(  # type: ignore[assignment]
            coord_dicts(  # type: ignore[arg-type]
                sizes=data.sizes, args=dict(coord_args or {}), bin_edges=bin_edges
            )
        )
    else:
        coords_dict = {}

    masks_dict: dict[str, Variable]
    if masks:
        # Build a new dict instead of writing 'dtype' into the caller's dict.
        bool_mask_args = {**(mask_args or {}), 'dtype': bool}
        masks_dict = draw(  # type: ignore[assignment]
            coord_dicts(sizes=data.sizes, args=bool_mask_args, bin_edges=False)  # type: ignore[arg-type]
        )
    else:
        masks_dict = {}

    return DataArray(
        data,
        coords=coords_dict,
        masks=masks_dict,
    )

323

324

# Public names of this module.
__all__ = [
    # Building blocks: labels, sizes, units, and dtypes.
    'dims',
    'sizes_dicts',
    'units',
    'integer_dtypes',
    'floating_dtypes',
    'scalar_numeric_dtypes',
    # Variables.
    'variables',
    'n_variables',
    # Data arrays and their coords / masks.
    'coord_dicts',
    'dataarrays',
]