Coverage for install/scipp/testing/strategies.py: 100%
97 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 01:59 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 01:59 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Jan-Lukas Wynen
4"""
5Search strategies for hypothesis to generate inputs for tests.
6"""
8from collections.abc import Callable, Sequence
9from functools import partial
10from typing import Any
12import numpy as np
13from hypothesis import strategies as st
14from hypothesis.core import Ex # type: ignore[attr-defined]
15from hypothesis.errors import InvalidArgument
16from hypothesis.extra import numpy as npst
18from ..core import DataArray, DType, Unit, Variable
19from ..core import variable as creation
def dims() -> st.SearchStrategy:
    """Return a strategy that generates dimension labels.

    Labels are strings of 0 to 50 characters taken from the Unicode
    categories ``L``, ``M``, ``N``, ``P``, ``S``, ``Zs`` and ``Cc``.
    """
    # All graphic characters and control characters are allowed, with the
    # exception of NULL because it causes problems in C and C++ code
    # (e.g. HDF5).
    label_chars = st.characters(
        whitelist_categories=['L', 'M', 'N', 'P', 'S', 'Zs', 'Cc'],
        blacklist_characters='\0',
    )
    return st.text(label_chars, min_size=0, max_size=50)
def sizes_dicts(
    ndim: int | st.SearchStrategy | None = None,
) -> st.SearchStrategy:
    """Return a strategy that generates ``{dim: size}`` dicts.

    Keys are drawn from :func:`dims` and sizes are integers in ``[1, 10]``.

    Parameters
    ----------
    ndim:
        Number of entries of the generated dicts.
        If a strategy, the number is drawn from it.
        If ``None``, between 0 and 4 entries are generated.
    """
    if isinstance(ndim, st.SearchStrategy):
        # Resolve the strategy to a concrete number first, then recurse.
        return ndim.flatmap(lambda n: sizes_dicts(ndim=n))
    if ndim is None:
        # The constructor of sc.Variable in Python only supports
        # arrays with <= 4 dimensions.
        fewest, most = 0, 4
    else:
        fewest = most = ndim
    return st.dictionaries(
        keys=dims(),
        values=st.integers(min_value=1, max_value=10),
        min_size=fewest,
        max_size=most,
    )
def units() -> st.SearchStrategy:
    """Return a strategy that picks one of a fixed set of unit strings."""
    candidates = ('one', 'm', 'kg', 's', 'A', 'K', 'count')
    return st.sampled_from(candidates)
def integer_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy for integer dtype names such as ``'int64'``.

    Parameters
    ----------
    sizes:
        Bit widths to choose from; each one yields the name ``int<size>``.
    """
    names = [f'int{size}' for size in sizes]
    return st.sampled_from(names)
def floating_dtypes(sizes: Sequence[int] = (32, 64)) -> st.SearchStrategy:
    """Return a strategy for floating point dtype names such as ``'float64'``.

    Parameters
    ----------
    sizes:
        Bit widths to choose from; each one yields the name ``float<size>``.
    """
    names = [f'float{size}' for size in sizes]
    return st.sampled_from(names)
def scalar_numeric_dtypes() -> st.SearchStrategy:
    """Return a strategy for integer or floating point dtype names.

    First one of :func:`integer_dtypes` and :func:`floating_dtypes` is
    selected, then a dtype name is drawn from the selected strategy
    (built with its default sizes).
    """
    factories = (integer_dtypes, floating_dtypes)
    return st.sampled_from(factories).flatmap(lambda make: make())
def _variables_from_fixed_args(args: dict[str, Any]) -> st.SearchStrategy:
    """Return a strategy for variables given fully concrete arguments.

    Parameters
    ----------
    args:
        Dict with the keys ``'sizes'``, ``'unit'``, ``'dtype'``,
        ``'with_variances'``, ``'elements'``, ``'fill'`` and ``'unique'``.
        None of the values is drawn from here; they are used as given.
    """
    dtype = args['dtype']
    shape = tuple(args['sizes'].values())

    def array_strategy(*, for_variances: bool) -> st.SearchStrategy:
        elements = args['elements']
        if for_variances and elements is None:
            # Make sure that variances are non-negative and
            # let the user decide otherwise.
            width = 32 if np.dtype(dtype) == np.float32 else 64
            elements = st.floats(min_value=0.0, width=width)
        return npst.arrays(
            dtype,
            shape,
            elements=elements,
            fill=args['fill'],
            unique=args['unique'],
        )

    make_variable = partial(
        creation.array, dims=list(args['sizes'].keys()), unit=args['unit']
    )
    values = array_strategy(for_variances=False)
    if args['with_variances']:
        variances = array_strategy(for_variances=True)
    else:
        variances = st.none()
    return st.builds(make_variable, values=values, variances=variances)
class _ConditionallyWithVariances:
    """Decide whether to generate variances based on the dtype.

    The boolean strategy is created once in the constructor and reused for
    every call.
    """

    def __init__(self) -> None:
        self._strategy = st.booleans()

    def __call__(self, draw: st.DrawFn, dtype: DType) -> bool:
        """Return a drawn bool for float32 / float64, otherwise ``False``."""
        if dtype not in (DType.float32, DType.float64):
            return False
        return draw(self._strategy)
@st.composite
def _concrete_args(
    draw: st.DrawFn, args: dict[str, st.SearchStrategy | Any]
) -> dict[str, Any]:
    """Turn a dict of strategies / plain values into a dict of plain values.

    Every value that is a search strategy is replaced by an example drawn
    from it; all other values are passed through unchanged.
    A ``_ConditionallyWithVariances`` under ``'with_variances'`` is resolved
    last because it needs the concrete ``'dtype'``.
    """
    concrete: dict[str, Any] = {}
    for key, value in args.items():
        if isinstance(value, st.SearchStrategy):
            concrete[key] = draw(value)
        else:
            concrete[key] = value

    variances_choice = concrete['with_variances']
    if isinstance(variances_choice, _ConditionallyWithVariances):
        concrete['with_variances'] = variances_choice(draw, concrete['dtype'])
    return concrete
def _variable_arg_strategies(
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool
    | st.SearchStrategy
    | _ConditionallyWithVariances
    | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> dict[str, st.SearchStrategy | Any]:
    """Assemble the argument dict for generating variables.

    Arguments that are ``None`` are replaced by a default strategy for
    ``sizes``, ``unit``, ``dtype`` and ``with_variances``;
    ``elements``, ``fill`` and ``unique`` are passed through as they are.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    if ndim is not None and sizes is not None:
        raise InvalidArgument(
            'Arguments `ndim` and `sizes` cannot both be used. '
            f'Got {ndim=}, {sizes=}.'
        )
    return {
        'sizes': sizes_dicts(ndim) if sizes is None else sizes,
        'unit': units() if unit is None else unit,
        # TODO other dtypes?
        'dtype': scalar_numeric_dtypes() if dtype is None else dtype,
        'with_variances': (
            _ConditionallyWithVariances()
            if with_variances is None
            else with_variances
        ),
        'elements': elements,
        'fill': fill,
        'unique': unique,
    }
# Design note: the component strategies for default arguments are built
# exactly once, at the time `variables` is called. Drawing examples through
# `_concrete_args` afterwards reuses those same strategy objects.
# An earlier implementation built the component strategies inside an
# `st.composite` function, i.e. again for every example drawn. That caused
# high memory consumption in hypothesis and tripped
# `hypothesis.HealthCheck.data_too_large`.
def variables(
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool | st.SearchStrategy | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> st.SearchStrategy[Variable]:
    """Return a strategy that generates variables.

    Every argument may be a fixed value or a strategy to draw the value from.

    Parameters
    ----------
    ndim:
        Number of dimensions. Cannot be combined with ``sizes``.
    sizes:
        Mapping of dimension labels to lengths.
        Generated by :func:`sizes_dicts` if not given.
    unit:
        Unit of the variable. Drawn from :func:`units` if not given.
    dtype:
        Element type. Drawn from :func:`scalar_numeric_dtypes` if not given.
    with_variances:
        Whether the variable gets variances. If not given, this is decided
        randomly for float32 / float64 and is ``False`` for other dtypes.
    elements:
        Forwarded to ``hypothesis.extra.numpy.arrays``.
    fill:
        Forwarded to ``hypothesis.extra.numpy.arrays``.
    unique:
        Forwarded to ``hypothesis.extra.numpy.arrays``.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    arg_strategies = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )
    fixed_args = _concrete_args(arg_strategies)
    return fixed_args.flatmap(_variables_from_fixed_args)
def n_variables(
    n: int,
    *,
    ndim: int | st.SearchStrategy | None = None,
    sizes: dict[str, int] | st.SearchStrategy | None = None,
    unit: str | Unit | st.SearchStrategy | None = None,
    dtype: str | DType | type | st.SearchStrategy | None = None,
    with_variances: bool | st.SearchStrategy | None = None,
    elements: float | st.SearchStrategy | None = None,
    fill: float | st.SearchStrategy | None = None,
    unique: bool | st.SearchStrategy | None = None,
) -> st.SearchStrategy[tuple[Variable, ...]]:
    """Return a strategy that generates tuples of ``n`` variables.

    The arguments are made concrete once per generated tuple, so all
    variables in a tuple are built from the same sizes, unit, dtype,
    and choice of having variances. The array contents are drawn
    separately for each variable.

    Parameters
    ----------
    n:
        Number of variables per tuple.
    ndim:
        Number of dimensions. Cannot be combined with ``sizes``.
    sizes:
        Mapping of dimension labels to lengths.
    unit:
        Unit of the variables.
    dtype:
        Element type of the variables.
    with_variances:
        Whether the variables get variances.
    elements:
        Forwarded to ``hypothesis.extra.numpy.arrays``.
    fill:
        Forwarded to ``hypothesis.extra.numpy.arrays``.
    unique:
        Forwarded to ``hypothesis.extra.numpy.arrays``.

    Raises
    ------
    hypothesis.errors.InvalidArgument
        If both ``ndim`` and ``sizes`` are given.
    """
    args = _variable_arg_strategies(
        ndim=ndim,
        sizes=sizes,
        unit=unit,
        dtype=dtype,
        with_variances=with_variances,
        elements=elements,
        fill=fill,
        unique=unique,
    )
    return _concrete_args(args).flatmap(
        lambda a: st.tuples(*(_variables_from_fixed_args(a) for _ in range(n)))
    )
@st.composite
def coord_dicts(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    sizes: dict[str, int],
    args: dict[str, Any] | None = None,
    bin_edges: bool = True,
) -> dict[str, Variable]:
    """Generate dicts of one-dimensional variables, e.g. for use as coords.

    Each variable has a single dimension taken from ``sizes``.
    Its name is either that dimension label or a label drawn from
    :func:`dims`. Up to 6 entries are drawn; entries that end up with the
    same name collapse into one.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    sizes:
        Dimension labels and lengths to build the variables from.
        An empty ``sizes`` always yields an empty dict.
    args:
        Arguments for :func:`variables`. The items ``'ndim'`` and ``'sizes'``
        are ignored because the dimensions are determined by ``sizes``.
        The dict is not modified.
    bin_edges:
        If ``True``, a variable may be one element longer than its dimension.
    """
    # Build a filtered copy instead of editing `args` in place: the caller
    # may reuse the same dict for other strategies or for later examples.
    variable_args = {
        key: value
        for key, value in (args or {}).items()
        if key not in ('ndim', 'sizes')
    }

    if bin_edges:

        def size_increment() -> int:
            return draw(st.integers(min_value=0, max_value=1))  # type:ignore[arg-type, return-value]

    else:

        def size_increment() -> int:
            return 0

    if not sizes:
        return {}

    names_and_sizes: list[tuple[Any, tuple[str, int]]] = draw(  # type: ignore[assignment]
        st.lists(  # type: ignore[arg-type]
            st.sampled_from(list(sizes))
            .map(lambda dim: (dim, sizes[dim] + size_increment()))
            .flatmap(
                lambda item: (st.just(item[0]) | dims()).map(lambda name: (name, item))
            ),
            min_size=0,
            max_size=6,
        )
    )
    return {
        name: draw(variables(**{**variable_args, 'sizes': {dim: size}}))  # type: ignore[arg-type, misc]
        for name, (dim, size) in names_and_sizes
    }
@st.composite
def dataarrays(
    draw: Callable[[st.SearchStrategy[Ex]], Ex],
    *,
    data_args: dict[str, Any] | None = None,
    coords: bool = True,
    coord_args: dict[str, Any] | None = None,
    masks: bool = True,
    mask_args: dict[str, Any] | None = None,
    bin_edges: bool = True,
) -> DataArray:
    """Generate data arrays with coords and masks.

    The data variable can be any variable supported by
    ``scipp.testing.strategies.variables``.
    The coordinates and masks are constrained to be one-dimensional where the
    dimension is one of the dims of the data.
    The name of a coordinate or mask may be,
    but is not required to be, a dimension name.

    None of the ``*_args`` dicts is modified.

    Parameters
    ----------
    draw:
        Provided by Hypothesis.
    data_args:
        Arguments for creating the data variable.
    coords:
        Selects whether coords are generated.
    coord_args:
        Arguments for creating the coordinate variable.
    masks:
        Selects whether masks are generated.
    mask_args:
        Arguments for creating the mask variable.
        The dtype is always set to ``bool``.
    bin_edges:
        If ``True``, coords may be bin edges.

    See Also
    --------
    scipp.testing.strategies.variables:
        For allowed items in ``*_args`` dicts.
    """
    data: Variable = draw(variables(**(data_args or {})))  # type: ignore[arg-type, assignment]

    if coords:
        # Hand over a copy so that the caller's dict can never be changed
        # by the code that consumes it.
        coords_dict: dict[str, Variable] = draw(  # type: ignore[assignment]
            coord_dicts(  # type: ignore[arg-type]
                sizes=data.sizes, args=dict(coord_args or {}), bin_edges=bin_edges
            )
        )
    else:
        coords_dict = {}

    if masks:
        # Merge into a new dict; writing 'dtype' into the caller's dict would
        # leak the forced bool dtype into later uses of that dict.
        bool_mask_args = {**(mask_args or {}), 'dtype': bool}
        masks_dict: dict[str, Variable] = draw(  # type: ignore[assignment]
            coord_dicts(sizes=data.sizes, args=bool_mask_args, bin_edges=False)  # type: ignore[arg-type]
        )
    else:
        masks_dict = {}

    return DataArray(
        data,
        coords=coords_dict,
        masks=masks_dict,
    )
# Public API of this module: the search strategies meant for use in tests.
__all__ = [
    'dims',
    'sizes_dicts',
    'units',
    'integer_dtypes',
    'floating_dtypes',
    'scalar_numeric_dtypes',
    'variables',
    'n_variables',
    'coord_dicts',
    'dataarrays',
]