Coverage for install/scipp/scipy/optimize/__init__.py: 98%
82 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-17 01:51 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-11-17 01:51 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Simon Heybrock
4"""Sub-package for optimization such as curve fitting.
6This subpackage provides wrappers for a subset of functions from
7:py:mod:`scipy.optimize`.
8"""
10from collections.abc import Callable, Iterable
11from inspect import getfullargspec
12from numbers import Real
13from typing import Any
15import numpy as np
16import numpy.typing as npt
18from ...core import (
19 BinEdgeError,
20 DataArray,
21 DefaultUnit,
22 Unit,
23 Variable,
24 scalar,
25 stddevs,
26)
27from ...typing import VariableLike
28from ...units import default_unit, dimensionless
29from ..interpolate import _drop_masked
32def _as_scalar(obj: Any, unit: Unit | DefaultUnit | None) -> Any:
33 if unit == default_unit:
34 return obj
35 return scalar(
36 value=obj,
37 unit=unit,
38 )
41def _wrap_func(
42 f: Callable[..., Variable | DataArray],
43 p_names: Iterable[str],
44 p_units: Iterable[Unit | DefaultUnit | None],
45) -> Callable[..., npt.NDArray[Any]]:
46 def func(x: VariableLike, *args: Any) -> npt.NDArray[Any]:
47 p = {
48 k: _as_scalar(v, u) for k, v, u in zip(p_names, args, p_units, strict=True)
49 }
50 return f(x, **p).values # type: ignore[no-any-return]
52 return func
55def _get_sigma(
56 da: Variable | DataArray,
57) -> npt.NDArray[np.float64 | np.float32] | None:
58 if da.variances is None:
59 return None
61 sigma = stddevs(da).values
62 if not sigma.all():
63 raise ValueError(
64 'There is a 0 in the input variances. This would break the optimizer. '
65 'Mask the offending elements, remove them, or assign a meaningful '
66 'variance if possible before calling curve_fit.'
67 )
68 return sigma # type: ignore[no-any-return]
71def _covariance_with_units(
72 p_names: list[str],
73 pcov_values: npt.NDArray[np.float64 | np.float32],
74 units: list[Unit | DefaultUnit],
75) -> dict[str, dict[str, Variable | Real]]:
76 pcov: dict[str, dict[str, Variable | Real]] = {}
77 for i, row in enumerate(pcov_values):
78 pcov[p_names[i]] = {}
79 for j, elem in enumerate(row):
80 ui = units[i]
81 uj = units[j]
82 u = ui
83 if u == default_unit:
84 u = uj
85 elif uj != default_unit:
86 u = ui * uj # type: ignore[operator] # neither operand is DefaultUnit
87 pcov[p_names[i]][p_names[j]] = _as_scalar(elem, u)
88 return pcov
91def _make_defaults(
92 f: Callable[..., Any], p0: dict[str, Variable | float] | None
93) -> dict[str, Any]:
94 spec = getfullargspec(f)
95 if len(spec.args) != 1 or spec.varargs is not None:
96 raise ValueError("Fit function must take exactly one positional argument")
97 defaults = {} if spec.kwonlydefaults is None else spec.kwonlydefaults
98 kwargs: dict[str, Any] = {
99 arg: 1.0 for arg in spec.kwonlyargs if arg not in defaults
100 }
101 if p0 is not None:
102 kwargs.update(p0)
103 return kwargs
106def _get_specific_bounds(
107 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]],
108 name: str,
109 unit: Unit | None,
110) -> tuple[float, float]:
111 if name not in bounds:
112 return -np.inf, np.inf
113 b = bounds[name]
114 if len(b) != 2:
115 raise ValueError(
116 "Parameter bounds must be given as a tuple of length 2. "
117 f"Got a collection of length {len(b)} as bounds for '{name}'."
118 )
119 if isinstance(b[0], Variable):
120 return (
121 b[0].to(unit=unit, dtype=float).value,
122 b[1].to(unit=unit, dtype=float).value,
123 )
124 return b
127def _parse_bounds(
128 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]] | None,
129 params: dict[str, Any],
130) -> (
131 tuple[float, float]
132 | tuple[npt.NDArray[np.float64 | np.float32], npt.NDArray[np.float64 | np.float32]]
133):
134 if bounds is None:
135 return -np.inf, np.inf
137 bounds_tuples = [
138 _get_specific_bounds(
139 bounds, name, param.unit if isinstance(param, Variable) else dimensionless
140 )
141 for name, param in params.items()
142 ]
143 bounds_array = np.array(bounds_tuples).T
144 return bounds_array[0], bounds_array[1]
147def curve_fit(
148 f: Callable[..., Variable | DataArray],
149 da: DataArray,
150 *,
151 p0: dict[str, Variable | float] | None = None,
152 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]] | None = None,
153 **kwargs: Any,
154) -> tuple[dict[str, Variable | float], dict[str, dict[str, Variable | float]]]:
155 """Use non-linear least squares to fit a function, f, to data.
157 This is a wrapper around :py:func:`scipy.optimize.curve_fit`. See there for a
158 complete description of parameters. The differences are:
160 - Instead of separate xdata, ydata, and sigma arguments, the input data array
161 defines provides these, with sigma defined as the square root of the variances,
162 if present, i.e., the standard deviations.
163 - The fit function f must work with scipp objects. This provides additional safety
164 over the underlying scipy function by ensuring units are consistent.
165 - The fit function f must only take a single positional argument, x. All other
166 arguments mapping to fit parameters must be keyword-only arguments.
167 - The initial guess in p0 must be provided as a dict, mapping from fit-function
168 parameter names to initial guesses.
169 - The parameter bounds must also be provided as a dict, like p0.
170 - The fit parameters may be scalar scipp variables. In that case an initial guess
171 p0 with the correct units must be provided.
172 - The returned optimal parameter values popt and the coverance matrix pcov will
173 have units provided that the initial parameters have units. popt and pcov are
174 a dict and a dict of dict, respectively. They are indexed using the fit parameter
175 names. The variance of the returned optimal parameter values is set to the
176 corresponding diagonal value of the covariance matrix.
178 Parameters
179 ----------
180 f:
181 The model function, f(x, ...). It must take the independent variable
182 (coordinate of the data array da) as the first argument and the parameters
183 to fit as keyword arguments.
184 da:
185 One-dimensional data array. The dimension coordinate for the only
186 dimension defines the independent variable where the data is measured. The
187 values of the data array provide the dependent data. If the data array stores
188 variances then the standard deviations (square root of the variances) are taken
189 into account when fitting.
190 p0:
191 An optional dict of optional initial guesses for the parameters. If None,
192 then the initial values will all be 1 (if the parameter names for the function
193 can be determined using introspection, otherwise a ValueError is raised). If
194 the fit function cannot handle initial values of 1, in particular for parameters
195 that are not dimensionless, then typically a :py:class:`scipp.UnitError` is
196 raised, but details will depend on the function.
197 bounds:
198 Lower and upper bounds on parameters.
199 Defaults to no bounds.
200 Bounds are given as a dict of 2-tuples of (lower, upper) for each parameter
201 where lower and upper are either both Variables or plain numbers.
202 Parameters omitted from the `bounds` dict are unbounded.
204 Returns
205 -------
206 popt:
207 Optimal values for the parameters.
208 pcov:
209 The estimated covariance of popt.
211 See Also
212 --------
213 scipp.curve_fit:
214 Supports N-D curve fits.
216 Examples
217 --------
219 >>> def func(x, *, a, b):
220 ... return a * sc.exp(-b * x)
222 >>> x = sc.linspace(dim='x', start=0.0, stop=0.4, num=50, unit='m')
223 >>> y = func(x, a=5, b=17/sc.Unit('m'))
224 >>> y.values += 0.01 * np.random.default_rng().normal(size=50)
225 >>> da = sc.DataArray(y, coords={'x': x})
227 >>> from scipp.scipy.optimize import curve_fit
228 >>> popt, _ = curve_fit(func, da, p0 = {'b': 1.0 / sc.Unit('m')})
229 >>> sc.round(sc.values(popt['a']))
230 <scipp.Variable> () float64 [dimensionless] 5
231 >>> sc.round(sc.values(popt['b']))
232 <scipp.Variable> () float64 [1/m] 17
234 Fit-function parameters that have a default value do not participate in the fit
235 unless an initial guess is provided via the p0 parameters:
237 >>> from functools import partial
238 >>> func2 = partial(func, a=5)
239 >>> popt, _ = curve_fit(func2, da, p0 = {'b': 1.0 / sc.Unit('m')})
240 >>> 'a' in popt
241 False
242 >>> popt, _ = curve_fit(func2, da, p0 = {'a':2, 'b': 1.0 / sc.Unit('m')})
243 >>> 'a' in popt
244 True
245 """
246 if 'jac' in kwargs:
247 raise NotImplementedError(
248 "The 'jac' argument is not yet supported. "
249 "See https://github.com/scipp/scipp/issues/2544"
250 )
251 for arg in ['xdata', 'ydata', 'sigma']:
252 if arg in kwargs:
253 raise TypeError(
254 f"Invalid argument '{arg}', already defined by the input data array."
255 )
256 if da.sizes[da.dim] != da.coords[da.dim].sizes[da.dim]:
257 raise BinEdgeError("Cannot fit data array with bin-edge coordinate.")
258 import scipy.optimize as opt
260 da = _drop_masked(da, da.dim)
261 params = _make_defaults(f, p0)
262 p_units = [
263 p.unit if isinstance(p, Variable) else default_unit for p in params.values()
264 ]
265 p0_values = [p.value if isinstance(p, Variable) else p for p in params.values()]
266 popt, pcov = opt.curve_fit(
267 f=_wrap_func(f, params.keys(), p_units),
268 xdata=da.coords[da.dim],
269 ydata=da.values,
270 sigma=_get_sigma(da),
271 p0=p0_values,
272 bounds=_parse_bounds(bounds, params),
273 **kwargs,
274 )
275 popt = {
276 name: scalar(value=val, variance=var, unit=u)
277 for name, val, var, u in zip(
278 params.keys(), popt, np.diag(pcov), p_units, strict=True
279 )
280 }
281 pcov = _covariance_with_units(list(params.keys()), pcov, p_units)
282 return popt, pcov
285__all__ = ['curve_fit']