Coverage for install/scipp/scipy/optimize/__init__.py: 98%
80 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-04-28 01:28 +0000
1# SPDX-License-Identifier: BSD-3-Clause
2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp)
3# @author Simon Heybrock
4"""Sub-package for optimization such as curve fitting.
6This subpackage provides wrappers for a subset of functions from
7:py:mod:`scipy.optimize`.
8"""
10from inspect import getfullargspec
11from numbers import Real
12from typing import Callable, Dict, Optional, Tuple, Union
14import numpy as np
16from ...core import BinEdgeError, DataArray, Variable, scalar, stddevs
17from ...units import default_unit, dimensionless
18from ..interpolate import _drop_masked
21def _as_scalar(obj, unit):
22 if unit == default_unit:
23 return obj
24 return scalar(value=obj, unit=unit)
27def _wrap_func(f, p_names, p_units):
28 def func(x, *args):
29 p = {k: _as_scalar(v, u) for k, v, u in zip(p_names, args, p_units)}
30 return f(x, **p).values
32 return func
35def _get_sigma(da):
36 if da.variances is None:
37 return None
39 sigma = stddevs(da).values
40 if not sigma.all():
41 raise ValueError(
42 'There is a 0 in the input variances. This would break the optimizer. '
43 'Mask the offending elements, remove them, or assign a meaningful '
44 'variance if possible before calling curve_fit.'
45 )
46 return sigma
49def _covariance_with_units(p_names, pcov_values, units):
50 pcov = {}
51 for i, row in enumerate(pcov_values):
52 pcov[p_names[i]] = {}
53 for j, elem in enumerate(row):
54 ui = units[i]
55 uj = units[j]
56 u = ui
57 if u == default_unit:
58 u = uj
59 elif uj != default_unit:
60 u = ui * uj
61 pcov[p_names[i]][p_names[j]] = _as_scalar(elem, u)
62 return pcov
65def _make_defaults(f, p0):
66 spec = getfullargspec(f)
67 if len(spec.args) != 1 or spec.varargs is not None:
68 raise ValueError("Fit function must take exactly one positional argument")
69 defaults = {} if spec.kwonlydefaults is None else spec.kwonlydefaults
70 kwargs = {arg: 1.0 for arg in spec.kwonlyargs if arg not in defaults}
71 if p0 is not None:
72 kwargs.update(p0)
73 return kwargs
76def _get_specific_bounds(bounds, name, unit) -> Tuple[float, float]:
77 if name not in bounds:
78 return -np.inf, np.inf
79 b = bounds[name]
80 if len(b) != 2:
81 raise ValueError(
82 "Parameter bounds must be given as a tuple of length 2. "
83 f"Got a collection of length {len(b)} as bounds for '{name}'."
84 )
85 if isinstance(b[0], Variable):
86 return (
87 b[0].to(unit=unit, dtype=float).value,
88 b[1].to(unit=unit, dtype=float).value,
89 )
90 return b
93def _parse_bounds(
94 bounds, params
95) -> Union[Tuple[float, float], Tuple[np.ndarray, np.ndarray]]:
96 if bounds is None:
97 return -np.inf, np.inf
99 bounds_tuples = [
100 _get_specific_bounds(
101 bounds, name, param.unit if isinstance(param, Variable) else dimensionless
102 )
103 for name, param in params.items()
104 ]
105 bounds_array = np.array(bounds_tuples).T
106 return bounds_array[0], bounds_array[1]
109def curve_fit(
110 f: Callable,
111 da: DataArray,
112 *,
113 p0: Optional[Dict[str, Union[Variable, Real]]] = None,
114 bounds: Optional[
115 Dict[str, Union[Tuple[Variable, Variable], Tuple[Real, Real]]]
116 ] = None,
117 **kwargs,
118) -> Tuple[
119 Dict[str, Union[Variable, Real]], Dict[str, Dict[str, Union[Variable, Real]]]
120]:
121 """Use non-linear least squares to fit a function, f, to data.
123 This is a wrapper around :py:func:`scipy.optimize.curve_fit`. See there for a
124 complete description of parameters. The differences are:
126 - Instead of separate xdata, ydata, and sigma arguments, the input data array
127 defines provides these, with sigma defined as the square root of the variances,
128 if present, i.e., the standard deviations.
129 - The fit function f must work with scipp objects. This provides additional safety
130 over the underlying scipy function by ensuring units are consistent.
131 - The fit function f must only take a single positional argument, x. All other
132 arguments mapping to fit parameters must be keyword-only arguments.
133 - The initial guess in p0 must be provided as a dict, mapping from fit-function
134 parameter names to initial guesses.
135 - The parameter bounds must also be provided as a dict, like p0.
136 - The fit parameters may be scalar scipp variables. In that case an initial guess
137 p0 with the correct units must be provided.
138 - The returned optimal parameter values popt and the coverance matrix pcov will
139 have units provided that the initial parameters have units. popt and pcov are
140 a dict and a dict of dict, respectively. They are indexed using the fit parameter
141 names. The variance of the returned optimal parameter values is set to the
142 corresponding diagonal value of the covariance matrix.
144 Parameters
145 ----------
146 f:
147 The model function, f(x, ...). It must take the independent variable
148 (coordinate of the data array da) as the first argument and the parameters
149 to fit as keyword arguments.
150 da:
151 One-dimensional data array. The dimension coordinate for the only
152 dimension defines the independent variable where the data is measured. The
153 values of the data array provide the dependent data. If the data array stores
154 variances then the standard deviations (square root of the variances) are taken
155 into account when fitting.
156 p0:
157 An optional dict of optional initial guesses for the parameters. If None,
158 then the initial values will all be 1 (if the parameter names for the function
159 can be determined using introspection, otherwise a ValueError is raised). If
160 the fit function cannot handle initial values of 1, in particular for parameters
161 that are not dimensionless, then typically a :py:class:`scipp.UnitError` is
162 raised, but details will depend on the function.
163 bounds:
164 Lower and upper bounds on parameters.
165 Defaults to no bounds.
166 Bounds are given as a dict of 2-tuples of (lower, upper) for each parameter
167 where lower and upper are either both Variables or plain numbers.
168 Parameters omitted from the `bounds` dict are unbounded.
170 Returns
171 -------
172 popt:
173 Optimal values for the parameters.
174 pcov:
175 The estimated covariance of popt.
177 See Also
178 --------
179 scipp.curve_fit:
180 Supports N-D curve fits.
182 Examples
183 --------
185 >>> def func(x, *, a, b):
186 ... return a * sc.exp(-b * x)
188 >>> x = sc.linspace(dim='x', start=0.0, stop=0.4, num=50, unit='m')
189 >>> y = func(x, a=5, b=17/sc.Unit('m'))
190 >>> y.values += 0.01 * np.random.default_rng().normal(size=50)
191 >>> da = sc.DataArray(y, coords={'x': x})
193 >>> from scipp.scipy.optimize import curve_fit
194 >>> popt, _ = curve_fit(func, da, p0 = {'b': 1.0 / sc.Unit('m')})
195 >>> sc.round(sc.values(popt['a']))
196 <scipp.Variable> () float64 [dimensionless] 5
197 >>> sc.round(sc.values(popt['b']))
198 <scipp.Variable> () float64 [1/m] 17
200 Fit-function parameters that have a default value do not participate in the fit
201 unless an initial guess is provided via the p0 parameters:
203 >>> from functools import partial
204 >>> func2 = partial(func, a=5)
205 >>> popt, _ = curve_fit(func2, da, p0 = {'b': 1.0 / sc.Unit('m')})
206 >>> 'a' in popt
207 False
208 >>> popt, _ = curve_fit(func2, da, p0 = {'a':2, 'b': 1.0 / sc.Unit('m')})
209 >>> 'a' in popt
210 True
211 """
212 if 'jac' in kwargs:
213 raise NotImplementedError(
214 "The 'jac' argument is not yet supported. "
215 "See https://github.com/scipp/scipp/issues/2544"
216 )
217 for arg in ['xdata', 'ydata', 'sigma']:
218 if arg in kwargs:
219 raise TypeError(
220 f"Invalid argument '{arg}', already defined by the input data array."
221 )
222 if da.sizes[da.dim] != da.coords[da.dim].sizes[da.dim]:
223 raise BinEdgeError("Cannot fit data array with bin-edge coordinate.")
224 import scipy.optimize as opt
226 da = _drop_masked(da, da.dim)
227 params = _make_defaults(f, p0)
228 p_units = [
229 p.unit if isinstance(p, Variable) else default_unit for p in params.values()
230 ]
231 p0 = [p.value if isinstance(p, Variable) else p for p in params.values()]
232 popt, pcov = opt.curve_fit(
233 f=_wrap_func(f, params.keys(), p_units),
234 xdata=da.coords[da.dim],
235 ydata=da.values,
236 sigma=_get_sigma(da),
237 p0=p0,
238 bounds=_parse_bounds(bounds, params),
239 **kwargs,
240 )
241 popt = {
242 name: scalar(value=val, variance=var, unit=u)
243 for name, val, var, u in zip(params.keys(), popt, np.diag(pcov), p_units)
244 }
245 pcov = _covariance_with_units(list(params.keys()), pcov, p_units)
246 return popt, pcov
249__all__ = ['curve_fit']