Coverage for install/scipp/scipy/optimize/__init_

1# SPDX-License-Identifier: BSD-3-Clause

3# @author Simon Heybrock

4"""Sub-package for optimization such as curve fitting.

6This subpackage provides wrappers for a subset of functions from

7:py:mod:`scipy.optimize`.

8"""

10from collections.abc import Callable, Iterable

11from inspect import getfullargspec

12from numbers import Real

13from typing import Any

15import numpy as np

16import numpy.typing as npt

18from ...core import (

19 BinEdgeError,

20 DataArray,

21 DefaultUnit,

22 Unit,

23 Variable,

24 scalar,

25 stddevs,

26)

27from ...typing import VariableLike

28from ...units import default_unit, dimensionless

29from ..interpolate import _drop_masked

32def _as_scalar(obj: Any, unit: Unit | DefaultUnit | None) -> Any:

33 if unit == default_unit:

34 return obj

35 return scalar(

36 value=obj,

37 unit=unit,

38 )

41def _wrap_func(

42 f: Callable[..., Variable | DataArray],

43 p_names: Iterable[str],

44 p_units: Iterable[Unit | DefaultUnit | None],

45) -> Callable[..., npt.NDArray[Any]]:

46 def func(x: VariableLike, *args: Any) -> npt.NDArray[Any]:

47 p = {

48 k: _as_scalar(v, u) for k, v, u in zip(p_names, args, p_units, strict=True)

49 }

50 return f(x, **p).values # type: ignore[no-any-return]

52 return func

55def _get_sigma(

56 da: Variable | DataArray,

57) -> npt.NDArray[np.float64 | np.float32] | None:

58 if da.variances is None:

59 return None

61 sigma = stddevs(da).values

62 if not sigma.all():

63 raise ValueError(

64 'There is a 0 in the input variances. This would break the optimizer. '

65 'Mask the offending elements, remove them, or assign a meaningful '

66 'variance if possible before calling curve_fit.'

67 )

68 return sigma # type: ignore[no-any-return]

71def _covariance_with_units(

72 p_names: list[str],

73 pcov_values: npt.NDArray[np.float64 | np.float32],

74 units: list[Unit | DefaultUnit],

75) -> dict[str, dict[str, Variable | Real]]:

76 pcov: dict[str, dict[str, Variable | Real]] = {}

77 for i, row in enumerate(pcov_values):

78 pcov[p_names[i]] = {}

79 for j, elem in enumerate(row):

80 ui = units[i]

81 uj = units[j]

82 u = ui

83 if u == default_unit:

84 u = uj

85 elif uj != default_unit:

86 u = ui * uj # type: ignore[operator] # neither operand is DefaultUnit

87 pcov[p_names[i]][p_names[j]] = _as_scalar(elem, u)

88 return pcov

91def _make_defaults(

92 f: Callable[..., Any], p0: dict[str, Variable | float] | None

93) -> dict[str, Any]:

94 spec = getfullargspec(f)

95 if len(spec.args) != 1 or spec.varargs is not None:

96 raise ValueError("Fit function must take exactly one positional argument")

97 defaults = {} if spec.kwonlydefaults is None else spec.kwonlydefaults

98 kwargs: dict[str, Any] = {

99 arg: 1.0 for arg in spec.kwonlyargs if arg not in defaults

100 }

101 if p0 is not None:

102 kwargs.update(p0)

103 return kwargs

104

105

106def _get_specific_bounds(

107 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]],

108 name: str,

109 unit: Unit | None,

110) -> tuple[float, float]:

111 if name not in bounds:

112 return -np.inf, np.inf

113 b = bounds[name]

114 if len(b) != 2:

115 raise ValueError(

116 "Parameter bounds must be given as a tuple of length 2. "

117 f"Got a collection of length {len(b)} as bounds for '{name}'."

118 )

119 if isinstance(b[0], Variable):

120 return (

121 b[0].to(unit=unit, dtype=float).value,

122 b[1].to(unit=unit, dtype=float).value,

123 )

124 return b

125

126

127def _parse_bounds(

128 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]] | None,

129 params: dict[str, Any],

130) -> (

131 tuple[float, float]

132 | tuple[npt.NDArray[np.float64 | np.float32], npt.NDArray[np.float64 | np.float32]]

133):

134 if bounds is None:

135 return -np.inf, np.inf

136

137 bounds_tuples = [

138 _get_specific_bounds(

139 bounds, name, param.unit if isinstance(param, Variable) else dimensionless

140 )

141 for name, param in params.items()

142 ]

143 bounds_array = np.array(bounds_tuples).T

144 return bounds_array[0], bounds_array[1]

145

146

147def curve_fit(

148 f: Callable[..., Variable | DataArray],

149 da: DataArray,

150 *,

151 p0: dict[str, Variable | float] | None = None,

152 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]] | None = None,

153 **kwargs: Any,

154) -> tuple[dict[str, Variable | float], dict[str, dict[str, Variable | float]]]:

155 """Use non-linear least squares to fit a function, f, to data.

156

157 This is a wrapper around :py:func:`scipy.optimize.curve_fit`. See there for a

158 complete description of parameters. The differences are:

159

160 - Instead of separate xdata, ydata, and sigma arguments, the input data array

161 defines provides these, with sigma defined as the square root of the variances,

162 if present, i.e., the standard deviations.

163 - The fit function f must work with scipp objects. This provides additional safety

164 over the underlying scipy function by ensuring units are consistent.

165 - The fit function f must only take a single positional argument, x. All other

166 arguments mapping to fit parameters must be keyword-only arguments.

167 - The initial guess in p0 must be provided as a dict, mapping from fit-function

168 parameter names to initial guesses.

169 - The parameter bounds must also be provided as a dict, like p0.

170 - The fit parameters may be scalar scipp variables. In that case an initial guess

171 p0 with the correct units must be provided.

172 - The returned optimal parameter values popt and the coverance matrix pcov will

173 have units provided that the initial parameters have units. popt and pcov are

174 a dict and a dict of dict, respectively. They are indexed using the fit parameter

175 names. The variance of the returned optimal parameter values is set to the

176 corresponding diagonal value of the covariance matrix.

177

178 Parameters

179 ----------

180 f:

181 The model function, f(x, ...). It must take the independent variable

182 (coordinate of the data array da) as the first argument and the parameters

183 to fit as keyword arguments.

184 da:

185 One-dimensional data array. The dimension coordinate for the only

186 dimension defines the independent variable where the data is measured. The

187 values of the data array provide the dependent data. If the data array stores

188 variances then the standard deviations (square root of the variances) are taken

189 into account when fitting.

190 p0:

191 An optional dict of optional initial guesses for the parameters. If None,

192 then the initial values will all be 1 (if the parameter names for the function

193 can be determined using introspection, otherwise a ValueError is raised). If

194 the fit function cannot handle initial values of 1, in particular for parameters

195 that are not dimensionless, then typically a :py:class:`scipp.UnitError` is

196 raised, but details will depend on the function.

197 bounds:

198 Lower and upper bounds on parameters.

199 Defaults to no bounds.

200 Bounds are given as a dict of 2-tuples of (lower, upper) for each parameter

201 where lower and upper are either both Variables or plain numbers.

202 Parameters omitted from the `bounds` dict are unbounded.

203

204 Returns

205 -------

206 popt:

207 Optimal values for the parameters.

208 pcov:

209 The estimated covariance of popt.

210

211 See Also

212 --------

213 scipp.curve_fit:

214 Supports N-D curve fits.

215

216 Examples

217 --------

218

219 >>> def func(x, *, a, b):

220 ... return a * sc.exp(-b * x)

221

222 >>> x = sc.linspace(dim='x', start=0.0, stop=0.4, num=50, unit='m')

223 >>> y = func(x, a=5, b=17/sc.Unit('m'))

224 >>> y.values += 0.01 * np.random.default_rng().normal(size=50)

225 >>> da = sc.DataArray(y, coords={'x': x})

226

227 >>> from scipp.scipy.optimize import curve_fit

228 >>> popt, _ = curve_fit(func, da, p0 = {'b': 1.0 / sc.Unit('m')})

229 >>> sc.round(sc.values(popt['a']))

230 <scipp.Variable> () float64 [dimensionless] 5

231 >>> sc.round(sc.values(popt['b']))

232 <scipp.Variable> () float64 [1/m] 17

233

234 Fit-function parameters that have a default value do not participate in the fit

235 unless an initial guess is provided via the p0 parameters:

236

237 >>> from functools import partial

238 >>> func2 = partial(func, a=5)

239 >>> popt, _ = curve_fit(func2, da, p0 = {'b': 1.0 / sc.Unit('m')})

240 >>> 'a' in popt

241 False

242 >>> popt, _ = curve_fit(func2, da, p0 = {'a':2, 'b': 1.0 / sc.Unit('m')})

243 >>> 'a' in popt

244 True

245 """

246 if 'jac' in kwargs:

247 raise NotImplementedError(

248 "The 'jac' argument is not yet supported. "

249 "See https://github.com/scipp/scipp/issues/2544"

250 )

251 for arg in ['xdata', 'ydata', 'sigma']:

252 if arg in kwargs:

253 raise TypeError(

254 f"Invalid argument '{arg}', already defined by the input data array."

255 )

256 if da.sizes[da.dim] != da.coords[da.dim].sizes[da.dim]:

257 raise BinEdgeError("Cannot fit data array with bin-edge coordinate.")

258 import scipy.optimize as opt

259

260 da = _drop_masked(da, da.dim)

261 params = _make_defaults(f, p0)

262 p_units = [

263 p.unit if isinstance(p, Variable) else default_unit for p in params.values()

264 ]

265 p0_values = [p.value if isinstance(p, Variable) else p for p in params.values()]

266 popt, pcov = opt.curve_fit(

267 f=_wrap_func(f, params.keys(), p_units),

268 xdata=da.coords[da.dim],

269 ydata=da.values,

270 sigma=_get_sigma(da),

271 p0=p0_values,

272 bounds=_parse_bounds(bounds, params),

273 **kwargs,

274 )

275 popt = {

276 name: scalar(value=val, variance=var, unit=u)

277 for name, val, var, u in zip(

278 params.keys(), popt, np.diag(pcov), p_units, strict=True

279 )

280 }

281 pcov = _covariance_with_units(list(params.keys()), pcov, p_units)

282 return popt, pcov

283

284

285__all__ = ['curve_fit']

Coverage for install/scipp/scipy/optimize/init.py: 98%

82 statements