Coverage for install/scipp/scipy/optimize/__init__.py: 98%

82 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-12-01 01:59 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Simon Heybrock 

4"""Sub-package for optimization such as curve fitting. 

5 

6This subpackage provides wrappers for a subset of functions from 

7:py:mod:`scipy.optimize`. 

8""" 

9 

10from collections.abc import Callable, Iterable 

11from inspect import getfullargspec 

12from numbers import Real 

13from typing import Any 

14 

15import numpy as np 

16import numpy.typing as npt 

17 

18from ...core import ( 

19 BinEdgeError, 

20 DataArray, 

21 DefaultUnit, 

22 Unit, 

23 Variable, 

24 scalar, 

25 stddevs, 

26) 

27from ...typing import VariableLike 

28from ...units import default_unit, dimensionless 

29from ..interpolate import _drop_masked 

30 

31 

32def _as_scalar(obj: Any, unit: Unit | DefaultUnit | None) -> Any: 

33 if unit == default_unit: 

34 return obj 

35 return scalar( 

36 value=obj, 

37 unit=unit, 

38 ) 

39 

40 

41def _wrap_func( 

42 f: Callable[..., Variable | DataArray], 

43 p_names: Iterable[str], 

44 p_units: Iterable[Unit | DefaultUnit | None], 

45) -> Callable[..., npt.NDArray[Any]]: 

46 def func(x: VariableLike, *args: Any) -> npt.NDArray[Any]: 

47 p = { 

48 k: _as_scalar(v, u) for k, v, u in zip(p_names, args, p_units, strict=True) 

49 } 

50 return f(x, **p).values # type: ignore[no-any-return] 

51 

52 return func 

53 

54 

55def _get_sigma( 

56 da: Variable | DataArray, 

57) -> npt.NDArray[np.float64 | np.float32] | None: 

58 if da.variances is None: 

59 return None 

60 

61 sigma = stddevs(da).values 

62 if not sigma.all(): 

63 raise ValueError( 

64 'There is a 0 in the input variances. This would break the optimizer. ' 

65 'Mask the offending elements, remove them, or assign a meaningful ' 

66 'variance if possible before calling curve_fit.' 

67 ) 

68 return sigma # type: ignore[no-any-return] 

69 

70 

71def _covariance_with_units( 

72 p_names: list[str], 

73 pcov_values: npt.NDArray[np.float64 | np.float32], 

74 units: list[Unit | DefaultUnit], 

75) -> dict[str, dict[str, Variable | Real]]: 

76 pcov: dict[str, dict[str, Variable | Real]] = {} 

77 for i, row in enumerate(pcov_values): 

78 pcov[p_names[i]] = {} 

79 for j, elem in enumerate(row): 

80 ui = units[i] 

81 uj = units[j] 

82 u = ui 

83 if u == default_unit: 

84 u = uj 

85 elif uj != default_unit: 

86 u = ui * uj # type: ignore[operator] # neither operand is DefaultUnit 

87 pcov[p_names[i]][p_names[j]] = _as_scalar(elem, u) 

88 return pcov 

89 

90 

91def _make_defaults( 

92 f: Callable[..., Any], p0: dict[str, Variable | float] | None 

93) -> dict[str, Any]: 

94 spec = getfullargspec(f) 

95 if len(spec.args) != 1 or spec.varargs is not None: 

96 raise ValueError("Fit function must take exactly one positional argument") 

97 defaults = {} if spec.kwonlydefaults is None else spec.kwonlydefaults 

98 kwargs: dict[str, Any] = { 

99 arg: 1.0 for arg in spec.kwonlyargs if arg not in defaults 

100 } 

101 if p0 is not None: 

102 kwargs.update(p0) 

103 return kwargs 

104 

105 

106def _get_specific_bounds( 

107 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]], 

108 name: str, 

109 unit: Unit | None, 

110) -> tuple[float, float]: 

111 if name not in bounds: 

112 return -np.inf, np.inf 

113 b = bounds[name] 

114 if len(b) != 2: 

115 raise ValueError( 

116 "Parameter bounds must be given as a tuple of length 2. " 

117 f"Got a collection of length {len(b)} as bounds for '{name}'." 

118 ) 

119 if isinstance(b[0], Variable): 

120 return ( 

121 b[0].to(unit=unit, dtype=float).value, 

122 b[1].to(unit=unit, dtype=float).value, 

123 ) 

124 return b 

125 

126 

127def _parse_bounds( 

128 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]] | None, 

129 params: dict[str, Any], 

130) -> ( 

131 tuple[float, float] 

132 | tuple[npt.NDArray[np.float64 | np.float32], npt.NDArray[np.float64 | np.float32]] 

133): 

134 if bounds is None: 

135 return -np.inf, np.inf 

136 

137 bounds_tuples = [ 

138 _get_specific_bounds( 

139 bounds, name, param.unit if isinstance(param, Variable) else dimensionless 

140 ) 

141 for name, param in params.items() 

142 ] 

143 bounds_array = np.array(bounds_tuples).T 

144 return bounds_array[0], bounds_array[1] 

145 

146 

147def curve_fit( 

148 f: Callable[..., Variable | DataArray], 

149 da: DataArray, 

150 *, 

151 p0: dict[str, Variable | float] | None = None, 

152 bounds: dict[str, tuple[Variable, Variable] | tuple[float, float]] | None = None, 

153 **kwargs: Any, 

154) -> tuple[dict[str, Variable | float], dict[str, dict[str, Variable | float]]]: 

155 """Use non-linear least squares to fit a function, f, to data. 

156 

157 This is a wrapper around :py:func:`scipy.optimize.curve_fit`. See there for a 

158 complete description of parameters. The differences are: 

159 

160 - Instead of separate xdata, ydata, and sigma arguments, the input data array 

161 defines provides these, with sigma defined as the square root of the variances, 

162 if present, i.e., the standard deviations. 

163 - The fit function f must work with scipp objects. This provides additional safety 

164 over the underlying scipy function by ensuring units are consistent. 

165 - The fit function f must only take a single positional argument, x. All other 

166 arguments mapping to fit parameters must be keyword-only arguments. 

167 - The initial guess in p0 must be provided as a dict, mapping from fit-function 

168 parameter names to initial guesses. 

169 - The parameter bounds must also be provided as a dict, like p0. 

170 - The fit parameters may be scalar scipp variables. In that case an initial guess 

171 p0 with the correct units must be provided. 

172 - The returned optimal parameter values popt and the coverance matrix pcov will 

173 have units provided that the initial parameters have units. popt and pcov are 

174 a dict and a dict of dict, respectively. They are indexed using the fit parameter 

175 names. The variance of the returned optimal parameter values is set to the 

176 corresponding diagonal value of the covariance matrix. 

177 

178 Parameters 

179 ---------- 

180 f: 

181 The model function, f(x, ...). It must take the independent variable 

182 (coordinate of the data array da) as the first argument and the parameters 

183 to fit as keyword arguments. 

184 da: 

185 One-dimensional data array. The dimension coordinate for the only 

186 dimension defines the independent variable where the data is measured. The 

187 values of the data array provide the dependent data. If the data array stores 

188 variances then the standard deviations (square root of the variances) are taken 

189 into account when fitting. 

190 p0: 

191 An optional dict of optional initial guesses for the parameters. If None, 

192 then the initial values will all be 1 (if the parameter names for the function 

193 can be determined using introspection, otherwise a ValueError is raised). If 

194 the fit function cannot handle initial values of 1, in particular for parameters 

195 that are not dimensionless, then typically a :py:class:`scipp.UnitError` is 

196 raised, but details will depend on the function. 

197 bounds: 

198 Lower and upper bounds on parameters. 

199 Defaults to no bounds. 

200 Bounds are given as a dict of 2-tuples of (lower, upper) for each parameter 

201 where lower and upper are either both Variables or plain numbers. 

202 Parameters omitted from the `bounds` dict are unbounded. 

203 

204 Returns 

205 ------- 

206 popt: 

207 Optimal values for the parameters. 

208 pcov: 

209 The estimated covariance of popt. 

210 

211 See Also 

212 -------- 

213 scipp.curve_fit: 

214 Supports N-D curve fits. 

215 

216 Examples 

217 -------- 

218 

219 >>> def func(x, *, a, b): 

220 ... return a * sc.exp(-b * x) 

221 

222 >>> x = sc.linspace(dim='x', start=0.0, stop=0.4, num=50, unit='m') 

223 >>> y = func(x, a=5, b=17/sc.Unit('m')) 

224 >>> y.values += 0.01 * np.random.default_rng().normal(size=50) 

225 >>> da = sc.DataArray(y, coords={'x': x}) 

226 

227 >>> from scipp.scipy.optimize import curve_fit 

228 >>> popt, _ = curve_fit(func, da, p0 = {'b': 1.0 / sc.Unit('m')}) 

229 >>> sc.round(sc.values(popt['a'])) 

230 <scipp.Variable> () float64 [dimensionless] 5 

231 >>> sc.round(sc.values(popt['b'])) 

232 <scipp.Variable> () float64 [1/m] 17 

233 

234 Fit-function parameters that have a default value do not participate in the fit 

235 unless an initial guess is provided via the p0 parameters: 

236 

237 >>> from functools import partial 

238 >>> func2 = partial(func, a=5) 

239 >>> popt, _ = curve_fit(func2, da, p0 = {'b': 1.0 / sc.Unit('m')}) 

240 >>> 'a' in popt 

241 False 

242 >>> popt, _ = curve_fit(func2, da, p0 = {'a':2, 'b': 1.0 / sc.Unit('m')}) 

243 >>> 'a' in popt 

244 True 

245 """ 

246 if 'jac' in kwargs: 

247 raise NotImplementedError( 

248 "The 'jac' argument is not yet supported. " 

249 "See https://github.com/scipp/scipp/issues/2544" 

250 ) 

251 for arg in ['xdata', 'ydata', 'sigma']: 

252 if arg in kwargs: 

253 raise TypeError( 

254 f"Invalid argument '{arg}', already defined by the input data array." 

255 ) 

256 if da.sizes[da.dim] != da.coords[da.dim].sizes[da.dim]: 

257 raise BinEdgeError("Cannot fit data array with bin-edge coordinate.") 

258 import scipy.optimize as opt 

259 

260 da = _drop_masked(da, da.dim) 

261 params = _make_defaults(f, p0) 

262 p_units = [ 

263 p.unit if isinstance(p, Variable) else default_unit for p in params.values() 

264 ] 

265 p0_values = [p.value if isinstance(p, Variable) else p for p in params.values()] 

266 popt, pcov = opt.curve_fit( 

267 f=_wrap_func(f, params.keys(), p_units), 

268 xdata=da.coords[da.dim], 

269 ydata=da.values, 

270 sigma=_get_sigma(da), 

271 p0=p0_values, 

272 bounds=_parse_bounds(bounds, params), 

273 **kwargs, 

274 ) 

275 popt = { 

276 name: scalar(value=val, variance=var, unit=u) 

277 for name, val, var, u in zip( 

278 params.keys(), popt, np.diag(pcov), p_units, strict=True 

279 ) 

280 } 

281 pcov = _covariance_with_units(list(params.keys()), pcov, p_units) 

282 return popt, pcov 

283 

284 

285__all__ = ['curve_fit']