Coverage for install/scipp/scipy/optimize/__init__.py: 98%

80 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-04-28 01:28 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Simon Heybrock 

4"""Sub-package for optimization such as curve fitting. 

5 

6This subpackage provides wrappers for a subset of functions from 

7:py:mod:`scipy.optimize`. 

8""" 

9 

10from inspect import getfullargspec 

11from numbers import Real 

12from typing import Callable, Dict, Optional, Tuple, Union 

13 

14import numpy as np 

15 

16from ...core import BinEdgeError, DataArray, Variable, scalar, stddevs 

17from ...units import default_unit, dimensionless 

18from ..interpolate import _drop_masked 

19 

20 

21def _as_scalar(obj, unit): 

22 if unit == default_unit: 

23 return obj 

24 return scalar(value=obj, unit=unit) 

25 

26 

27def _wrap_func(f, p_names, p_units): 

28 def func(x, *args): 

29 p = {k: _as_scalar(v, u) for k, v, u in zip(p_names, args, p_units)} 

30 return f(x, **p).values 

31 

32 return func 

33 

34 

35def _get_sigma(da): 

36 if da.variances is None: 

37 return None 

38 

39 sigma = stddevs(da).values 

40 if not sigma.all(): 

41 raise ValueError( 

42 'There is a 0 in the input variances. This would break the optimizer. ' 

43 'Mask the offending elements, remove them, or assign a meaningful ' 

44 'variance if possible before calling curve_fit.' 

45 ) 

46 return sigma 

47 

48 

49def _covariance_with_units(p_names, pcov_values, units): 

50 pcov = {} 

51 for i, row in enumerate(pcov_values): 

52 pcov[p_names[i]] = {} 

53 for j, elem in enumerate(row): 

54 ui = units[i] 

55 uj = units[j] 

56 u = ui 

57 if u == default_unit: 

58 u = uj 

59 elif uj != default_unit: 

60 u = ui * uj 

61 pcov[p_names[i]][p_names[j]] = _as_scalar(elem, u) 

62 return pcov 

63 

64 

65def _make_defaults(f, p0): 

66 spec = getfullargspec(f) 

67 if len(spec.args) != 1 or spec.varargs is not None: 

68 raise ValueError("Fit function must take exactly one positional argument") 

69 defaults = {} if spec.kwonlydefaults is None else spec.kwonlydefaults 

70 kwargs = {arg: 1.0 for arg in spec.kwonlyargs if arg not in defaults} 

71 if p0 is not None: 

72 kwargs.update(p0) 

73 return kwargs 

74 

75 

76def _get_specific_bounds(bounds, name, unit) -> Tuple[float, float]: 

77 if name not in bounds: 

78 return -np.inf, np.inf 

79 b = bounds[name] 

80 if len(b) != 2: 

81 raise ValueError( 

82 "Parameter bounds must be given as a tuple of length 2. " 

83 f"Got a collection of length {len(b)} as bounds for '{name}'." 

84 ) 

85 if isinstance(b[0], Variable): 

86 return ( 

87 b[0].to(unit=unit, dtype=float).value, 

88 b[1].to(unit=unit, dtype=float).value, 

89 ) 

90 return b 

91 

92 

93def _parse_bounds( 

94 bounds, params 

95) -> Union[Tuple[float, float], Tuple[np.ndarray, np.ndarray]]: 

96 if bounds is None: 

97 return -np.inf, np.inf 

98 

99 bounds_tuples = [ 

100 _get_specific_bounds( 

101 bounds, name, param.unit if isinstance(param, Variable) else dimensionless 

102 ) 

103 for name, param in params.items() 

104 ] 

105 bounds_array = np.array(bounds_tuples).T 

106 return bounds_array[0], bounds_array[1] 

107 

108 

109def curve_fit( 

110 f: Callable, 

111 da: DataArray, 

112 *, 

113 p0: Optional[Dict[str, Union[Variable, Real]]] = None, 

114 bounds: Optional[ 

115 Dict[str, Union[Tuple[Variable, Variable], Tuple[Real, Real]]] 

116 ] = None, 

117 **kwargs, 

118) -> Tuple[ 

119 Dict[str, Union[Variable, Real]], Dict[str, Dict[str, Union[Variable, Real]]] 

120]: 

121 """Use non-linear least squares to fit a function, f, to data. 

122 

123 This is a wrapper around :py:func:`scipy.optimize.curve_fit`. See there for a 

124 complete description of parameters. The differences are: 

125 

126 - Instead of separate xdata, ydata, and sigma arguments, the input data array 

127 defines provides these, with sigma defined as the square root of the variances, 

128 if present, i.e., the standard deviations. 

129 - The fit function f must work with scipp objects. This provides additional safety 

130 over the underlying scipy function by ensuring units are consistent. 

131 - The fit function f must only take a single positional argument, x. All other 

132 arguments mapping to fit parameters must be keyword-only arguments. 

133 - The initial guess in p0 must be provided as a dict, mapping from fit-function 

134 parameter names to initial guesses. 

135 - The parameter bounds must also be provided as a dict, like p0. 

136 - The fit parameters may be scalar scipp variables. In that case an initial guess 

137 p0 with the correct units must be provided. 

138 - The returned optimal parameter values popt and the coverance matrix pcov will 

139 have units provided that the initial parameters have units. popt and pcov are 

140 a dict and a dict of dict, respectively. They are indexed using the fit parameter 

141 names. The variance of the returned optimal parameter values is set to the 

142 corresponding diagonal value of the covariance matrix. 

143 

144 Parameters 

145 ---------- 

146 f: 

147 The model function, f(x, ...). It must take the independent variable 

148 (coordinate of the data array da) as the first argument and the parameters 

149 to fit as keyword arguments. 

150 da: 

151 One-dimensional data array. The dimension coordinate for the only 

152 dimension defines the independent variable where the data is measured. The 

153 values of the data array provide the dependent data. If the data array stores 

154 variances then the standard deviations (square root of the variances) are taken 

155 into account when fitting. 

156 p0: 

157 An optional dict of optional initial guesses for the parameters. If None, 

158 then the initial values will all be 1 (if the parameter names for the function 

159 can be determined using introspection, otherwise a ValueError is raised). If 

160 the fit function cannot handle initial values of 1, in particular for parameters 

161 that are not dimensionless, then typically a :py:class:`scipp.UnitError` is 

162 raised, but details will depend on the function. 

163 bounds: 

164 Lower and upper bounds on parameters. 

165 Defaults to no bounds. 

166 Bounds are given as a dict of 2-tuples of (lower, upper) for each parameter 

167 where lower and upper are either both Variables or plain numbers. 

168 Parameters omitted from the `bounds` dict are unbounded. 

169 

170 Returns 

171 ------- 

172 popt: 

173 Optimal values for the parameters. 

174 pcov: 

175 The estimated covariance of popt. 

176 

177 See Also 

178 -------- 

179 scipp.curve_fit: 

180 Supports N-D curve fits. 

181 

182 Examples 

183 -------- 

184 

185 >>> def func(x, *, a, b): 

186 ... return a * sc.exp(-b * x) 

187 

188 >>> x = sc.linspace(dim='x', start=0.0, stop=0.4, num=50, unit='m') 

189 >>> y = func(x, a=5, b=17/sc.Unit('m')) 

190 >>> y.values += 0.01 * np.random.default_rng().normal(size=50) 

191 >>> da = sc.DataArray(y, coords={'x': x}) 

192 

193 >>> from scipp.scipy.optimize import curve_fit 

194 >>> popt, _ = curve_fit(func, da, p0 = {'b': 1.0 / sc.Unit('m')}) 

195 >>> sc.round(sc.values(popt['a'])) 

196 <scipp.Variable> () float64 [dimensionless] 5 

197 >>> sc.round(sc.values(popt['b'])) 

198 <scipp.Variable> () float64 [1/m] 17 

199 

200 Fit-function parameters that have a default value do not participate in the fit 

201 unless an initial guess is provided via the p0 parameters: 

202 

203 >>> from functools import partial 

204 >>> func2 = partial(func, a=5) 

205 >>> popt, _ = curve_fit(func2, da, p0 = {'b': 1.0 / sc.Unit('m')}) 

206 >>> 'a' in popt 

207 False 

208 >>> popt, _ = curve_fit(func2, da, p0 = {'a':2, 'b': 1.0 / sc.Unit('m')}) 

209 >>> 'a' in popt 

210 True 

211 """ 

212 if 'jac' in kwargs: 

213 raise NotImplementedError( 

214 "The 'jac' argument is not yet supported. " 

215 "See https://github.com/scipp/scipp/issues/2544" 

216 ) 

217 for arg in ['xdata', 'ydata', 'sigma']: 

218 if arg in kwargs: 

219 raise TypeError( 

220 f"Invalid argument '{arg}', already defined by the input data array." 

221 ) 

222 if da.sizes[da.dim] != da.coords[da.dim].sizes[da.dim]: 

223 raise BinEdgeError("Cannot fit data array with bin-edge coordinate.") 

224 import scipy.optimize as opt 

225 

226 da = _drop_masked(da, da.dim) 

227 params = _make_defaults(f, p0) 

228 p_units = [ 

229 p.unit if isinstance(p, Variable) else default_unit for p in params.values() 

230 ] 

231 p0 = [p.value if isinstance(p, Variable) else p for p in params.values()] 

232 popt, pcov = opt.curve_fit( 

233 f=_wrap_func(f, params.keys(), p_units), 

234 xdata=da.coords[da.dim], 

235 ydata=da.values, 

236 sigma=_get_sigma(da), 

237 p0=p0, 

238 bounds=_parse_bounds(bounds, params), 

239 **kwargs, 

240 ) 

241 popt = { 

242 name: scalar(value=val, variance=var, unit=u) 

243 for name, val, var, u in zip(params.keys(), popt, np.diag(pcov), p_units) 

244 } 

245 pcov = _covariance_with_units(list(params.keys()), pcov, p_units) 

246 return popt, pcov 

247 

248 

249__all__ = ['curve_fit']