Coverage for install/scipp/core/shape.py: 62%

40 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-04-28 01:28 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Matthew Andrew 

4# ruff: noqa: E501 

5 

6from typing import Dict, List, Optional, Sequence, Tuple, Union 

7 

8import numpy as np 

9 

10from .._scipp import core as _cpp 

11from ..typing import VariableLikeType 

12from . import data_group 

13from ._cpp_wrapper_util import call_func as _call_cpp_func 

14from ._sizes import _parse_dims_shape_sizes 

15from .concepts import transform_data 

16 

17 

def broadcast(
    x: VariableLikeType,
    dims: Optional[Union[List[str], Tuple[str, ...]]] = None,
    shape: Optional[Sequence[int]] = None,
    sizes: Optional[Dict[str, int]] = None,
) -> VariableLikeType:
    """Broadcast a Variable or a DataArray to the requested dims and shape.

    Scipp operations broadcast their operands automatically, so calling this
    function explicitly is rarely necessary.

    For a DataArray input, coordinates and attributes are shallow-copied
    while masks are deep-copied.

    The target layout must be given in exactly one of these two forms:

    - ``dims`` and ``shape``
    - ``sizes``

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data to broadcast.
    dims:
        Labels of the new dimensions.
    shape:
        Extent of each new dimension.
    sizes:
        Mapping from new dimension labels to their sizes.

    Returns
    -------
    : Same type as input
        New Variable or DataArray with requested dimension labels and shape.
    """
    target = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)

    def _expand(obj):
        # Applied by transform_data; forwards the parsed dims/shape to C++.
        return _call_cpp_func(_cpp.broadcast, obj, target["dims"], target["shape"])

    return transform_data(x, _expand)

59 

60 

def concat(x: Sequence[VariableLikeType], dim: str) -> VariableLikeType:
    """Join a sequence of arrays along one dimension.

    There are two modes of concatenation:

    - ``dim`` is an existing dimension: the output extent along ``dim`` is
      the sum of the inputs' extents.
    - ``dim`` is a new dimension that none of the inputs contain: the output
      has one extra dimension.

    For data arrays and datasets, coords and masks are concatenated as well.
    Coords and masks that do not depend on ``dim`` are required to match
    and are copied to the output without changes.

    If the first input is a DataGroup, the call is delegated to
    ``data_group._apply_to_items``.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Sequence of input variables, data arrays, or datasets.
    dim:
        Dimension along which to concatenate.

    Returns
    -------
    : Same type as input
        Concatenation of the inputs.

    Examples
    --------

      >>> a = sc.arange('x', 3)
      >>> b = 100 * sc.arange('x', 3)
      >>> c = sc.concat([a, b], dim='x')
      >>> c
      <scipp.Variable> (x: 6)      int64  [dimensionless]  [0, 1, ..., 100, 200]
      >>> c.values
      array([  0,   1,   2,   0, 100, 200])

      >>> d = sc.concat([a, b], dim='y')
      >>> d
      <scipp.Variable> (y: 2, x: 3)      int64  [dimensionless]  [0, 1, ..., 100, 200]
      >>> d.values
      array([[  0,   1,   2],
             [  0, 100, 200]])

      >>> x = sc.DataArray(sc.arange('x', 3), coords={'x': sc.arange('x', 3)})
      >>> y = sc.DataArray(100 * sc.arange('x', 3), coords={'x': 100 * sc.arange('x', 3)})
      >>> z = sc.concat([x, y], dim='x')
      >>> z
      <scipp.DataArray>
      Dimensions: Sizes[x:6, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0, 1, ..., 100, 200]
      Data:
                                    int64  [dimensionless]  (x)  [0, 1, ..., 100, 200]
      >>> z.values
      array([  0,   1,   2,   0, 100, 200])
    """
    # Everything except a non-empty sequence led by a DataGroup goes straight to C++.
    if not x or not isinstance(x[0], data_group.DataGroup):
        return _call_cpp_func(_cpp.concat, x, dim)
    return data_group._apply_to_items(concat, x, dim)

122 

123 

def fold(
    x: VariableLikeType,
    dim: str,
    sizes: Optional[Dict[str, int]] = None,
    dims: Optional[Union[List[str], Tuple[str, ...]]] = None,
    shape: Optional[Sequence[int]] = None,
) -> VariableLikeType:
    """Fold a single dimension of a variable or data array into multiple dims.

    One and only one of these sets of arguments must be given:

    - ``dims`` and ``shape``
    - ``sizes``

    At most one of the new sizes may be ``-1``. It is replaced by the size
    computed from the length of ``dim`` and the remaining new sizes.
    The ``shape`` and ``sizes`` arguments are never modified.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Variable or DataArray to fold.
    dim:
        A single dim label that will be folded into more dims.
    sizes:
        A dict mapping new dims to new shapes.
    dims:
        A list of new dims labels.
    shape:
        A list of new dim shapes.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the volume of the old shape is not equal to the
        volume of the new shape, or if more than one new size is ``-1``.
    ValueError
        If one new size is ``-1`` and the length of ``dim`` is not divisible
        by the product of the other new sizes.

    Examples
    --------

      >>> v = sc.arange('x', 6)
      >>> v
      <scipp.Variable> (x: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3}).values
      array([[0, 1, 2],
             [3, 4, 5]])

      >>> sc.fold(v, dim='x', dims=['y', 'z'], shape=[2, 3])
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': -1})
      <scipp.Variable> (y: 2, z: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> a = sc.DataArray(0.1 * sc.arange('x', 6), coords={'x': sc.arange('x', 6)})
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.DataArray>
      Dimensions: Sizes[y:2, z:3, ]
      Coordinates:
      * x                           int64  [dimensionless]  (y, z)  [0, 1, ..., 4, 5]
      Data:
                                  float64  [dimensionless]  (y, z)  [0, 0.1, ..., 0.4, 0.5]
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).data.values
      array([[0. , 0.1, 0.2],
             [0.3, 0.4, 0.5]])
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).coords['x'].values
      array([[0, 1, 2],
             [3, 4, 5]])
    """
    parsed = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)

    # Handle potential size of -1.
    # Note that we implement this here on the Python layer, because one cannot create
    # a C++ Dimensions object with negative sizes.
    #
    # Work on a private list: the parsed shape may be the very object the caller
    # passed as ``shape``, which must not be modified, and it may be a tuple or
    # another sequence that supports neither item assignment nor ``count``.
    new_shape = list(parsed["shape"])
    minus_one_count = new_shape.count(-1)
    if minus_one_count > 1:
        raise _cpp.DimensionError(
            "Can only have a single -1 in the new requested shape."
        )
    if minus_one_count == 1:
        ind = new_shape.index(-1)
        # Temporarily use 1 so the product covers only the explicitly given sizes.
        new_shape[ind] = 1
        old_size = x.sizes[dim]
        new_volume = np.prod(new_shape)
        dim_size = old_size // new_volume
        if old_size % new_volume != 0:
            # Report the actual divisor (product of the explicit sizes),
            # not the truncated quotient.
            raise ValueError(
                f"-1 in new shape was computed to be {dim_size}, but the original "
                f"shape {old_size} cannot be divided by {new_volume}."
            )
        new_shape[ind] = dim_size

    return _call_cpp_func(_cpp.fold, x, dim, parsed["dims"], new_shape)

221 

222 

def flatten(
    x: VariableLikeType,
    dims: Optional[Union[List[str], Tuple[str, ...]]] = None,
    to: Optional[str] = None,
) -> VariableLikeType:
    """Merge several dimensions into one.

    A bin-edge coordinate of the input that cannot be joined together is
    not included in the output.

    For a DataArray input, every coord, mask, and attr that contains at least
    one of the flattened dimensions is flattened as well. Consequently, when
    all dims are flattened (``dims=None``), all coords, masks, and attrs that
    share *some or all* dimensions with the data are flattened.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Multi-dimensional input to flatten.
    dims:
        A list of dim labels that will be flattened.
        If ``None``, all dimensions will be flattened.
        If the list is empty, this will effectively add a new inner dimension of length
        1 to the data (meta data such as coords and masks are not touched in this case).
    to:
        A single dim label for the resulting flattened dim.
        Although it has a default of ``None``, it is required.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the input does not have a contiguous memory layout,
        i.e. flattening would require moving data around.
        This can be resolved by (deep-)copying the input.
    ValueError
        If ``to`` is ``None``.

    Examples
    --------

      >>> v = sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3))
      >>> v
      <scipp.Variable> (x: 2, y: 3)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6)      int64  [dimensionless]  [0, 1, ..., 4, 5]

      >>> v = sc.array(dims=['x', 'y', 'z'], values=np.arange(24).reshape(2, 3, 4))
      >>> v
      <scipp.Variable> (x: 2, y: 3, z: 4)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 24)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6, z: 4)      int64  [dimensionless]  [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['y', 'z'], to='u')
      <scipp.Variable> (x: 2, u: 12)      int64  [dimensionless]  [0, 1, ..., 22, 23]

      >>> a = sc.DataArray(0.1 * sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3)),
      ...        coords={'x': sc.arange('x', 2),
      ...                'y': sc.arange('y', 3),
      ...                'xy': sc.array(dims=['x', 'y'],
      ...                               values=np.arange(6).reshape(2, 3))})
      >>> a
      <scipp.DataArray>
      Dimensions: Sizes[x:2, y:3, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0, 1]
      * xy                          int64  [dimensionless]  (x, y)  [0, 1, ..., 4, 5]
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                  float64  [dimensionless]  (x, y)  [0, 0.1, ..., 0.4, 0.5]
      >>> sc.flatten(a, to='u')
      <scipp.DataArray>
      Dimensions: Sizes[u:6, ]
      Coordinates:
      * x                           int64  [dimensionless]  (u)  [0, 0, ..., 1, 1]
      * xy                          int64  [dimensionless]  (u)  [0, 1, ..., 4, 5]
      * y                           int64  [dimensionless]  (u)  [0, 1, ..., 1, 2]
      Data:
                                  float64  [dimensionless]  (u)  [0, 0.1, ..., 0.4, 0.5]

    """
    if to is not None:
        return _call_cpp_func(_cpp.flatten, x, dims, to)
    # ``to`` only has a default because flatten should be callable without
    # kwargs, and in that case it semantically makes more sense for the dims
    # to flatten to come first in the argument list.
    raise ValueError("The final flattened dimension is required.")

315 

316 

def transpose(
    x: VariableLikeType, dims: Optional[Union[List[str], Tuple[str, ...]]] = None
) -> VariableLikeType:
    """Reorder the dimensions of the input.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to transpose.
    dims:
        Dimensions in the desired order.
        If ``None``, the existing order is reversed.

    Returns
    -------
    : Same type as input
        The transpose of the input.

    Raises
    ------
    scipp.DimensionError
        If ``dims`` are incompatible with the input data.
    """
    # An empty list is passed to the C++ layer in place of ``None``.
    if dims is None:
        dims = []
    return _call_cpp_func(_cpp.transpose, x, dims)

341 

342 

def squeeze(
    x: VariableLikeType, dim: Optional[Union[str, List[str], Tuple[str, ...]]] = None
) -> VariableLikeType:
    """Drop dimensions that have length 1.

    Squeezing a dimension is equivalent to indexing it with index 0, i.e.,
    ``squeeze(x, ['x', 'y'])`` is equivalent to ``x['x', 0]['y', 0]``.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to remove dimensions from.
    dim:
        If given, the dimension(s) to squeeze.
        If ``None``, all length-1 dimensions are squeezed.

    Returns
    -------
    : Same type as input
        Input with length-1 dimensions removed.

    Raises
    ------
    scipp.DimensionError
        If a dimension in ``dim`` does not have length 1.

    See Also
    --------
    scipp.Variable.squeeze, scipp.DataArray.squeeze,
    scipp.Dataset.squeeze, numpy.squeeze

    Examples
    --------

      >>> v = sc.arange('a', 3).fold('a', {'x': 1, 'y': 3, 'z': 1})
      >>> v
      <scipp.Variable> (x: 1, y: 3, z: 1)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v)
      <scipp.Variable> (y: 3)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v, 'z')
      <scipp.Variable> (x: 1, y: 3)      int64  [dimensionless]  [0, 1, 2]
      >>> sc.squeeze(v, ['x', 'z'])
      <scipp.Variable> (y: 3)      int64  [dimensionless]  [0, 1, 2]

    Coordinates for squeezed dimensions become unaligned:

      >>> da = sc.DataArray(v, coords={'x': sc.arange('x', 1),
      ...                              'y': sc.arange('y', 3)})
      >>> da
      <scipp.DataArray>
      Dimensions: Sizes[x:1, y:3, z:1, ]
      Coordinates:
      * x                           int64  [dimensionless]  (x)  [0]
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                    int64  [dimensionless]  (x, y, z)  [0, 1, 2]
      >>> sc.squeeze(da)
      <scipp.DataArray>
      Dimensions: Sizes[y:3, ]
      Coordinates:
        x                           int64  [dimensionless]  ()  0
      * y                           int64  [dimensionless]  (y)  [0, 1, 2]
      Data:
                                    int64  [dimensionless]  (y)  [0, 1, 2]
    """
    # A single label is wrapped in a tuple; any other value is forwarded as is.
    if isinstance(dim, str):
        dim = (dim,)
    return _call_cpp_func(_cpp.squeeze, x, dim)