Coverage for install/scipp/core/shape.py: 53%

55 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-17 01:51 +0000

1# SPDX-License-Identifier: BSD-3-Clause 

2# Copyright (c) 2023 Scipp contributors (https://github.com/scipp) 

3# @author Matthew Andrew 

4# ruff: noqa: E501 

5 

6from collections.abc import Sequence 

7from typing import overload 

8 

9import numpy as np 

10 

11from .._scipp import core as _cpp 

12from ..typing import VariableLikeType 

13from . import data_group 

14from ._cpp_wrapper_util import call_func as _call_cpp_func 

15from ._sizes import _parse_dims_shape_sizes 

16from .concepts import transform_data 

17from .cpp_classes import Variable 

18 

19 

# Typing overload: target given as parallel ``dims`` and ``shape`` sequences.
@overload
def broadcast(
    x: VariableLikeType,
    *,
    dims: Sequence[str],
    shape: Sequence[int],
) -> VariableLikeType: ...


# Typing overload: target given as a single ``sizes`` mapping.
@overload
def broadcast(
    x: VariableLikeType,
    *,
    sizes: dict[str, int],
) -> VariableLikeType: ...


def broadcast(
    x: VariableLikeType,
    *,
    dims: Sequence[str] | None = None,
    shape: Sequence[int] | None = None,
    sizes: dict[str, int] | None = None,
) -> VariableLikeType:
    """Broadcast a Variable or a DataArray to the requested dims and shape.

    Scipp operations broadcast their operands automatically, so calling this
    function explicitly is rarely required.

    If the input is a DataArray, coordinates and attributes are shallow-copied
    and masks are deep-copied.

    Exactly one of the following sets of arguments must be given:

    - ``dims`` and ``shape``
    - ``sizes``

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Input data to broadcast.
    dims:
        List of new dimensions.
    shape:
        New extents in each dimension.
    sizes:
        New dimension labels to sizes map.

    Returns
    -------
    : Same type as input
        New Variable or DataArray with requested dimension labels and shape.
    """
    # Normalize both calling conventions into one dims/shape pair.
    parsed = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)
    target_dims, target_shape = parsed["dims"], parsed["shape"]

    def _broadcast(var: Variable) -> Variable:
        # Applied by ``transform_data`` to the data of ``x``.
        return _call_cpp_func(_cpp.broadcast, var, target_dims, target_shape)  # type: ignore[return-value]

    return transform_data(x, _broadcast)

81 

82 

def concat(x: Sequence[VariableLikeType], dim: str) -> VariableLikeType:
    """Concatenate input arrays along the given dimension.

    There are two modes of concatenation:

    - Along an existing dimension. The extent of that dimension in the output
      is the sum of the extents of the inputs.
    - Along a new dimension that is not contained in any of the inputs.
      The output then has one extra dimension.

    For data arrays and datasets, the coords and masks are concatenated as well.
    Coords and masks that do not depend on the given dimension are required
    to match and are copied to the output without changes.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Sequence of input variables, data arrays, or datasets.
    dim:
        Dimension along which to concatenate.

    Returns
    -------
    : Same type as input
        Concatenation of the inputs.

    Examples
    --------

      >>> a = sc.arange('x', 3)
      >>> b = 100 * sc.arange('x', 3)
      >>> c = sc.concat([a, b], dim='x')
      >>> c
      <scipp.Variable> (x: 6) int64 [dimensionless] [0, 1, ..., 100, 200]
      >>> c.values
      array([  0,   1,   2,   0, 100, 200])

      >>> d = sc.concat([a, b], dim='y')
      >>> d
      <scipp.Variable> (y: 2, x: 3) int64 [dimensionless] [0, 1, ..., 100, 200]
      >>> d.values
      array([[  0,   1,   2],
             [  0, 100, 200]])

      >>> x = sc.DataArray(sc.arange('x', 3), coords={'x': sc.arange('x', 3)})
      >>> y = sc.DataArray(100 * sc.arange('x', 3), coords={'x': 100 * sc.arange('x', 3)})
      >>> z = sc.concat([x, y], dim='x')
      >>> z
      <scipp.DataArray>
      Dimensions: Sizes[x:6, ]
      Coordinates:
      * x int64 [dimensionless] (x) [0, 1, ..., 100, 200]
      Data:
        int64 [dimensionless] (x) [0, 1, ..., 100, 200]
      >>> z.values
      array([  0,   1,   2,   0, 100, 200])
    """
    # Only the first element is inspected to decide whether the inputs are
    # data groups; an empty sequence is passed straight to the C++ layer.
    first_is_data_group = bool(x) and isinstance(x[0], data_group.DataGroup)
    if not first_is_data_group:
        return _call_cpp_func(_cpp.concat, x, dim)  # type: ignore[return-value]
    # Data groups are handled by applying ``concat`` to their items.
    return data_group.apply_to_items(concat, x, dim)

148 

149 

# Typing overload: new layout given as parallel ``dims`` and ``shape`` sequences.
@overload
def fold(
    x: VariableLikeType,
    dim: str,
    *,
    dims: Sequence[str],
    shape: Sequence[int],
) -> VariableLikeType: ...


# Typing overload: new layout given as a single ``sizes`` mapping.
@overload
def fold(
    x: VariableLikeType,
    dim: str,
    *,
    sizes: dict[str, int],
) -> VariableLikeType: ...


def fold(
    x: VariableLikeType,
    dim: str,
    *,
    dims: Sequence[str] | None = None,
    shape: Sequence[int] | None = None,
    sizes: dict[str, int] | None = None,
) -> VariableLikeType:
    """Fold a single dimension of a variable or data array into multiple dims.

    One and only one of these sets of arguments must be given:

    - ``dims`` and ``shape``
    - ``sizes``

    At most one of the new sizes may be ``-1``. Its value is then computed
    from the size of ``dim`` and the remaining new sizes.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Variable or DataArray to fold.
    dim:
        A single dim label that will be folded into more dims.
    sizes:
        A dict mapping new dims to new shapes.
    dims:
        A list of new dims labels.
    shape:
        A list of new dim shapes.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the volume of the old shape is not equal to the
        volume of the new shape, or if more than one ``-1`` is
        given in the new shape.
    ValueError
        If a size of ``-1`` is requested but ``x.sizes[dim]`` is ``None``
        (inconsistent size), or if the size of ``dim`` is not divisible
        by the product of the other new sizes.

    Examples
    --------

      >>> v = sc.arange('x', 6)
      >>> v
      <scipp.Variable> (x: 6) int64 [dimensionless] [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.Variable> (y: 2, z: 3) int64 [dimensionless] [0, 1, ..., 4, 5]
      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': 3}).values
      array([[0, 1, 2],
             [3, 4, 5]])

      >>> sc.fold(v, dim='x', dims=['y', 'z'], shape=[2, 3])
      <scipp.Variable> (y: 2, z: 3) int64 [dimensionless] [0, 1, ..., 4, 5]

      >>> sc.fold(v, dim='x', sizes={'y': 2, 'z': -1})
      <scipp.Variable> (y: 2, z: 3) int64 [dimensionless] [0, 1, ..., 4, 5]

      >>> a = sc.DataArray(0.1 * sc.arange('x', 6), coords={'x': sc.arange('x', 6)})
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3})
      <scipp.DataArray>
      Dimensions: Sizes[y:2, z:3, ]
      Coordinates:
      * x int64 [dimensionless] (y, z) [0, 1, ..., 4, 5]
      Data:
        float64 [dimensionless] (y, z) [0, 0.1, ..., 0.4, 0.5]
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).data.values
      array([[0. , 0.1, 0.2],
             [0.3, 0.4, 0.5]])
      >>> sc.fold(a, dim='x', sizes={'y': 2, 'z': 3}).coords['x'].values
      array([[0, 1, 2],
             [3, 4, 5]])
    """
    dims_and_shape = _parse_dims_shape_sizes(dims=dims, shape=shape, sizes=sizes)
    new_dims = dims_and_shape["dims"]
    new_shape = list(dims_and_shape["shape"])

    # Handle potential size of -1.
    # Note that we implement this here on the Python layer, because one cannot create
    # a C++ Dimensions object with negative sizes.
    minus_one_count = new_shape.count(-1)
    if minus_one_count > 1:
        raise _cpp.DimensionError(
            "Can only have a single -1 in the new requested shape."
        )
    if minus_one_count == 1:
        if (size := x.sizes[dim]) is None:
            raise ValueError(
                f"Dim {dim} has inconsistent size, cannot compute final shape."
            )
        ind = new_shape.index(-1)
        # Put 1 in place of the -1 so that the product below is the volume
        # of the explicitly requested sizes only.
        new_shape[ind] = 1
        new_volume = np.prod(new_shape)
        # NOTE(review): if another requested size is 0, ``new_volume`` is 0 and
        # the operations below divide by zero -- confirm the intended behavior.
        dim_size = int(size // new_volume)
        if size % new_volume != 0:
            # Report the actual divisor (``new_volume``); the inferred
            # ``dim_size`` may well divide ``size`` even when this check fails.
            raise ValueError(
                f"-1 in new shape was computed to be {dim_size}, but the original "
                f"shape {size} cannot be divided by {new_volume}, the product of "
                "the other requested sizes."
            )
        new_shape[ind] = dim_size

    return _call_cpp_func(_cpp.fold, x, dim, new_dims, new_shape)  # type: ignore[return-value]

270 

271 

def flatten(
    x: VariableLikeType,
    dims: Sequence[str] | None = None,
    to: str | None = None,
) -> VariableLikeType:
    """Flatten multiple dimensions into a single dimension.

    A bin-edge coordinate of the input that cannot be joined together is not
    included in the output.

    For a DataArray input, coords, masks, and attrs that contain at least one
    of the flattened dimensions are flattened as well. Consequently, when
    flattening all dims, i.e., when ``dims=None``, all coords, masks, and attrs
    that share *some or all* dimensions with the data will be flattened.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Multi-dimensional input to flatten.
    dims:
        A list of dim labels that will be flattened.
        If ``None``, all dimensions will be flattened.
        If the list is empty, this will effectively add a new inner dimension of length
        1 to the data (meta data such as coords and masks are not touched in this case).
    to:
        A single dim label for the resulting flattened dim.
        Although it has a default of ``None``, it must be provided.

    Returns
    -------
    : Same type as input
        Variable or DataArray with requested dimension labels and shape.

    Raises
    ------
    scipp.DimensionError
        If the input does not have a contiguous memory layout,
        i.e. flattening would require moving data around.
        This can be resolved by (deep-)copying the input.
    ValueError
        If ``to`` is ``None``.

    Examples
    --------

      >>> v = sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3))
      >>> v
      <scipp.Variable> (x: 2, y: 3) int64 [dimensionless] [0, 1, ..., 4, 5]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 6) int64 [dimensionless] [0, 1, ..., 4, 5]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6) int64 [dimensionless] [0, 1, ..., 4, 5]

      >>> v = sc.array(dims=['x', 'y', 'z'], values=np.arange(24).reshape(2, 3, 4))
      >>> v
      <scipp.Variable> (x: 2, y: 3, z: 4) int64 [dimensionless] [0, 1, ..., 22, 23]
      >>> sc.flatten(v, to='u')
      <scipp.Variable> (u: 24) int64 [dimensionless] [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['x', 'y'], to='u')
      <scipp.Variable> (u: 6, z: 4) int64 [dimensionless] [0, 1, ..., 22, 23]
      >>> sc.flatten(v, dims=['y', 'z'], to='u')
      <scipp.Variable> (x: 2, u: 12) int64 [dimensionless] [0, 1, ..., 22, 23]

      >>> a = sc.DataArray(0.1 * sc.array(dims=['x', 'y'], values=np.arange(6).reshape(2, 3)),
      ...        coords={'x': sc.arange('x', 2),
      ...                'y': sc.arange('y', 3),
      ...                'xy': sc.array(dims=['x', 'y'],
      ...                               values=np.arange(6).reshape(2, 3))})
      >>> a
      <scipp.DataArray>
      Dimensions: Sizes[x:2, y:3, ]
      Coordinates:
      * x int64 [dimensionless] (x) [0, 1]
      * xy int64 [dimensionless] (x, y) [0, 1, ..., 4, 5]
      * y int64 [dimensionless] (y) [0, 1, 2]
      Data:
        float64 [dimensionless] (x, y) [0, 0.1, ..., 0.4, 0.5]
      >>> sc.flatten(a, to='u')
      <scipp.DataArray>
      Dimensions: Sizes[u:6, ]
      Coordinates:
      * x int64 [dimensionless] (u) [0, 0, ..., 1, 1]
      * xy int64 [dimensionless] (u) [0, 1, ..., 4, 5]
      * y int64 [dimensionless] (u) [0, 1, ..., 1, 2]
      Data:
        float64 [dimensionless] (u) [0, 0.1, ..., 0.4, 0.5]

    """
    if to is not None:
        return _call_cpp_func(_cpp.flatten, x, dims, to)  # type: ignore[return-value]
    # ``to`` only has a default because flatten should be callable without
    # kwargs, and in that case it semantically makes more sense for the dims
    # to flatten to come first in the argument list. It is required anyway.
    raise ValueError("The final flattened dimension is required.")

364 

365 

def transpose(
    x: VariableLikeType, dims: Sequence[str] | None = None
) -> VariableLikeType:
    """Transpose dimensions of the input.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to transpose.
    dims:
        List of dimensions in desired order.
        If ``None``, reverses existing order.

    Returns
    -------
    : Same type as input
        The transpose of the input.

    Raises
    ------
    scipp.DimensionError
        If ``dims`` are incompatible with the input data.
    """
    # The C++ layer is given an empty list when no order was requested.
    requested_order = [] if dims is None else dims
    return _call_cpp_func(_cpp.transpose, x, requested_order)  # type: ignore[return-value]

390 

391 

def squeeze(
    x: VariableLikeType, dim: str | Sequence[str] | None = None
) -> VariableLikeType:
    """Remove dimensions of length 1.

    Squeezing is equivalent to indexing the squeezed dimensions with index 0,
    that is ``squeeze(x, ['x', 'y'])`` is equivalent to ``x['x', 0]['y', 0]``.

    Parameters
    ----------
    x: scipp.typing.VariableLike
        Object to remove dimensions from.
    dim:
        If given, the dimension(s) to squeeze.
        If ``None``, all length-1 dimensions are squeezed.

    Returns
    -------
    : Same type as input
        Input with length-1 dimensions removed.

    Raises
    ------
    scipp.DimensionError
        If a dimension in ``dim`` does not have length 1.

    See Also
    --------
    scipp.Variable.squeeze, scipp.DataArray.squeeze,
    scipp.Dataset.squeeze, numpy.squeeze

    Examples
    --------

      >>> v = sc.arange('a', 3).fold('a', sizes={'x': 1, 'y': 3, 'z': 1})
      >>> v
      <scipp.Variable> (x: 1, y: 3, z: 1) int64 [dimensionless] [0, 1, 2]
      >>> sc.squeeze(v)
      <scipp.Variable> (y: 3) int64 [dimensionless] [0, 1, 2]
      >>> sc.squeeze(v, 'z')
      <scipp.Variable> (x: 1, y: 3) int64 [dimensionless] [0, 1, 2]
      >>> sc.squeeze(v, ['x', 'z'])
      <scipp.Variable> (y: 3) int64 [dimensionless] [0, 1, 2]

    Coordinates for squeezed dimensions become unaligned:

      >>> da = sc.DataArray(v, coords={'x': sc.arange('x', 1),
      ...                              'y': sc.arange('y', 3)})
      >>> da
      <scipp.DataArray>
      Dimensions: Sizes[x:1, y:3, z:1, ]
      Coordinates:
      * x int64 [dimensionless] (x) [0]
      * y int64 [dimensionless] (y) [0, 1, 2]
      Data:
        int64 [dimensionless] (x, y, z) [0, 1, 2]
      >>> sc.squeeze(da)
      <scipp.DataArray>
      Dimensions: Sizes[y:3, ]
      Coordinates:
        x int64 [dimensionless] () 0
      * y int64 [dimensionless] (y) [0, 1, 2]
      Data:
        int64 [dimensionless] (y) [0, 1, 2]
    """
    # A single label is wrapped in a tuple; sequences and ``None`` are
    # forwarded to the C++ layer unchanged.
    if isinstance(dim, str):
        squeeze_dims = (dim,)
    else:
        squeeze_dims = dim
    return _call_cpp_func(_cpp.squeeze, x, squeeze_dims)  # type: ignore[return-value]