None
Test: ingestor|bifrost|file_found_by_scicat_is_consistent_with_manual|
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
instrument = 'bifrost', coda_proposal_id = '977695'
def test_file_found_by_scicat_is_consistent_with_manual(
instrument: str,
coda_proposal_id: str,
) -> None:
scicat_path = scicat.get_latest_coda_nexus_path(
proposal_id=coda_proposal_id, instrument=instrument
)
> raw_paths = coda.get_latest_nexus_paths(
proposal_id=coda_proposal_id, instrument=instrument, n=2, inspect=40
)
tests/ingestor/ingestor_test.py:28:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
instrument = 'BIFROST', proposal_id = '977695', n = 2, inspect = 40
def get_latest_nexus_paths(
instrument: str, proposal_id: str | None, n: int = 1, inspect: int = 20
) -> list[Path]:
"""
Get the latest file paths manually by sorting the files in the directory.
Parameters
----------
instrument:
The instrument name.
proposal_id:
SciCat proposal ID for CODA.
If ``None``, the environment variable ``OVERRIDE_RAW_DATA_DIR`` must be set.
n:
The number of files to return.
inspect:
The number of files to inspect before returning.
Returns
-------
:
Paths to the n latest files for the given instrument and proposal.
"""
instrument = NEXUS_INSTRUMENT_NAME[instrument]
path = _coda_raw_data_path(proposal_id)
files_found = []
logging.info(
"Scanning the %d latest files in %s for instrument %s",
inspect,
path,
instrument,
)
recent_files = sorted(path.glob("*.hdf"))[-inspect::][::-1]
logging.info("Recent files: %s", recent_files)
for file in recent_files:
logging.info("Inspecting file: %s", file)
try:
with snx.File(file, "r", locking=False) as f:
instrument_in_file = f["/entry/instrument/name"][()]
if instrument_in_file == instrument:
logging.info(
"File %s matches requested instrument %s. Adding to list. "
"Last modified: %s",
file,
instrument,
datetime.fromtimestamp(
os.stat(file).st_mtime, tz=local_timezone()
),
)
files_found.append(file)
if len(files_found) == n:
logging.info("Found %d files. Returning.", n)
return files_found
else:
logging.info(
"File instrument is %s, but %s was requested. Skipping.",
instrument_in_file,
instrument,
)
except OSError as e:
logging.warning(
"File %s could not be opened. Reason: %s",
file,
e,
)
continue
logging.error("No files found for instrument %s", instrument)
> raise RuntimeError(f"No files found for instrument {instrument}")
E RuntimeError: No files found for instrument BIFROST
.tox/ingestor/lib/python3.12/site-packages/dmsc_nightly/nexusfiles/coda.py:94: RuntimeError
None
None
None
None
instrument = 'bifrost', coda_proposal_id = '977695'
def test_file_found_by_scicat_is_consistent_with_manual(
instrument: str,
coda_proposal_id: str,
) -> None:
scicat_path = scicat.get_latest_coda_nexus_path(
proposal_id=coda_proposal_id, instrument=instrument
)
raw_paths = coda.get_latest_nexus_paths(
proposal_id=coda_proposal_id, instrument=instrument, n=2, inspect=40
)
# Resolve symlinks because the raw folder is symlinked to `/ess/data`
# and we don't care whether we access files through links or regular paths.
scicat_path = scicat_path.resolve()
raw_paths = [path.resolve() for path in raw_paths]
> assert scicat_path in raw_paths
E AssertionError: assert PosixPath('/ess/raw/coda/2025/977695/raw/977695_00082117.hdf') in [PosixPath('/ess/raw/coda/2025/977695/raw/977695_00082165.hdf'), PosixPath('/ess/raw/coda/2025/977695/raw/977695_00082159.hdf')]
tests/ingestor/ingestor_test.py:37: AssertionError
None
instrument = 'bifrost', coda_proposal_id = '977695'
def test_file_found_by_scicat_is_consistent_with_manual(
instrument: str,
coda_proposal_id: str,
) -> None:
scicat_path = scicat.get_latest_coda_nexus_path(
proposal_id=coda_proposal_id, instrument=instrument
)
> raw_paths = coda.get_latest_nexus_paths(
proposal_id=coda_proposal_id, instrument=instrument, n=2, inspect=40
)
tests/ingestor/ingestor_test.py:28:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
instrument = 'BIFROST', proposal_id = '977695', n = 2, inspect = 40
def get_latest_nexus_paths(
instrument: str, proposal_id: str | None, n: int = 1, inspect: int = 20
) -> list[Path]:
"""
Get the latest file paths manually by sorting the files in the directory.
Parameters
----------
instrument:
The instrument name.
proposal_id:
SciCat proposal ID for CODA.
If ``None``, the environment variable ``OVERRIDE_RAW_DATA_DIR`` must be set.
n:
The number of files to return.
inspect:
The number of files to inspect before returning.
Returns
-------
:
Paths to the n latest files for the given instrument and proposal.
"""
instrument = NEXUS_INSTRUMENT_NAME[instrument]
path = _coda_raw_data_path(proposal_id)
files_found = []
logging.info(
"Scanning the %d latest files in %s for instrument %s",
inspect,
path,
instrument,
)
recent_files = sorted(path.glob("*.hdf"))[-inspect::][::-1]
logging.info("Recent files: %s", recent_files)
for file in recent_files:
logging.info("Inspecting file: %s", file)
try:
with snx.File(file, "r") as f:
instrument_in_file = f["/entry/instrument/name"][()]
if instrument_in_file == instrument:
logging.info(
"File %s matches requested instrument %s. Adding to list. "
"Last modified: %s",
file,
instrument,
datetime.fromtimestamp(
os.stat(file).st_mtime, tz=local_timezone()
),
)
files_found.append(file)
if len(files_found) == n:
logging.info("Found %d files. Returning.", n)
return files_found
else:
logging.info(
"File instrument is %s, but %s was requested. Skipping.",
instrument_in_file,
instrument,
)
except OSError as e:
logging.warning(
"File %s could not be opened. Reason: %s",
file,
e,
)
continue
logging.error("No files found for instrument %s", instrument)
> raise RuntimeError(f"No files found for instrument {instrument}")
E RuntimeError: No files found for instrument BIFROST
.tox/ingestor/lib/python3.12/site-packages/dmsc_nightly/nexusfiles/coda.py:94: RuntimeError
None
None