diff --git a/requirements.txt b/requirements.txt
index 046399e..70c1dd7 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,6 @@ wget>=3.2
 ty
 ruff
 beautifulsoup4
+h5py
+cdflib
+netcdf4
diff --git a/swvo/io/RBMDataSet/RBMDataSet.py b/swvo/io/RBMDataSet/RBMDataSet.py
index b135f4b..65db360 100644
--- a/swvo/io/RBMDataSet/RBMDataSet.py
+++ b/swvo/io/RBMDataSet/RBMDataSet.py
@@ -9,9 +9,11 @@ from __future__ import annotations
 
 import datetime as dt
+import logging
+import warnings
 from datetime import timedelta, timezone
 from pathlib import Path
-from typing import Any, Literal, cast
+from typing import Any, Literal, Optional, cast
 
 import distance
 import numpy as np
@@ -41,10 +43,14 @@
     join_var,
     load_file_any_format,
     matlab2python,
+    read_all_datasets_cdf,
+    read_all_datasets_h5,
     read_all_datasets_netcdf,
 )
 from swvo.io.utils import enforce_utc_timezone
 
+logger = logging.getLogger(__name__)
+
 
 class RBMDataSet:
-    """RBMDataSet class supporting .mat, .pickle, and .nc file formats.
+    """RBMDataSet class supporting .mat, .pickle, .nc, .cdf, and .h5 file formats.
@@ -67,14 +73,16 @@ class RBMDataSet:
         Start time for file-based loading.
     end_time : dt.datetime, optional
         End time for file-based loading.
-    folder_path : Path, optional
+    folder_path : Path | str, optional
         Base folder path for file-based loading.
-    preferred_extension : Literal["mat", "pickle", "nc"], optional
-        Preferred file extension for file-based loading. Default is "pickle".
+    preferred_extension : Literal["mat", "pickle", "nc", "cdf", "h5"], optional
+        Preferred file extension for file-based loading. Default is "nc".
     verbose : bool, optional
-        Whether to print verbose output. Default is True.
+        Whether to log verbose output. Default is True.
     enable_dict_loading : bool, optional
         Enable dictionary-based loading even in file mode. Default is False.
+    dataorg : bool, optional
+        Whether to load files saved using the new DataOrgStrategy layout. Default is False.
 
     Attributes
     ----------
@@ -102,7 +110,7 @@ class RBMDataSet:
     """
 
-    _preferred_ext: Literal["mat", "pickle", "nc"]
+    _preferred_ext: Literal["mat", "pickle", "nc", "cdf", "h5"]
 
     datetime: list[dt.datetime]
     time: NDArray[np.float64]
@@ -133,11 +141,12 @@ def __init__(
         mfm: MfmLike,
         start_time: dt.datetime | None = None,
         end_time: dt.datetime | None = None,
-        folder_path: Path | None = None,
-        preferred_extension: Literal["mat", "pickle", "nc"] = "nc",
+        folder_path: Optional[Path | str] = None,
+        preferred_extension: Literal["mat", "pickle", "nc", "cdf", "h5"] = "nc",
         *,
         verbose: bool = True,
         enable_dict_loading: bool = False,
+        dataorg: bool = False,
     ) -> None:
         self.possible_variables: list[str] = list(VariableLiteral.__args__)
@@ -156,9 +165,21 @@ def __init__(
 
         if isinstance(mfm, str):
             mfm = MfmEnum[mfm.upper()]
 
+        if preferred_extension == "pickle":
+            warnings.warn(
+                "The '.pickle' file format is deprecated and will be removed in a future release",
+                FutureWarning,
+                stacklevel=2,
+            )
         # Validate preferred_extension
-        if preferred_extension not in ("mat", "pickle", "nc"):
-            msg = f"preferred_extension must be 'mat', 'pickle', or 'nc', got '{preferred_extension}'"
+        if preferred_extension not in ("mat", "pickle", "nc", "cdf", "h5"):
+            msg = f"preferred_extension must be 'mat', 'pickle', 'nc', 'cdf', or 'h5', got '{preferred_extension}'"
+            raise ValueError(msg)
+        if dataorg and preferred_extension in ("nc", "cdf", "h5"):
+            msg = "dataorg=True is only supported with the 'mat' or 'pickle' extension"
+            raise ValueError(msg)
+        if not dataorg and preferred_extension == "pickle":
+            msg = "preferred_extension='pickle' is only supported with dataorg=True"
             raise ValueError(msg)
 
         # Store the original satellite enum for properties and other attributes
@@ -186,13 +207,14 @@ def __init__(
         self._folder_path = Path(folder_path)
         self._folder_type = self._satellite.folder_type
         self._file_path_stem = self._create_file_path_stem()
-        self._is_nc_dataset = self._check_if_nc_dataset()
+        self._is_dataorg_dataset = dataorg
+        self._is_monthly_dataset = self._check_if_monthly_dataset()
         self._file_name_stem = self._create_file_name_stem()
         self._file_cadence = self._satellite.file_cadence
         self._date_of_files = self._create_date_list()
         self._file_loading_mode = True
         self._enable_dict_loading = enable_dict_loading
-        self._netcdf_dataset_cache: dict[Path, dict[str, Any]] = {}
+        self._monthly_dataset_cache: dict[Path, dict[str, Any]] = {}
 
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({self._satellite}, {self._instrument}, {self._mfm})"
@@ -234,16 +256,17 @@ def __getattr__(self, name: str) -> NDArray[np.float64]:
             return getattr(self, name)
 
         if not self._file_loading_mode and name in self.possible_variables:
-            raise AttributeError(
-                f"Attribute '{name}' exists in `VariableLiteral` but has not been set. "
-                "Call `update_from_dict()` before accessing it."
-            )
+            msg = (
+                f"Attribute '{name}' exists in `VariableLiteral` but has not been set. "
+                "Call `update_from_dict()` before accessing it."
+            )
+            logger.error(msg)
+            raise AttributeError(msg)
 
         if levenstein_info["min_distance"] <= 2:
             msg = f"{self.__class__.__name__} object has no attribute {name}. Maybe you meant {levenstein_info['var_name']}?"
+        elif name == "custom":
+            msg = (
+                f"{self.__class__.__name__} object might have custom variables. To access them, first load any "
+                "standard variable to trigger the loading process; custom variables are then available via "
+                "`.custom['custom_var_name']`."
+            )
         else:
             msg = f"{self.__class__.__name__} object has no attribute {name}"
-
+        logger.error(msg)
         raise AttributeError(msg)
 
     def load(self, name_or_var: str | VariableEnum) -> None:
@@ -328,16 +351,23 @@ def update_from_dict(
 
     def get_var(self, var: VariableEnum) -> NDArray[np.float64]:
         return getattr(self, var.var_name)
 
-    def _check_if_nc_dataset(self) -> bool:
+    def _check_if_monthly_dataset(self) -> bool:
         does_processed_mat_files_folder_exist = (self._file_path_stem / "Processed_Mat_Files").exists()
 
+        if self._is_dataorg_dataset and self._preferred_ext in ["mat", "pickle"]:
+            if not does_processed_mat_files_folder_exist:
+                logger.warning("`dataorg` is set to True but the Processed_Mat_Files folder does not exist.")
+            return False
+        if not self._is_dataorg_dataset and self._preferred_ext in ["mat", "h5", "nc", "cdf"]:
+            return True
+
+        if does_processed_mat_files_folder_exist and self._preferred_ext in ["mat", "pickle"]:
             return False
-        elif does_processed_mat_files_folder_exist and self._preferred_ext == "nc":
-            # if any .nc files are stored in the file_path_stem, we switch to nc mode
-            return next(self._file_path_stem.glob("*.nc"), None) is not None
+        elif does_processed_mat_files_folder_exist and self._preferred_ext in ["nc", "cdf", "h5"]:
+            # if any files with the preferred extension are stored in the file_path_stem,
+            # we switch to monthly (non-dataorg) mode
+            return next(self._file_path_stem.glob(f"*.{self._preferred_ext}"), None) is not None
         else:
-            # if the Processed_Mat_Files folder does not exist, it is safe to assume nc mode
+            # if the Processed_Mat_Files folder does not exist, it is safe to assume monthly (non-dataorg) mode
             return True
 
     def _create_date_list(self) -> list[dt.datetime]:
@@ -420,10 +450,22 @@ def _load_variable(self, var: Variable | VariableEnum) -> None:
             date_str = f"{start_month.strftime('%Y%m%d')}to{next_month.strftime('%Y%m%d')}"
 
             # 3. Handle File Pathing & Loading based on format
-            if self._is_nc_dataset:
-                file_name = f"{self._file_name_stem}{date_str}_{self._mfm.mfm_name}.nc"
+            if self._is_monthly_dataset:
+                file_name = f"{self._file_name_stem}{date_str}_{self._mfm.mfm_name}.{self._preferred_ext}"
                 full_file_path = self._file_path_stem / file_name
-                file_content = self._get_cached_datasets_netcdf(full_file_path)
+                if not full_file_path.exists():
+                    logger.warning(f"File not found: {full_file_path}")
+                    file_content = {}
+                elif self._preferred_ext == "nc":
+                    file_content = self._get_cached_datasets_netcdf(full_file_path)
+                elif self._preferred_ext == "h5":
+                    file_content = self._get_cached_datasets_h5(full_file_path)
+                elif self._preferred_ext == "cdf":
+                    file_content = self._get_cached_datasets_cdf(full_file_path)
+                else:
+                    if self._verbose:
+                        logger.info(f"Loading {full_file_path}")
+                    file_content = load_file_any_format(full_file_path)
             else:
                 file_name_no_format = f"{self._file_name_stem}{date_str}_{var.mat_file_prefix}"
                 if var.mat_has_B:
@@ -431,14 +473,14 @@ def _load_variable(self, var: Variable | VariableEnum) -> None:
                     file_name_no_format += "_ver4"
 
                 full_file_path = get_file_path_any_format(
-                    self._file_path_stem, file_name_no_format, self._preferred_ext, self._is_nc_dataset
+                    self._file_path_stem, file_name_no_format, self._preferred_ext, self._is_monthly_dataset
                 )
 
                 if full_file_path is None:
-                    print(f"File not found: {file_name_no_format}")
+                    logger.warning(f"File not found: {file_name_no_format}")
                     continue
 
                 if self._verbose:
-                    print(f"\tLoading {full_file_path}")
+                    logger.info(f"Loading {full_file_path}")
                 file_content = load_file_any_format(full_file_path)
 
             if not file_content:
@@ -446,13 +488,17 @@ def _load_variable(self, var: Variable | VariableEnum) -> None:
             # 4. Process Datetimes
             raw_times = file_content["time"]
-            if self._is_nc_dataset:
-                # NetCDF timestamp logic
+            if self._is_monthly_dataset and self._preferred_ext in ["nc", "h5", "cdf", "mat"]:
+                # POSIX timestamp logic (nc, h5, cdf, and monthly mat files)
+                if self._preferred_ext == "mat":
+                    logger.info(
+                        "Assuming time variable in .mat files is in POSIX timestamp format (seconds since 1970-01-01T00:00:00Z)"
+                    )
                 datetimes = np.asarray(
                     [dt.datetime.fromtimestamp(t.astype(np.int64), tz=dt.timezone.utc) for t in raw_times]
                 )
             else:
-                # Matlab logic
+                # Matlab/pickle datenum logic
                 datetimes = np.asarray([matlab2python(t) for t in raw_times])
             file_content["datetime"] = datetimes
@@ -486,8 +532,18 @@ def _load_variable(self, var: Variable | VariableEnum) -> None:
             for var_name in var_names_stored:
                 val = list(loaded_var_arrs[var_name]) if var_name == "datetime" else loaded_var_arrs[var_name]
-                if self._is_nc_dataset:
-                    # NetCDF name mapping logic
+                if self._is_monthly_dataset and self._preferred_ext in ["nc", "h5", "cdf", "mat"]:
+                    if var_name.startswith("custom/"):
+                        custom_key = var_name.split("/", 1)[1]
+                        if custom_key:
+                            custom_dict = self.__dict__.get("custom")
+                            if not isinstance(custom_dict, dict):
+                                custom_dict = {}
+                            custom_dict[custom_key] = val
+                            self.custom = custom_dict
+                        continue
+
+                    # NetCDF/HDF5/CDF name mapping logic
                     rbm_names = self._get_rbm_name_for_nc(var_name, self._mfm.mfm_name)  # type: ignore
                     if rbm_names:
                         for name in rbm_names if isinstance(rbm_names, list) else [rbm_names]:
@@ -498,12 +554,32 @@ def _load_variable(self, var: Variable | VariableEnum) -> None:
 
     def _get_cached_datasets_netcdf(self, file_path: Path) -> dict[str, Any]:
         """Return cached parsed NetCDF content for a monthly file."""
         file_path = Path(file_path)
-        if file_path not in self._netcdf_dataset_cache:
+        if file_path not in self._monthly_dataset_cache:
             if self._verbose:
-                print(f"\tLoading {file_path}")
+                logger.info(f"Loading netCDF {file_path}")
 
-            self._netcdf_dataset_cache[file_path] = read_all_datasets_netcdf(file_path)
-        return self._netcdf_dataset_cache[file_path]
+            self._monthly_dataset_cache[file_path] = read_all_datasets_netcdf(file_path)
+        return self._monthly_dataset_cache[file_path]
+
+    def _get_cached_datasets_h5(self, file_path: Path) -> dict[str, Any]:
+        """Return cached parsed HDF5 content for a monthly file."""
+        file_path = Path(file_path)
+        if file_path not in self._monthly_dataset_cache:
+            if self._verbose:
+                logger.info(f"Loading HDF5 {file_path}")
+
+            self._monthly_dataset_cache[file_path] = read_all_datasets_h5(file_path)
+        return self._monthly_dataset_cache[file_path]
+
+    def _get_cached_datasets_cdf(self, file_path: Path) -> dict[str, Any]:
+        """Return cached parsed CDF content for a monthly file."""
+        file_path = Path(file_path)
+        if file_path not in self._monthly_dataset_cache:
+            if self._verbose:
+                logger.info(f"Loading CDF {file_path}")
+
+            self._monthly_dataset_cache[file_path] = read_all_datasets_cdf(file_path)
+        return self._monthly_dataset_cache[file_path]
 
     @classmethod
     def _get_rbm_name_for_nc(
diff --git a/swvo/io/RBMDataSet/interp_functions.py b/swvo/io/RBMDataSet/interp_functions.py
index 912a13b..8a1b054 100644
--- a/swvo/io/RBMDataSet/interp_functions.py
+++ b/swvo/io/RBMDataSet/interp_functions.py
@@ -135,7 +135,7 @@ def interp_flux(
             self.Flux,
             self.energy_channels,
             self.alpha_eq_model,
-            targets,
+            targets,  # ty:ignore[invalid-argument-type]
         )
 
         with Pool(n_threads) as p:
@@ -308,7 +308,7 @@ def interp_psd(self: RBMDataSet,
     _ = self.PSD; _ = self.InvMu; _ = self.InvK
 
     # parallel over time (same pattern as interp_flux)
-    func = partial(_interp_psd_parallel, self.PSD, self.InvMu, self.InvK, targets)
+    func = partial(_interp_psd_parallel, self.PSD, self.InvMu, self.InvK, targets)  # ty:ignore[invalid-argument-type]
 
     with Pool(n_threads) as p:
         rs = p.map_async(func, range(len(self.time)))
diff --git a/swvo/io/RBMDataSet/scripts/create_RBSP_line_data.py b/swvo/io/RBMDataSet/scripts/create_RBSP_line_data.py
index cd7d427..2bcfcba 100644
--- a/swvo/io/RBMDataSet/scripts/create_RBSP_line_data.py
+++ b/swvo/io/RBMDataSet/scripts/create_RBSP_line_data.py
@@ -135,7 +135,7 @@ def create_RBSP_line_data(
     for i, instrument in enumerate(instruments):
         energy_offsets[i] = np.nanmin(
-            np.abs(rbm_data[i].energy_channels_no_time - target_en_single),
+            np.abs(rbm_data[i].energy_channels_no_time - target_en_single),  # ty:ignore[unsupported-operator]
             axis=None,
         )
 
@@ -163,7 +163,7 @@ def create_RBSP_line_data(
     rbm_data_set_result.line_data_energy = np.empty((len(target_en),))  # ty:ignore[invalid-argument-type, unresolved-attribute]
     rbm_data_set_result.line_data_alpha_local = np.empty((len(target_al),))  # ty:ignore[invalid-argument-type, unresolved-attribute]
 
-    energy_offsets_relative = energy_offsets / target_en_single
+    energy_offsets_relative = energy_offsets / target_en_single  # ty:ignore[unsupported-operator]
 
     if np.all(np.abs(energy_offsets_relative) > energy_offset_threshold):
         raise ValueError(
             f"For the given energy target ({target_en_single:.2e} MeV), no suitable energy channel was found for a threshold of {energy_offset_threshold:.02f}!"
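A note on the selection logic exercised by the hunks above and below: for each target energy, create_RBSP_line_data computes a per-instrument offset to the nearest channel, rejects the target if every instrument misses by more than energy_offset_threshold, and otherwise takes the closest channel of the best instrument. A minimal standalone sketch of that logic (the array values below are invented for illustration; the real code reads rbm_data[i].energy_channels_no_time):

    import numpy as np

    # Two hypothetical instrument channel grids in MeV (illustrative values only).
    channels = [np.array([0.03, 0.1, 0.3]), np.array([0.9, 1.8, 3.4])]
    target_en_single = 1.0
    energy_offset_threshold = 0.25

    # Per-instrument distance to the nearest channel, as in the hunk above.
    energy_offsets = np.array([np.nanmin(np.abs(c - target_en_single)) for c in channels])
    energy_offsets_relative = energy_offsets / target_en_single
    if np.all(np.abs(energy_offsets_relative) > energy_offset_threshold):
        raise ValueError("no suitable energy channel for this target energy")

    # Best instrument and its closest channel index, as in the hunk below.
    min_offset_instrument = int(np.nanargmin(energy_offsets))
    closest_en_idx = int(np.nanargmin(np.abs(channels[min_offset_instrument] - target_en_single)))
    print(min_offset_instrument, channels[min_offset_instrument][closest_en_idx])  # -> 1 0.9

The `# ty:ignore[...]` pragmas added throughout this file only silence diagnostics from the `ty` type checker (added to requirements.txt above); they have no runtime effect.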
@@ -178,7 +178,7 @@ def create_RBSP_line_data(
         )
 
         closest_en_idx = np.nanargmin(
-            np.abs(rbm_data[min_offset_instrument].energy_channels_no_time - target_en_single)
+            np.abs(rbm_data[min_offset_instrument].energy_channels_no_time - target_en_single)  # ty:ignore[unsupported-operator]
         )
         rbm_data_set_result.line_data_energy[e] = rbm_data[min_offset_instrument].energy_channels_no_time[
             closest_en_idx
@@ -199,7 +199,7 @@ def create_RBSP_line_data(
             else:
                 rbm_data_set_result.line_data_flux[:, e] = np.squeeze(
                     rbm_data[min_offset_instrument].interp_flux(
-                        target_en_single,
+                        target_en_single,  # ty:ignore[invalid-argument-type]
                         target_al[e],  # ty:ignore[not-subscriptable]
                         TargetType.TargetPairs,
                     )
@@ -208,7 +208,7 @@ def create_RBSP_line_data(
     elif target_type == TargetType.TargetMeshGrid:
         for a, target_al_single in enumerate(target_al):
             closest_al_idx = np.nanargmin(
-                np.abs(rbm_data[min_offset_instrument].alpha_local_no_time - target_al_single)
+                np.abs(rbm_data[min_offset_instrument].alpha_local_no_time - target_al_single)  # ty:ignore[unsupported-operator]
             )
             rbm_data_set_result.line_data_alpha_local[a] = rbm_data[min_offset_instrument].alpha_local_no_time[
                 closest_al_idx
@@ -221,8 +221,8 @@ def create_RBSP_line_data(
             else:
                 rbm_data_set_result.line_data_flux[:, e, a] = np.squeeze(
                     rbm_data[min_offset_instrument].interp_flux(
-                        target_en_single,
-                        target_al_single,
+                        target_en_single,  # ty:ignore[invalid-argument-type]
+                        target_al_single,  # ty:ignore[invalid-argument-type]
                         TargetType.TargetPairs,
                     )
                 )
diff --git a/swvo/io/RBMDataSet/utils.py b/swvo/io/RBMDataSet/utils.py
index 039b017..bb235fd 100644
--- a/swvo/io/RBMDataSet/utils.py
+++ b/swvo/io/RBMDataSet/utils.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 
 import fnmatch
+import logging
 import pickle
 import re
 import typing
@@ -14,6 +15,7 @@ from pathlib import Path
 from typing import Any
 
+import cdflib
+import h5py
 import netCDF4
 import numpy as np
 import pandas as pd
@@ -23,6 +25,8 @@
 from swvo.io.utils import enforce_utc_timezone
 
+logger = logging.getLogger(__name__)
+
 
 def join_var(var1: NDArray[np.generic], var2: NDArray[np.generic]) -> NDArray[np.generic]:
     """Join two variables along the first axis."""
@@ -42,19 +46,18 @@ def get_file_path_any_format(folder_path: Path, file_stem: str, preferred_ext: s
         all_files = []
 
     if len(all_files) == 0:
-        warnings.warn(f"File not found: {folder_path / (file_stem + '.*')}", stacklevel=2)
+        logger.warning(f"File not found: {folder_path / (file_stem + '.*')}")
         return None
 
     if len(all_files) >= 1:
         extensions_found = [file.suffix[1:] for file in all_files]
 
         if len(all_files) > 1:
             if preferred_ext in extensions_found:
-                warnings.warn(
+                logger.warning(
                     (
                         f"Several files found for {folder_path / (file_stem + '.*')} with extensions: {extensions_found}. "
                         f"Choosing: {preferred_ext}."
-                    ),
-                    stacklevel=2,
+                    )
                 )
                 return folder_path / (file_stem + "." + preferred_ext)
@@ -66,9 +69,17 @@ def get_file_path_any_format(folder_path: Path, file_stem: str, preferred_ext: s
             raise ValueError(msg)
 
     if len(all_files) == 1:
+        if preferred_ext not in extensions_found:
+            logger.warning(
+                f"One file found for {folder_path / (file_stem + '.*')} with extension: {extensions_found[0]}. "
+                f"Preferred extension ({preferred_ext}) is not available! "
+                f"Choosing: {extensions_found[0]}."
+            )
         return all_files[0]
 
-    warnings.warn(
-        f"File not found: {folder_path / (file_stem + '.' + preferred_ext)}",
+ preferred_ext)}", stacklevel=2, ) @@ -103,7 +114,7 @@ def _read_all_recursively(group: netCDF4.Group | netCDF4.Dataset, path: str = "" _read_all_recursively(group_obj, new_path) if not file_path.exists(): - print(f"File not found: {file_path}") + logger.warning(f"File not found: {file_path}") return {} with netCDF4.Dataset(file_path, "r") as nc_file: @@ -112,6 +123,71 @@ def _read_all_recursively(group: netCDF4.Group | netCDF4.Dataset, path: str = "" return datasets +def read_all_datasets_h5(file_path: str | Path) -> dict[str, Any]: + """Reads all datasets (variables) from an HDF5 file, including those in groups. + + This function recursively traverses all groups and datasets in an HDF5 + file and stores their data in a dictionary. The key for each dataset is its + full hierarchical path. + + Args: + file_path (str | Path): The path to the HDF5 file. + + Returns: + Dict[str, Any]: A dictionary where keys are the full dataset paths + and values are the corresponding NumPy arrays. + """ + import h5py + + datasets: dict[str, Any] = {} + file_path = Path(file_path) + + def _read_all_recursively(group: h5py.Group, path: str = ""): + for name, item in group.items(): + full_path = f"{path}/{name}" if path else name + if isinstance(item, h5py.Dataset): + datasets[full_path] = item[()] + elif isinstance(item, h5py.Group): + _read_all_recursively(item, full_path) + + if not file_path.exists(): + logger.warning(f"File not found: {file_path}") + return {} + + with h5py.File(file_path, "r") as h5_file: + _read_all_recursively(h5_file) + + return datasets + + +def read_all_datasets_cdf(file_path: str | Path) -> dict[str, Any]: + """Reads all datasets (variables) from a CDF file. + + This function loads all zVariables and rVariables in a CDF file and stores + their data in a dictionary keyed by variable name. + + Args: + file_path (str | Path): The path to the CDF file. + Returns: + Dict[str, Any]: A dictionary where keys are the variable names and + values are the corresponding NumPy arrays. 
+ """ + datasets: dict[str, Any] = {} + file_path = Path(file_path) + + if not file_path.exists(): + logger.warning(f"File not found: {file_path}") + return {} + + with cdflib.CDF(file_path) as cdf_file: + info = cdf_file.cdf_info() + var_names = list(info.zVariables) + list(info.rVariables) + for var_name in var_names: + datasets[var_name] = cdf_file.varget(var_name) + + return datasets + + def load_file_any_format(file_path: Path) -> dict[str, Any]: """Load a file in any supported format and return its content.""" match file_path.suffix: diff --git a/tests/io/RBMDataSet/test_RBMDataset.py b/tests/io/RBMDataSet/test_RBMDataset.py index f7050d8..f0b08ef 100644 --- a/tests/io/RBMDataSet/test_RBMDataset.py +++ b/tests/io/RBMDataSet/test_RBMDataset.py @@ -52,6 +52,7 @@ def mock_dataset(): folder_path=Path("/mock/path"), preferred_extension="pickle", verbose=False, + dataorg=True, ) return dataset @@ -76,6 +77,7 @@ def test_init_datetime_timezone(mock_module_string): end_time=end_time, folder_path=Path("/mock/path"), preferred_extension="pickle", + dataorg=True, ) assert dataset._start_time.tzinfo == timezone.utc @@ -110,6 +112,7 @@ def test_satellite_string_input(mock_module_string): end_time=dt.datetime(2023, 1, 31, tzinfo=timezone.utc), folder_path=Path("/mock/path"), preferred_extension="pickle", + dataorg=True, ) assert dataset._satellite == SatelliteEnum.RBSPA @@ -260,6 +263,7 @@ def test_load_variable_real_file(): folder_path=Path("path/to/real/files"), # this does not matter for the test preferred_extension="pickle", verbose=True, + dataorg=True, ) dataset._load_variable(VariableEnum.ALPHA_LOCAL) @@ -1111,7 +1115,7 @@ def test_get_loaded_variables_includes_computed_variables_nc(mock_dataset_nc: RB assert "InvV" in loaded_variables -def test_is_nc_dataset(tmp_path: Path): +def test_is_monthly_dataset(tmp_path: Path): """Test if _check_if_nc_dataset is correct""" rbm_ds = RBMDataSet( @@ -1121,9 +1125,9 @@ def test_is_nc_dataset(tmp_path: Path): dt.datetime(2013, 1, 1, tzinfo=timezone.utc), dt.datetime(2013, 1, 2, tzinfo=timezone.utc), folder_path=tmp_path, - preferred_extension="pickle", + preferred_extension="nc", ) - assert rbm_ds._is_nc_dataset # type: ignore + assert rbm_ds._is_monthly_dataset # type: ignore (rbm_ds._file_path_stem / "Processed_Mat_Files").mkdir(exist_ok=True, parents=True) # type: ignore rbm_ds = RBMDataSet( @@ -1134,8 +1138,9 @@ def test_is_nc_dataset(tmp_path: Path): dt.datetime(2013, 1, 2, tzinfo=timezone.utc), folder_path=tmp_path, preferred_extension="pickle", + dataorg=True, ) - assert not rbm_ds._is_nc_dataset # type: ignore + assert not rbm_ds._is_monthly_dataset # type: ignore (rbm_ds._file_path_stem / "file.nc").touch() # type: ignore rbm_ds = RBMDataSet( @@ -1146,8 +1151,9 @@ def test_is_nc_dataset(tmp_path: Path): dt.datetime(2013, 1, 2, tzinfo=timezone.utc), folder_path=tmp_path, preferred_extension="pickle", + dataorg=True, ) - assert not rbm_ds._is_nc_dataset # type: ignore + assert not rbm_ds._is_monthly_dataset # type: ignore rbm_ds = RBMDataSet( "RBSPA", @@ -1156,9 +1162,9 @@ def test_is_nc_dataset(tmp_path: Path): dt.datetime(2013, 1, 1, tzinfo=timezone.utc), dt.datetime(2013, 1, 2, tzinfo=timezone.utc), folder_path=tmp_path, - preferred_extension="nc", + preferred_extension="h5", ) - assert rbm_ds._is_nc_dataset # type: ignore + assert rbm_ds._is_monthly_dataset # type: ignore (rbm_ds._file_path_stem / "Processed_Mat_Files").rmdir() # type: ignore rbm_ds = RBMDataSet( @@ -1168,5 +1174,6 @@ def test_is_nc_dataset(tmp_path: Path): dt.datetime(2013, 
         dt.datetime(2013, 1, 2, tzinfo=timezone.utc),
         folder_path=tmp_path,
+        preferred_extension="cdf",
     )
-    assert rbm_ds._is_nc_dataset  # type: ignore
+    assert rbm_ds._is_monthly_dataset  # type: ignore
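End-to-end, the new loading paths can be exercised as below. This is a hedged usage sketch, not project documentation: the import path, the instrument and MFM values, the folder layout, and the "Flux" variable name are assumptions; the satellite string, the keyword names, the pickle deprecation, and the `.custom` access pattern come directly from the diff and tests above.

    import datetime as dt
    from datetime import timezone

    from swvo.io.RBMDataSet import RBMDataSet  # import path assumed from the repo layout

    ds = RBMDataSet(
        "RBSPA",             # satellite string, as in the tests above
        "mageis",            # placeholder -- substitute a real InstrumentEnum member/name
        "t89",               # placeholder -- substitute a real MfmEnum member/name
        dt.datetime(2013, 1, 1, tzinfo=timezone.utc),
        dt.datetime(2013, 1, 2, tzinfo=timezone.utc),
        folder_path="/data/rbm",    # plain str is now accepted alongside Path
        preferred_extension="cdf",  # "nc" and "h5" go through the same cached readers
    )

    ds.load("Flux")    # "Flux" assumed to be a VariableLiteral name (used as self.Flux above);
                       # this reads the monthly <stem><dates>_<mfm>.cdf file and caches it
    print(ds.custom)   # datasets stored under "custom/<name>" are collected here

    # The legacy per-variable .mat/.pickle tree now requires an explicit opt-in,
    # and "pickle" additionally emits a FutureWarning:
    legacy = RBMDataSet(
        "RBSPA", "mageis", "t89",
        dt.datetime(2013, 1, 1, tzinfo=timezone.utc),
        dt.datetime(2013, 1, 2, tzinfo=timezone.utc),
        folder_path="/data/rbm",
        preferred_extension="pickle",
        dataorg=True,
    )

Note the design choice visible in _load_variable: all three monthly readers share one `_monthly_dataset_cache` keyed by file path, so a month read once (in any of the three formats) is parsed only once per RBMDataSet instance.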