From 52257cdb98bd0cb3d55e6a7f8012fe8b2c93a27f Mon Sep 17 00:00:00 2001 From: Sahil Jhawar Date: Thu, 7 May 2026 18:22:49 +0200 Subject: [PATCH 1/5] refactor: refactor and move the scripts around --- docs/API_reference/overview.md | 4 +- .../extraction.md} | 4 +- .../convert_string_to_datetime.md | 2 +- docs/getting_started/basic_workflow.md | 2 +- el_paso/__init__.py | 5 +- el_paso/processing/__init__.py | 6 +- .../processing/convert_string_to_datetime.py | 39 --- .../extraction.py} | 1 + el_paso/saving_strategies/saving_strategy.py | 265 ++++++++++++++++++ el_paso/utils.py | 33 +++ examples/Arase/arase_mepe.py | 10 +- examples/Arase/get_arase_orbit_variables.py | 20 +- examples/Arase/process_arase_xep_realtime.py | 34 +-- examples/Arase/process_pwe_densities.py | 6 +- examples/ESA/process_ngrm_satellite.py | 34 +-- examples/GOES/process_goes_r_mps_high.py | 34 +-- examples/GOES/process_goes_realtime.py | 12 +- examples/POES/process_poes_meped.py | 54 ++-- examples/POES/process_poes_ted.py | 38 +-- .../PROBAV/process_ept_electron_fluxes.py | 52 ++-- .../VanAllenProbes/process_ect_combined.py | 16 +- .../process_efw_emfisis_density_combined.py | 24 +- .../VanAllenProbes/process_hope_electrons.py | 12 +- examples/minimal_example.py | 12 +- mkdocs.yml | 4 +- ruff.toml | 3 +- tests/comparisons/test_mageph_rbsp.py | 12 +- ...wnload_data_and_extracting_variables.ipynb | 120 ++------ tutorials/2_variable_class.ipynb | 42 ++- tutorials/3_saving_strategies.ipynb | 116 ++++---- tutorials/4_time_binning.ipynb | 29 +- tutorials/5_magnetic_field_processing.ipynb | 80 +++--- tutorials/output.h5 | Bin 0 -> 800 bytes tutorials/rbsp_hope_example.pickle | Bin 39767790 -> 39767790 bytes 34 files changed, 649 insertions(+), 476 deletions(-) rename docs/API_reference/{extract_variables_from_files.md => processing/extraction.md} (54%) rename docs/API_reference/{processing => utilities}/convert_string_to_datetime.md (65%) delete mode 100644 el_paso/processing/convert_string_to_datetime.py rename el_paso/{extract_variables_from_files.py => processing/extraction.py} (99%) create mode 100644 el_paso/saving_strategies/saving_strategy.py create mode 100644 tutorials/output.h5 diff --git a/docs/API_reference/overview.md b/docs/API_reference/overview.md index 8751921..3da9a76 100644 --- a/docs/API_reference/overview.md +++ b/docs/API_reference/overview.md @@ -19,7 +19,7 @@ This section provides a detailed reference for all modules, classes, and functio ## Core functions [download](download.md) -[extract_variables_from_files](extract_variables_from_files.md) +[extraction](processing/extraction.md) [save](save.md) @@ -57,7 +57,7 @@ This section provides a detailed reference for all modules, classes, and functio [fold_pitch_angles_and_flux](processing/fold_pitch_angles_and_flux.md) -[convert_string_to_datetime](processing/convert_string_to_datetime.md) +[convert_string_to_datetime](utilities/convert_string_to_datetime.md) ## Saving standards diff --git a/docs/API_reference/extract_variables_from_files.md b/docs/API_reference/processing/extraction.md similarity index 54% rename from docs/API_reference/extract_variables_from_files.md rename to docs/API_reference/processing/extraction.md index 8352630..05a0d4a 100644 --- a/docs/API_reference/extract_variables_from_files.md +++ b/docs/API_reference/processing/extraction.md @@ -5,6 +5,6 @@ SPDX-FileContributor: Bernhard Haas SPDX-License-Identifier: Apache-2.0 --> -::: el_paso.extract_variables_from_files.extract_variables_from_files +::: el_paso.processing.extraction.extract_variables_from_files -::: el_paso.extract_variables_from_files.ExtractionInfo +::: el_paso.processing.extraction.ExtractionInfo diff --git a/docs/API_reference/processing/convert_string_to_datetime.md b/docs/API_reference/utilities/convert_string_to_datetime.md similarity index 65% rename from docs/API_reference/processing/convert_string_to_datetime.md rename to docs/API_reference/utilities/convert_string_to_datetime.md index c143713..4bd26b8 100644 --- a/docs/API_reference/processing/convert_string_to_datetime.md +++ b/docs/API_reference/utilities/convert_string_to_datetime.md @@ -5,4 +5,4 @@ SPDX-FileContributor: Bernhard Haas SPDX-License-Identifier: Apache-2.0 --> -::: el_paso.processing.convert_string_to_datetime.convert_string_to_datetime +::: el_paso.utils.convert_string_to_datetime diff --git a/docs/getting_started/basic_workflow.md b/docs/getting_started/basic_workflow.md index 815540b..fd39ba6 100644 --- a/docs/getting_started/basic_workflow.md +++ b/docs/getting_started/basic_workflow.md @@ -31,7 +31,7 @@ You can learn more about the EL-PASO download routine in tutorial #1 located in A [Variable](../API_reference/variable.md) in EL-PASO is a custom class which holds a numpy-array as data and metadata (unit, processing notes, etc). -We can turn the downloaded files into variables by calling the EL-PASO [extract_variables_from_files](../API_reference/extract_variables_from_files.md) routine. +We can turn the downloaded files into variables by calling the EL-PASO [extract_variables_from_files](../API_reference/processing/extraction.md) routine. The user has to provide information about under which name or column the variable is storred and its unit: diff --git a/el_paso/__init__.py b/el_paso/__init__.py index cc37eeb..1dfe662 100644 --- a/el_paso/__init__.py +++ b/el_paso/__init__.py @@ -25,8 +25,8 @@ from el_paso.save import save from el_paso.processing import TimeBinMethod from el_paso.download import download -from el_paso.extract_variables_from_files import extract_variables_from_files, ExtractionInfo from el_paso.load_indices_solar_wind_parameters import load_indices_solar_wind_parameters +from el_paso import utils # expose RBMDataSet related classes and functions from swvo.io.RBMDataSet.custom_enums import ( @@ -58,13 +58,11 @@ "IRBEM_SYSAXIS_GSM", "IRBEM_SYSAXIS_MAG", "IRBEM_SYSAXIS_SM", - "ExtractionInfo", "TimeBinMethod", "Variable", "activate_release_mode", "data_standards", "download", - "extract_variables_from_files", "get_release_msg", "is_in_release_mode", "load_indices_solar_wind_parameters", @@ -73,6 +71,7 @@ "save", "saving_strategies", "units", + "utils", ] __version__ = "1.0.3rc0" diff --git a/el_paso/processing/__init__.py b/el_paso/processing/__init__.py index 66c6d78..84c1012 100644 --- a/el_paso/processing/__init__.py +++ b/el_paso/processing/__init__.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences # SPDX-FileContributor: Bernhard Haas +# SPDX-FileContributor: Sahil Jhawar # # SPDX-License-Identifier: Apache-2.0 @@ -13,12 +14,13 @@ from el_paso.processing.compute_phase_space_density import compute_phase_space_density from el_paso.processing.compute_pitch_angles_for_telescopes import compute_pitch_angles_for_telescopes from el_paso.processing.construct_pitch_angle_distribution import construct_pitch_angle_distribution -from el_paso.processing.convert_string_to_datetime import convert_string_to_datetime +from el_paso.processing.extraction import ExtractionInfo, extract_variables_from_files from el_paso.processing.fold_pitch_angles_and_flux import fold_pitch_angles_and_flux from el_paso.processing.get_real_time_tipsod import get_real_time_tipsod from el_paso.processing.magnetic_field_utils import MagFieldVarTypes __all__ = [ + "ExtractionInfo", "MagFieldVarTypes", "TimeBinMethod", "VariableRequest", @@ -31,7 +33,7 @@ "compute_phase_space_density", "compute_pitch_angles_for_telescopes", "construct_pitch_angle_distribution", - "convert_string_to_datetime", + "extract_variables_from_files", "fold_pitch_angles_and_flux", "get_real_time_tipsod", "magnetic_field_utils", diff --git a/el_paso/processing/convert_string_to_datetime.py b/el_paso/processing/convert_string_to_datetime.py deleted file mode 100644 index e19d5a6..0000000 --- a/el_paso/processing/convert_string_to_datetime.py +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences -# SPDX-FileContributor: Bernhard Haas -# -# SPDX-License-Identifier: Apache-2.0 - -from datetime import datetime, timezone - -import numpy as np -from dateutil import parser -from numpy.typing import NDArray - -from el_paso import Variable - - -def convert_string_to_datetime(time_var: Variable, time_format: str | None = None) -> NDArray[np.generic]: - """Converts a Variable's string-based time data to UTC datetime objects. - - This function transforms an array of time strings into Python datetime objects, - automatically setting the timezone to UTC. If time_format is provided, it uses - datetime.strptime for explicit parsing; otherwise, it uses a flexible parser - (like dateutil.parser.parse) to infer the format. - - Args: - time_var (Variable): The variable containing string-based time data to be - converted. Its data is accessed via time_var.get_data(). - time_format (str | None): The explicit format string (e.g., "%Y-%m-%d %H:%M:%S") - used to parse the time data. If None (default), the function uses a - flexible parser to infer the correct format. - - Returns: - NDArray[np.generic]: A NumPy array of Python datetime objects that are all - localized to the UTC timezone. - """ - time_var.metadata.add_processing_note("Converting string-time to datetime") - - if time_format is None: - return np.asarray([parser.parse(t).replace(tzinfo=timezone.utc) for t in time_var.get_data()]) - - return np.asarray([datetime.strptime(t, time_format).replace(tzinfo=timezone.utc) for t in time_var.get_data()]) diff --git a/el_paso/extract_variables_from_files.py b/el_paso/processing/extraction.py similarity index 99% rename from el_paso/extract_variables_from_files.py rename to el_paso/processing/extraction.py index d3fe552..937af4a 100644 --- a/el_paso/extract_variables_from_files.py +++ b/el_paso/processing/extraction.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences # SPDX-FileContributor: Bernhard Haas +# SPDX-FileContributor: Sahil Jhawar # # SPDX-License-Identifier: Apache-2.0 diff --git a/el_paso/saving_strategies/saving_strategy.py b/el_paso/saving_strategies/saving_strategy.py new file mode 100644 index 0000000..2e8d974 --- /dev/null +++ b/el_paso/saving_strategies/saving_strategy.py @@ -0,0 +1,265 @@ +# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences +# SPDX-FileContributor: Bernhard Haas +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import pickle +import typing +from abc import ABC, abstractmethod +from copy import deepcopy +from datetime import datetime +from pathlib import Path +from typing import Any, NamedTuple + +import h5py # type: ignore[reportMissingTypeStubs] +import numpy as np +from astropy import units as u # type: ignore[reportMissingTypeStubs] +from scipy.io import savemat # type: ignore[reportMissingTypeStubs] + +from el_paso import Variable + +logger = logging.getLogger(__name__) + + +class OutputFile(NamedTuple): + """Represents an output file with its name and a list of variable names to save. + + Attributes: + name (str): The name of the output file. + names_to_save (list[str]): List of variable names to be saved in the output file. + save_incomplete (bool): If True, allows saving even if some variables are missing. + """ + + name: str + names_to_save: list[str] + save_incomplete: bool = False + + +class SavingStrategy(ABC): + """Abstract base class for defining strategies to save output files with specific time intervals and variables. + + Attributes: + output_files (list[OutputFile]): List of output files to be managed by the saving strategy. + + Methods: + get_time_intervals_to_save(start_time: datetime | None, end_time: datetime | None) + -> list[tuple[datetime, datetime]]: + Abstract method to determine the time intervals for saving data between start_time and end_time. + + get_file_path(interval_start: datetime, interval_end: datetime, output_file: OutputFile) -> Path: + Abstract method to generate the file path for a given time interval and output file. + + standardize_variable(variable: Variable, name_in_file: str) -> Variable: + Abstract method to standardize a variable before saving, possibly renaming or formatting it. + + get_target_variables(output_file: OutputFile, variables_dict: dict[str, Variable], time_var: Variable | None, + start_time: datetime | None, end_time: datetime | None) -> dict[str, Variable] | None: + Selects and prepares variables to be saved in the output file, optionally truncating them to a time range. + + save_single_file(file_path: Path, dict_to_save: dict[str, Any], *, append: bool = False): + Saves the provided dictionary to a file in the specified format (.mat, .pickle, .h5), + optionally appending data. + + append_data(file_path: Path, dict_to_save: dict[str, Any]) -> dict[str, Any]: + Abstract method to append data to an existing file; must be implemented by subclasses. + """ + + output_files: list[OutputFile] + dependency_dict: dict[str, list[str]] + + @abstractmethod + def get_time_intervals_to_save(self, start_time: datetime, end_time: datetime) -> list[tuple[datetime, datetime]]: + """Generates a list of time intervals to save between the specified start and end times. + + Args: + start_time (datetime | None): The starting datetime for the intervals. + If None, intervals may start from the earliest available time. + end_time (datetime | None): The ending datetime for the intervals. + If None, intervals may end at the latest available time. + + Returns: + list[tuple[datetime, datetime]]: A list of tuples, each representing a time interval (start, end) + to be saved. + """ + + @abstractmethod + def get_file_path(self, interval_start: datetime, interval_end: datetime, output_file: OutputFile) -> Path: + """Generates a file path for saving variables based on the provided interval and output file information. + + Args: + interval_start (datetime): The start of the interval for which the file is being generated. + interval_end (datetime): The end of the interval for which the file is being generated. + output_file (OutputFile): An OutputFile containing the name of the output file, + and which variables should be saved in this file. + + Returns: + Path: The generated file path where the output data should be saved. + """ + + @abstractmethod + def standardize_variable(self, variable: Variable, name_in_file: str, *, first_call_of_interval: bool) -> Variable: + """Standardizes the given variable according to the specified name in the file. + + Standardization may include checking of units, dimensions, and size consistency. + + Args: + variable (Variable): The variable instance to be standardized. + name_in_file (str): The name of the variable as it appears in the file. + first_call_of_interval (bool): Flag to indicate if it is the first call of a time interval + + Returns: + Variable: The standardized variable instance. + """ + + def get_target_variables( + self, + output_file: OutputFile, + variables_dict: dict[str, Variable], + time_var: Variable | None, + start_time: datetime | None, + end_time: datetime | None, + ) -> dict[str, Variable] | None: + """Retrieves and processes target variables for saving based on the specified output file. + + Parameters: + output_file (OutputFile): The output file configuration containing variable names to save. + variables_dict (dict[str, Variable]): Dictionary mapping variable names to Variable objects. + time_var (Variable | None): The time variable used for truncation, if applicable. + start_time (datetime | None): The start time for truncating variables, if specified. + end_time (datetime | None): The end time for truncating variables, if specified. + + Returns: + dict[str, Variable] | None: + - A dictionary of processed Variable objects keyed by their names, + or None if any specified variable name is not found in variables_dict. + + Notes: + - If no variable names are specified in output_file, all variables in variables_dict are processed. + - Variables are deep-copied before processing. + - Each variable is standardized using the `standardize_variable` method. + - If a requested variable name is not found, a warning is issued and None is returned. + """ + target_variables: dict[str, Variable] = {} + first_call_of_interval = True + + # if no variables have been specified, we save all of them + if len(output_file.names_to_save) == 0: + for key, var in variables_dict.items(): + var_to_save = deepcopy(var) + + if start_time is not None and end_time is not None and time_var is not None: + var_to_save.truncate(time_var, start_time.timestamp(), end_time.timestamp()) + var_to_save = self.standardize_variable(var_to_save, key, first_call_of_interval=first_call_of_interval) + first_call_of_interval = False + + target_variables[key] = var_to_save + + return target_variables + + for name_to_save in output_file.names_to_save: + if name_to_save in variables_dict: + var_to_save = deepcopy(variables_dict[name_to_save]) + + if start_time is not None and end_time is not None and time_var is not None: + var_to_save.truncate(time_var, start_time.timestamp(), end_time.timestamp()) + + var_to_save = self.standardize_variable( + var_to_save, name_to_save, first_call_of_interval=first_call_of_interval + ) + first_call_of_interval = False + + target_variables[name_to_save] = var_to_save + else: + msg = f"Could not find target variable {name_to_save}!" + logger.info(msg, stacklevel=2) + if output_file.save_incomplete: + target_variables[name_to_save] = Variable(original_unit=u.dimensionless_unscaled, data=np.array([])) + else: + return None + + return target_variables + + def save_single_file(self, file_path: Path, dict_to_save: dict[str, Any], *, append: bool = False) -> None: # noqa: C901, PLR0912 + """Saves variable data to a single file in one of the supported formats (.mat, .pickle, .h5). + + Parameters: + file_path (Path): The path to the file where the dictionary will be saved. + The file extension determines the format. + dict_to_save (dict[str, Any]): The dictionary containing variable data to save. + append (bool, optional): If True and the file exists, appends data to the existing file (if supported). + Defaults to False. + + Raises: + NotImplementedError: If the file format specified by the file extension is not supported. + + Supported formats: + - .mat: Saves using scipy.io.savemat. + - .pickle: Saves using pickle.dump. + - .h5: Saves using h5py, with each key as a dataset (excluding "metadata"). + """ + logger.info(f"Saving file {file_path.name}...") + + file_path.parent.mkdir(parents=True, exist_ok=True) + format_name = file_path.suffix.lower() + + if file_path.exists() and append: + dict_to_save = self.append_data(file_path, dict_to_save) + + if format_name == ".mat": + # Save the dictionary into a .mat file + savemat(str(file_path), dict_to_save) + + elif format_name == ".pickle": + with file_path.open("wb") as file: + pickle.dump(dict_to_save, file) + + elif format_name == ".h5": + with h5py.File(file_path, "w") as file: + for path, value in dict_to_save.items(): + if path == "metadata": + continue + + path_parts = path.split("/") + groups = path_parts[:-1] + dataset_name = path_parts[-1] + + curr_hierachy = file + for group in groups: + if group not in curr_hierachy: + curr_hierachy = curr_hierachy.create_group(group) # type: ignore[reportUnknownVariableType] + else: + curr_hierachy = typing.cast("h5py.Group", curr_hierachy[group]) + + data_set = curr_hierachy.create_dataset(dataset_name, data=value, compression="gzip", shuffle=True) # type: ignore[reportUnknownMemberType] + + if path in dict_to_save["metadata"]: + for key, metadata in dict_to_save["metadata"][path].items(): + data_set.attrs[key] = metadata + + elif format_name == ".nc": + msg = ( + "Encountered format netCDF (.nc). This format has to be implemented by " + "each subclass as no general writer exists for it!" + ) + raise NotImplementedError(msg) + + else: + msg = f"The '{format_name}' format is not implemented." + raise NotImplementedError(msg) + + def append_data(self, file_path: Path, data_dict_to_save: dict[str, Any]) -> dict[str, Any]: + """Appends variable data from the specified file to the provided dictionary. + + Args: + file_path (Path): The path to the file where data should be appended. + data_dict_to_save (dict[str, Any]): The dictionary containing data to append. + + Returns: + dict[str, Any]: The updated dictionary after appending data. + + Raises: + NotImplementedError: This method must be implemented by subclasses. + """ + msg = "This has to be overwritten for each Strategy!" + raise NotImplementedError(msg) diff --git a/el_paso/utils.py b/el_paso/utils.py index d144296..a6680cd 100644 --- a/el_paso/utils.py +++ b/el_paso/utils.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences # SPDX-FileContributor: Bernhard Haas +# SPDX-FileContributor: Sahil Jhawar # # SPDX-License-Identifier: Apache-2.0 @@ -15,15 +16,20 @@ from pathlib import Path from typing import Any, ParamSpec, TypeVar +import numpy as np import pandas as pd import tqdm +from dateutil import parser from packaging import version as version_pkg if typing.TYPE_CHECKING: from collections.abc import Callable, Iterable from multiprocessing.pool import MapResult + from numpy.typing import NDArray + import el_paso as ep + from el_paso import Variable logger = logging.getLogger(__name__) @@ -299,3 +305,30 @@ def make_dict_hashable(dict_input: dict[Any, Any] | None) -> Hashabledict | None return dict_input return Hashabledict(dict_input) + + +def convert_string_to_datetime(time_var: Variable, time_format: str | None = None) -> NDArray[np.generic]: + """Converts a Variable's string-based time data to UTC datetime objects. + + This function transforms an array of time strings into Python datetime objects, + automatically setting the timezone to UTC. If time_format is provided, it uses + datetime.strptime for explicit parsing; otherwise, it uses a flexible parser + (like dateutil.parser.parse) to infer the format. + + Args: + time_var (Variable): The variable containing string-based time data to be + converted. Its data is accessed via time_var.get_data(). + time_format (str | None): The explicit format string (e.g., "%Y-%m-%d %H:%M:%S") + used to parse the time data. If None (default), the function uses a + flexible parser to infer the correct format. + + Returns: + NDArray[np.generic]: A NumPy array of Python datetime objects that are all + localized to the UTC timezone. + """ + time_var.metadata.add_processing_note("Converting string-time to datetime") + + if time_format is None: + return np.asarray([parser.parse(t).replace(tzinfo=timezone.utc) for t in time_var.get_data()]) + + return np.asarray([datetime.strptime(t, time_format).replace(tzinfo=timezone.utc) for t in time_var.get_data()]) diff --git a/examples/Arase/arase_mepe.py b/examples/Arase/arase_mepe.py index b07c1ac..d2c93cc 100644 --- a/examples/Arase/arase_mepe.py +++ b/examples/Arase/arase_mepe.py @@ -65,31 +65,31 @@ def process_mepe_level_3( # noqa: PLR0915 ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="FEDU_Energy", unit=u.keV, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Pitch_angle", name_or_column="FEDU_Alpha", unit=u.deg, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDU", name_or_column="FEDU", unit=(u.cm**2 * u.s * u.sr * u.keV) ** (-1), ), ] - mepe_variables = ep.extract_variables_from_files( + mepe_variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", diff --git a/examples/Arase/get_arase_orbit_variables.py b/examples/Arase/get_arase_orbit_variables.py index d6c8fba..9afad14 100644 --- a/examples/Arase/get_arase_orbit_variables.py +++ b/examples/Arase/get_arase_orbit_variables.py @@ -37,19 +37,19 @@ def get_arase_orbit_level_2_variables( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="pos_sm", name_or_column="pos_sm", unit=u.dimensionless_unscaled, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", @@ -108,39 +108,39 @@ def get_arase_orbit_level_3_variables( raise ValueError(msg) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="B_local", name_or_column=f"pos_blocal_{mag_field_label}", unit=u.nT, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="B_eq", name_or_column=f"pos_beq_{mag_field_label}", unit=u.nT, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Lm", name_or_column=f"pos_lmc_{mag_field_label}", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Lstar", name_or_column=f"pos_lstar_{mag_field_label}", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="pos_eq", name_or_column=f"pos_eq_{mag_field_label}", unit=u.dimensionless_unscaled, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", diff --git a/examples/Arase/process_arase_xep_realtime.py b/examples/Arase/process_arase_xep_realtime.py index 7d98fc4..419ef09 100644 --- a/examples/Arase/process_arase_xep_realtime.py +++ b/examples/Arase/process_arase_xep_realtime.py @@ -238,20 +238,20 @@ def _get_xep_variables( fedo_unit = typing.cast("u.Unit", (u.cm**2 * u.s * u.sr * u.keV) ** (-1)) extraction_infos = [ - ep.ExtractionInfo(name_or_column="time", unit=u.dimensionless_unscaled, result_key="Epoch"), - ep.ExtractionInfo(name_or_column="ch1", unit=fedo_unit, result_key="FEDO_ch1"), - ep.ExtractionInfo(name_or_column="ch2", unit=fedo_unit, result_key="FEDO_ch2"), - ep.ExtractionInfo(name_or_column="ch3", unit=fedo_unit, result_key="FEDO_ch3"), - ep.ExtractionInfo(name_or_column="ch4", unit=fedo_unit, result_key="FEDO_ch4"), - ep.ExtractionInfo(name_or_column="ch5", unit=fedo_unit, result_key="FEDO_ch5"), - ep.ExtractionInfo(name_or_column="ch6", unit=fedo_unit, result_key="FEDO_ch6"), - ep.ExtractionInfo(name_or_column="ch7", unit=fedo_unit, result_key="FEDO_ch7"), - ep.ExtractionInfo(name_or_column="ch8", unit=fedo_unit, result_key="FEDO_ch8"), + ep.processing.ExtractionInfo(name_or_column="time", unit=u.dimensionless_unscaled, result_key="Epoch"), + ep.processing.ExtractionInfo(name_or_column="ch1", unit=fedo_unit, result_key="FEDO_ch1"), + ep.processing.ExtractionInfo(name_or_column="ch2", unit=fedo_unit, result_key="FEDO_ch2"), + ep.processing.ExtractionInfo(name_or_column="ch3", unit=fedo_unit, result_key="FEDO_ch3"), + ep.processing.ExtractionInfo(name_or_column="ch4", unit=fedo_unit, result_key="FEDO_ch4"), + ep.processing.ExtractionInfo(name_or_column="ch5", unit=fedo_unit, result_key="FEDO_ch5"), + ep.processing.ExtractionInfo(name_or_column="ch6", unit=fedo_unit, result_key="FEDO_ch6"), + ep.processing.ExtractionInfo(name_or_column="ch7", unit=fedo_unit, result_key="FEDO_ch7"), + ep.processing.ExtractionInfo(name_or_column="ch8", unit=fedo_unit, result_key="FEDO_ch8"), ] # Bernhard: the header is also in the file, but there is a comment after it, so it cannot be read by pd.read_csv xep_header = ("time", "ch1", "ch2", "ch3", "ch4", "ch5", "ch6", "ch7", "ch8") - xep_variables = ep.extract_variables_from_files( + xep_variables = ep.processing.extract_variables_from_files( extraction_infos=extraction_infos, data_path=data_path_stem, file_name_stem=file_name_stem, @@ -263,7 +263,7 @@ def _get_xep_variables( # convert time variable # parse time strings - datetimes = ep.processing.convert_string_to_datetime(xep_variables["Epoch"]) + datetimes = ep.utils.convert_string_to_datetime(xep_variables["Epoch"]) xep_variables["Epoch"].set_data(np.asarray([t.timestamp() for t in datetimes]), unit=ep.units.posixtime) # add energy variable @@ -336,13 +336,13 @@ def _get_orb_variables( ) extraction_infos = [ - ep.ExtractionInfo(name_or_column="time", unit=u.dimensionless_unscaled, result_key="Epoch"), - ep.ExtractionInfo(name_or_column="sm_x", unit=ep.units.RE, result_key="sm_x"), - ep.ExtractionInfo(name_or_column="sm_y", unit=ep.units.RE, result_key="sm_y"), - ep.ExtractionInfo(name_or_column="sm_z", unit=ep.units.RE, result_key="sm_z"), + ep.processing.ExtractionInfo(name_or_column="time", unit=u.dimensionless_unscaled, result_key="Epoch"), + ep.processing.ExtractionInfo(name_or_column="sm_x", unit=ep.units.RE, result_key="sm_x"), + ep.processing.ExtractionInfo(name_or_column="sm_y", unit=ep.units.RE, result_key="sm_y"), + ep.processing.ExtractionInfo(name_or_column="sm_z", unit=ep.units.RE, result_key="sm_z"), ] - orb_variables = ep.extract_variables_from_files( + orb_variables = ep.processing.extract_variables_from_files( extraction_infos=extraction_infos, data_path=data_path_stem, file_name_stem=file_name_stem, @@ -351,7 +351,7 @@ def _get_orb_variables( file_cadence="daily", ) - datetimes = ep.processing.convert_string_to_datetime(orb_variables["Epoch"]) + datetimes = ep.utils.convert_string_to_datetime(orb_variables["Epoch"]) orb_variables["Epoch"].set_data(np.asarray([t.timestamp() for t in datetimes]), unit=ep.units.posixtime) # convert SM to GEO diff --git a/examples/Arase/process_pwe_densities.py b/examples/Arase/process_pwe_densities.py index 63bffbd..99b2c4f 100644 --- a/examples/Arase/process_pwe_densities.py +++ b/examples/Arase/process_pwe_densities.py @@ -56,19 +56,19 @@ def process_pwe_density( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Density", name_or_column="ne_mgf", unit=u.cm ** (-3), ), ] - pwe_variables = ep.extract_variables_from_files( + pwe_variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", diff --git a/examples/ESA/process_ngrm_satellite.py b/examples/ESA/process_ngrm_satellite.py index 9cf54aa..e76cfc7 100644 --- a/examples/ESA/process_ngrm_satellite.py +++ b/examples/ESA/process_ngrm_satellite.py @@ -77,44 +77,44 @@ def process_ngrm_electron_fluxes( flux_unit = typing.cast("u.Unit", (u.cm**2 * u.s * u.sr * u.MeV) ** (-1)) extraction_infos = [ - ep.ExtractionInfo(result_key="Epoch_iso", name_or_column="Time", unit=u.dimensionless_unscaled), - ep.ExtractionInfo( + ep.processing.ExtractionInfo(result_key="Epoch_iso", name_or_column="Time", unit=u.dimensionless_unscaled), + ep.processing.ExtractionInfo( result_key="FEDO_ch1", name_or_column="Differential electron flux (0.18 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch2", name_or_column="Differential electron flux (0.27 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch3", name_or_column="Differential electron flux (0.40 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch4", name_or_column="Differential electron flux (0.60 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch5", name_or_column="Differential electron flux (0.88 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch6", name_or_column="Differential electron flux (1.30 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch7", name_or_column="Differential electron flux (1.93 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch8", name_or_column="Differential electron flux (2.90 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch9", name_or_column="Differential electron flux (3.40 MeV)", unit=flux_unit ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO_ch10", name_or_column="Differential electron flux (4.00 MeV)", unit=flux_unit ), - ep.ExtractionInfo(result_key="x_ECI", name_or_column="X", unit=u.km), - ep.ExtractionInfo(result_key="y_ECI", name_or_column="Y", unit=u.km), - ep.ExtractionInfo(result_key="z_ECI", name_or_column="Z", unit=u.km), - ep.ExtractionInfo(result_key="L", name_or_column="L", unit=ep.units.RE), + ep.processing.ExtractionInfo(result_key="x_ECI", name_or_column="X", unit=u.km), + ep.processing.ExtractionInfo(result_key="y_ECI", name_or_column="Y", unit=u.km), + ep.processing.ExtractionInfo(result_key="z_ECI", name_or_column="Z", unit=u.km), + ep.processing.ExtractionInfo(result_key="L", name_or_column="L", unit=ep.units.RE), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", @@ -127,7 +127,7 @@ def process_ngrm_electron_fluxes( time_format = "%Y-%m-%dT%H:%M:%S.%fZ" if satellite in ["MTG-I1", "MTG-S1"] else "%Y-%m-%dT%H:%M:%SZ" # convert iso strings to posixtime - datetimes = ep.processing.convert_string_to_datetime(variables["Epoch_iso"], time_format=time_format) + datetimes = ep.utils.convert_string_to_datetime(variables["Epoch_iso"], time_format=time_format) variables["Epoch"] = ep.Variable( data=np.asarray([t.timestamp() for t in datetimes]), original_unit=ep.units.posixtime ) diff --git a/examples/GOES/process_goes_r_mps_high.py b/examples/GOES/process_goes_r_mps_high.py index c628eac..e28c37d 100644 --- a/examples/GOES/process_goes_r_mps_high.py +++ b/examples/GOES/process_goes_r_mps_high.py @@ -107,7 +107,7 @@ def process_goes_r_mps_high( # fold pitch angles around 90 degree local_pa = local_pa_var.get_data(u.degree) - local_pa_folded = np.where(local_pa > 90, local_pa - 90, local_pa) # noqa: PLR2004 + local_pa_folded = np.where(local_pa > 90, local_pa - 90, local_pa) local_pa_var.set_data(local_pa_folded, unit=u.degree) # sort pitch angles in ascending order and apply to fluxes @@ -233,12 +233,12 @@ def _get_magn_variables( ) extraction_infos = [ - ep.ExtractionInfo(name_or_column="time", unit=ep.units.j2k, result_key="time"), - ep.ExtractionInfo(name_or_column="DQF", unit=u.dimensionless_unscaled, result_key="dqf"), - ep.ExtractionInfo(name_or_column="b_brf", unit=u.nT, result_key="b_brf"), + ep.processing.ExtractionInfo(name_or_column="time", unit=ep.units.j2k, result_key="time"), + ep.processing.ExtractionInfo(name_or_column="DQF", unit=u.dimensionless_unscaled, result_key="dqf"), + ep.processing.ExtractionInfo(name_or_column="b_brf", unit=u.nT, result_key="b_brf"), ] - return ep.extract_variables_from_files( + return ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", @@ -269,11 +269,11 @@ def _get_ephe_variables( ) extraction_infos = [ - ep.ExtractionInfo(name_or_column="time", unit=ep.units.j2k, result_key="time"), - ep.ExtractionInfo(name_or_column="gse_xyz", unit=u.km, result_key="xgse"), + ep.processing.ExtractionInfo(name_or_column="time", unit=ep.units.j2k, result_key="time"), + ep.processing.ExtractionInfo(name_or_column="gse_xyz", unit=u.km, result_key="xgse"), ] - return ep.extract_variables_from_files( + return ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", @@ -304,26 +304,28 @@ def _get_mps_high_variables( ) extraction_infos = [ - ep.ExtractionInfo(name_or_column="time", unit=ep.units.j2k, result_key="time"), - ep.ExtractionInfo( + ep.processing.ExtractionInfo(name_or_column="time", unit=ep.units.j2k, result_key="time"), + ep.processing.ExtractionInfo( name_or_column="AvgDiffElectronFlux", unit=(u.cm**2 * u.s * u.keV * u.sr) ** (-1), result_key="diff_flux" ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( name_or_column="AvgDiffElectronFluxUncert", unit=(u.cm**2 * u.s * u.keV * u.sr) ** (-1), result_key="diff_flux_uncert", ), - ep.ExtractionInfo(name_or_column="DiffElectronEffectiveEnergy", unit=u.keV, result_key="diff_energy"), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( + name_or_column="DiffElectronEffectiveEnergy", unit=u.keV, result_key="diff_energy" + ), + ep.processing.ExtractionInfo( name_or_column="AvgIntElectronFlux", unit=(u.cm**2 * u.s * u.sr) ** (-1), result_key="int_flux" ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( name_or_column="AvgIntElectronFluxUncert", unit=(u.cm**2 * u.s * u.sr) ** (-1), result_key="int_flux_uncert" ), - ep.ExtractionInfo(name_or_column="IntElectronEffectiveEnergy", unit=u.keV, result_key="int_energy"), + ep.processing.ExtractionInfo(name_or_column="IntElectronEffectiveEnergy", unit=u.keV, result_key="int_energy"), ] - return ep.extract_variables_from_files( + return ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", diff --git a/examples/GOES/process_goes_realtime.py b/examples/GOES/process_goes_realtime.py index 54b46a3..3390728 100644 --- a/examples/GOES/process_goes_realtime.py +++ b/examples/GOES/process_goes_realtime.py @@ -68,24 +68,24 @@ def process_goes_real_time( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="time_tag", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="energy", unit=u.keV, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO", name_or_column="flux", unit=(u.cm**2 * u.s * u.keV) ** (-1), dependent_variables=["time_tag", "energy"], ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="sat_id", name_or_column="satellite", unit=u.dimensionless_unscaled, @@ -93,7 +93,7 @@ def process_goes_real_time( ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", @@ -106,7 +106,7 @@ def process_goes_real_time( logger.info(f"Processing satellite: {sat_name}") # parse time strings - datetimes = ep.processing.convert_string_to_datetime(variables["Epoch"], time_format="%Y-%m-%dT%H:%M:%SZ") + datetimes = ep.utils.convert_string_to_datetime(variables["Epoch"], time_format="%Y-%m-%dT%H:%M:%SZ") variables["Epoch"].set_data(np.asarray([t.timestamp() for t in datetimes]), ep.units.posixtime) # generated weighted energy channels diff --git a/examples/POES/process_poes_meped.py b/examples/POES/process_poes_meped.py index 157513e..de4d72d 100644 --- a/examples/POES/process_poes_meped.py +++ b/examples/POES/process_poes_meped.py @@ -45,7 +45,6 @@ def process_poes_meped_electron( num_cores: int = 32, bin_cadence: timedelta = timedelta(minutes=5), ) -> None: - data_path_stem = f"{raw_data_path}/YYYY/MM/{satellite_str}/" url = f"https://spdf.gsfc.nasa.gov/pub/data/noaa/{satellite_str}/sem2_fluxes-2sec/YYYY/" file_name_stem = satellite_str + "_poes-sem2_fluxes-2sec_YYYYMMDD_.{3}.cdf" @@ -60,50 +59,50 @@ def process_poes_meped_electron( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="mep_ele_int_energies", unit=u.keV, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEIU", name_or_column="mep_ele_flux", unit=(u.cm**2 * u.s * u.sr) ** (-1), ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="PA_local_t0", name_or_column="meped_alpha_0_sat", unit=u.deg, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="PA_local_t90", name_or_column="meped_alpha_90_sat", unit=u.deg, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="alt", name_or_column="alt", unit=u.km, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="lon", name_or_column="lon", unit=u.deg, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="lat", name_or_column="lat", unit=u.deg, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", @@ -130,7 +129,9 @@ def process_poes_meped_electron( variables["FEIU"].transpose_data((0, 2, 1)) # stack pitch angles - pa_arr = np.stack((variables["PA_local_t0"].get_data(u.deg), variables["PA_local_t90"].get_data(u.deg))).T.astype(np.float64) + pa_arr = np.stack((variables["PA_local_t0"].get_data(u.deg), variables["PA_local_t90"].get_data(u.deg))).T.astype( + np.float64 + ) variables["PA_local"] = ep.Variable(data=pa_arr, original_unit=u.deg) del variables["PA_local_t0"] @@ -207,9 +208,7 @@ def process_poes_meped_electron( logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) logging.getLogger().setLevel(logging.INFO) - parser = argparse.ArgumentParser( - description="Process MEPED data from POES satellites." - ) + parser = argparse.ArgumentParser(description="Process MEPED data from POES satellites.") parser.add_argument( "--start_time", type=str, @@ -239,17 +238,16 @@ def process_poes_meped_electron( # with tempfile.TemporaryDirectory() as tmpdir: for sat_str in get_args(poes_satellite_literal): - print(f"Processing {sat_str}!") - # try: - process_poes_meped_electron( - start_time=dt_start, - end_time=dt_end, - satellite_str=sat_str, - irbem_lib_path=args.irbem_lib_path, - raw_data_path=".", - processed_data_path=".", - num_cores=64, - bin_cadence=timedelta(seconds=10), - ) - # except: - # continue \ No newline at end of file + try: + process_poes_meped_electron( + start_time=dt_start, + end_time=dt_end, + satellite_str=sat_str, + irbem_lib_path=args.irbem_lib_path, + raw_data_path=".", + processed_data_path=".", + num_cores=64, + bin_cadence=timedelta(seconds=10), + ) + except: # noqa: E722, S112 + continue diff --git a/examples/POES/process_poes_ted.py b/examples/POES/process_poes_ted.py index ee3da2c..6734a32 100644 --- a/examples/POES/process_poes_ted.py +++ b/examples/POES/process_poes_ted.py @@ -47,7 +47,6 @@ def process_poes_meped_electron( *, calculate_Lm_Lstar: bool = False, # noqa: N803 ) -> None: - data_path_stem = f"{raw_data_path}/YYYY/MM/{satellite_str}/" url = f"https://spdf.gsfc.nasa.gov/pub/data/noaa/{satellite_str}/sem2_fluxes-2sec/YYYY/" file_name_stem = satellite_str + "_poes-sem2_fluxes-2sec_YYYYMMDD_.{3}.cdf" @@ -62,50 +61,50 @@ def process_poes_meped_electron( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="ted_ele_diff_energies", unit=u.eV, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDU", name_or_column="ted_ele_flux", unit=(u.cm**2 * u.s * u.sr * u.eV) ** (-1), ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="PA_local_t0", name_or_column="ted_alpha_0_sat", unit=u.deg, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="PA_local_t30", name_or_column="ted_alpha_30_sat", unit=u.deg, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="alt", name_or_column="alt", unit=u.km, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="lon", name_or_column="lon", unit=u.deg, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="lat", name_or_column="lat", unit=u.deg, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", @@ -132,7 +131,9 @@ def process_poes_meped_electron( variables["FEDU"].transpose_data((0, 2, 1)) # stack pitch angles - pa_arr = np.stack((variables["PA_local_t0"].get_data(u.deg), variables["PA_local_t30"].get_data(u.deg))).T.astype(np.float64) + pa_arr = np.stack((variables["PA_local_t0"].get_data(u.deg), variables["PA_local_t30"].get_data(u.deg))).T.astype( + np.float64 + ) pa_arr = np.where(pa_arr > 90, 180 - pa_arr, pa_arr) variables["PA_local"] = ep.Variable(data=pa_arr, original_unit=u.deg) @@ -196,8 +197,10 @@ def process_poes_meped_electron( } if calculate_Lm_Lstar: - variables_to_save |= {"position/T89/Lm": magnetic_field_variables["Lm_T89"], - "position/T89/Lstar": magnetic_field_variables["Lstar_T89"]} + variables_to_save |= { + "position/T89/Lm": magnetic_field_variables["Lm_T89"], + "position/T89/Lstar": magnetic_field_variables["Lstar_T89"], + } saving_strategy = ep.saving_strategies.MonthlyNetCDFStrategy( base_data_path=Path(processed_data_path) / "POES" / sat_str, @@ -214,9 +217,7 @@ def process_poes_meped_electron( logging.getLogger().addHandler(logging.StreamHandler(sys.stdout)) logging.getLogger().setLevel(logging.INFO) - parser = argparse.ArgumentParser( - description="Process TED data from POES satellites." - ) + parser = argparse.ArgumentParser(description="Process TED data from POES satellites.") parser.add_argument( "--start_time", type=str, @@ -246,7 +247,6 @@ def process_poes_meped_electron( # with tempfile.TemporaryDirectory() as tmpdir: for sat_str in get_args(poes_satellite_literal): - print(f"Processing {sat_str}!") try: process_poes_meped_electron( start_time=dt_start, @@ -258,5 +258,5 @@ def process_poes_meped_electron( num_cores=64, bin_cadence=timedelta(seconds=2), ) - except: - continue \ No newline at end of file + except: # noqa: E722, S112 + continue diff --git a/examples/PROBAV/process_ept_electron_fluxes.py b/examples/PROBAV/process_ept_electron_fluxes.py index 825ff16..61e17c6 100644 --- a/examples/PROBAV/process_ept_electron_fluxes.py +++ b/examples/PROBAV/process_ept_electron_fluxes.py @@ -68,30 +68,42 @@ def process_ept_electron_fluxes( flux_unit = typing.cast("u.Unit", (u.cm**2 * u.s * u.sr * u.MeV) ** (-1)) extraction_infos = [ - ep.ExtractionInfo(result_key="year", name_or_column="Y", unit=u.dimensionless_unscaled, np_dtype=np.int32), - ep.ExtractionInfo(result_key="month", name_or_column="M", unit=u.dimensionless_unscaled, np_dtype=np.int32), - ep.ExtractionInfo(result_key="day", name_or_column="D", unit=u.dimensionless_unscaled, np_dtype=np.int32), - ep.ExtractionInfo(result_key="hour", name_or_column="H", unit=u.dimensionless_unscaled, np_dtype=np.int32), - ep.ExtractionInfo(result_key="minute", name_or_column="MI", unit=u.dimensionless_unscaled, np_dtype=np.int32), - ep.ExtractionInfo(result_key="second", name_or_column="S", unit=u.dimensionless_unscaled, np_dtype=np.int32), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( + result_key="year", name_or_column="Y", unit=u.dimensionless_unscaled, np_dtype=np.int32 + ), + ep.processing.ExtractionInfo( + result_key="month", name_or_column="M", unit=u.dimensionless_unscaled, np_dtype=np.int32 + ), + ep.processing.ExtractionInfo( + result_key="day", name_or_column="D", unit=u.dimensionless_unscaled, np_dtype=np.int32 + ), + ep.processing.ExtractionInfo( + result_key="hour", name_or_column="H", unit=u.dimensionless_unscaled, np_dtype=np.int32 + ), + ep.processing.ExtractionInfo( + result_key="minute", name_or_column="MI", unit=u.dimensionless_unscaled, np_dtype=np.int32 + ), + ep.processing.ExtractionInfo( + result_key="second", name_or_column="S", unit=u.dimensionless_unscaled, np_dtype=np.int32 + ), + ep.processing.ExtractionInfo( result_key="millisecond", name_or_column="mS", unit=u.dimensionless_unscaled, np_dtype=np.int32 ), - ep.ExtractionInfo(result_key="flag", name_or_column="FLAG", unit=u.dimensionless_unscaled), - ep.ExtractionInfo(result_key="chi2", name_or_column="e-Chi2", unit=u.dimensionless_unscaled), - ep.ExtractionInfo(result_key="ch0", name_or_column="e-fl-00", unit=flux_unit), - ep.ExtractionInfo(result_key="ch1", name_or_column="e-fl-01", unit=flux_unit), - ep.ExtractionInfo(result_key="ch2", name_or_column="e-fl-02", unit=flux_unit), - ep.ExtractionInfo(result_key="ch3", name_or_column="e-fl-03", unit=flux_unit), - ep.ExtractionInfo(result_key="ch4", name_or_column="e-fl-04", unit=flux_unit), - ep.ExtractionInfo(result_key="ch5", name_or_column="e-fl-05", unit=flux_unit), - ep.ExtractionInfo(result_key="PA_local", name_or_column="Pitch", unit=u.deg), - ep.ExtractionInfo(result_key="rad", name_or_column="Rad", unit=u.km), - ep.ExtractionInfo(result_key="lon", name_or_column="Long", unit=u.deg), - ep.ExtractionInfo(result_key="lat", name_or_column="Lat", unit=u.deg), + ep.processing.ExtractionInfo(result_key="flag", name_or_column="FLAG", unit=u.dimensionless_unscaled), + ep.processing.ExtractionInfo(result_key="chi2", name_or_column="e-Chi2", unit=u.dimensionless_unscaled), + ep.processing.ExtractionInfo(result_key="ch0", name_or_column="e-fl-00", unit=flux_unit), + ep.processing.ExtractionInfo(result_key="ch1", name_or_column="e-fl-01", unit=flux_unit), + ep.processing.ExtractionInfo(result_key="ch2", name_or_column="e-fl-02", unit=flux_unit), + ep.processing.ExtractionInfo(result_key="ch3", name_or_column="e-fl-03", unit=flux_unit), + ep.processing.ExtractionInfo(result_key="ch4", name_or_column="e-fl-04", unit=flux_unit), + ep.processing.ExtractionInfo(result_key="ch5", name_or_column="e-fl-05", unit=flux_unit), + ep.processing.ExtractionInfo(result_key="PA_local", name_or_column="Pitch", unit=u.deg), + ep.processing.ExtractionInfo(result_key="rad", name_or_column="Rad", unit=u.km), + ep.processing.ExtractionInfo(result_key="lon", name_or_column="Long", unit=u.deg), + ep.processing.ExtractionInfo(result_key="lat", name_or_column="Lat", unit=u.deg), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, file_cadence="daily", diff --git a/examples/VanAllenProbes/process_ect_combined.py b/examples/VanAllenProbes/process_ect_combined.py index 692f2f7..52edba6 100644 --- a/examples/VanAllenProbes/process_ect_combined.py +++ b/examples/VanAllenProbes/process_ect_combined.py @@ -47,46 +47,46 @@ def process_ect_combined( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch", unit=ep.units.cdf_epoch, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="FEDU_Energy", unit=u.keV, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Pitch_angle", name_or_column="FEDU_Alpha", unit=u.deg, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDU", name_or_column="FEDU", unit=(u.cm**2 * u.s * u.sr * u.keV) ** (-1), ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDU_quality", name_or_column="FEDU_Quality", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDO", name_or_column="FEDO", unit=(u.cm**2 * u.s * u.sr * u.keV) ** (-1), ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="xGEO", name_or_column="Position", unit=u.km, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", diff --git a/examples/VanAllenProbes/process_efw_emfisis_density_combined.py b/examples/VanAllenProbes/process_efw_emfisis_density_combined.py index 0ad2ccd..7710de4 100644 --- a/examples/VanAllenProbes/process_efw_emfisis_density_combined.py +++ b/examples/VanAllenProbes/process_efw_emfisis_density_combined.py @@ -177,24 +177,24 @@ def _get_efw_variables( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="epoch", unit=ep.units.cdf_epoch, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Density", name_or_column="density", unit=u.cm**-3, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="xGSE", name_or_column="position_gse", unit=u.km, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", @@ -227,24 +227,24 @@ def _get_emfisis_variables( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch", unit=ep.units.tt2000, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Density", name_or_column="density", unit=u.cm**-3, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Digi_type", name_or_column="digi_type", unit=u.dimensionless_unscaled, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", @@ -282,19 +282,19 @@ def _get_and_time_bin_hiss_derived_densities( file_name_stem = "rbsp-b_hiss_density_arase_recalibrated_v2.txt" extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column=0, unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Density", name_or_column=1, unit=u.cm**-3, ), ] - hiss_derived_vars = ep.extract_variables_from_files( + hiss_derived_vars = ep.processing.extract_variables_from_files( start_time=start_time, end_time=end_time, file_cadence="single_file", @@ -304,7 +304,7 @@ def _get_and_time_bin_hiss_derived_densities( pd_read_csv_kwargs={"skiprows": 4, "sep": "\t", "dtype": {0: str, 1: np.float64}}, ) - datetimes = ep.processing.convert_string_to_datetime(hiss_derived_vars["Epoch"], time_format="%Y-%m-%dT%H:%M:%S.%f") + datetimes = ep.utils.convert_string_to_datetime(hiss_derived_vars["Epoch"], time_format="%Y-%m-%dT%H:%M:%S.%f") timestamps = np.asarray([dt.timestamp() for dt in datetimes]) hiss_derived_vars["Epoch"].set_data(timestamps, unit=ep.units.posixtime) diff --git a/examples/VanAllenProbes/process_hope_electrons.py b/examples/VanAllenProbes/process_hope_electrons.py index 516a8c3..ae113e3 100644 --- a/examples/VanAllenProbes/process_hope_electrons.py +++ b/examples/VanAllenProbes/process_hope_electrons.py @@ -50,35 +50,35 @@ def process_hope_electrons( ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch_Ele", unit=ep.units.cdf_epoch, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="HOPE_ENERGY_Ele", unit=u.eV, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Pitch_angle", name_or_column="PITCH_ANGLE", unit=u.deg, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDU", name_or_column="FEDU", unit=(u.cm**2 * u.s * u.sr * u.keV) ** (-1), ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="xGEO", name_or_column="Position_Ele", unit=u.km, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", diff --git a/examples/minimal_example.py b/examples/minimal_example.py index 80356f9..caa86f0 100644 --- a/examples/minimal_example.py +++ b/examples/minimal_example.py @@ -37,36 +37,36 @@ ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="Epoch", unit=ep.units.cdf_epoch, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Energy", name_or_column="FEDU_Energy", unit=u.keV, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Pitch_angle", name_or_column="FEDU_Alpha", unit=u.deg, is_time_dependent=False, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="FEDU", name_or_column="FEDU", unit=(u.cm**2 * u.s * u.sr * u.keV) ** (-1), ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="xGEO", name_or_column="Position", unit=u.km, ), ] -variables = ep.extract_variables_from_files( +variables = ep.processing.extract_variables_from_files( start_time=start_time, end_time=end_time, file_cadence="daily", diff --git a/mkdocs.yml b/mkdocs.yml index 638f35c..608bb75 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -47,7 +47,7 @@ nav: - API_reference/variable.md - Core Functions: - API_reference/download.md - - API_reference/extract_variables_from_files.md + - API_reference/processing/extraction.md - API_reference/save.md - Processing Functions: - API_reference/processing/bin_by_time.md @@ -57,7 +57,6 @@ nav: - API_reference/processing/compute_phase_space_density.md - API_reference/processing/compute_pitch_angles_for_telescopes.md - API_reference/processing/construct_pitch_angle_distribution.md - - API_reference/processing/convert_string_to_datetime.md - API_reference/processing/fold_pitch_angles_and_flux.md - API_reference/processing/calculate_geo_coords_from_tle.md - Utilities: @@ -67,6 +66,7 @@ nav: - Release mode: API_reference/utilities/release_mode.md - Scripts: API_reference/utilities/scripts.md - Units: API_reference/utilities/units.md + - API_reference/utilities/convert_string_to_datetime.md - Saving Strategies: - DataOrg Strategy: API_reference/saving_strategies/data_org.md - Monthly H5 Strategy: API_reference/saving_strategies/monthly_h5.md diff --git a/ruff.toml b/ruff.toml index 1fd8f5c..44008d1 100644 --- a/ruff.toml +++ b/ruff.toml @@ -16,8 +16,9 @@ ignore = [ "COM812", # disable trailing whitespace because we use formatter "S101", # allow assert statements "RET504", # allow assignment before return + "PLR2004", # allow constants inline ] -exclude = ["*.ipynb", "IRBEM/*", "setup.py"] +exclude = ["*.ipynb", "setup.py"] [lint.pydocstyle] convention = "google" diff --git a/tests/comparisons/test_mageph_rbsp.py b/tests/comparisons/test_mageph_rbsp.py index 891de76..fa0ae38 100644 --- a/tests/comparisons/test_mageph_rbsp.py +++ b/tests/comparisons/test_mageph_rbsp.py @@ -20,8 +20,6 @@ from el_paso import InstrumentEnum, MfmEnum, RBMDataSet from examples.VanAllenProbes.process_hope_electrons import process_hope_electrons -# ruff: noqa: PLR2004 - sat_str_list = ["a", "b"] mag_field_list = ["TS04", "T89"] @@ -84,29 +82,29 @@ def test_mageph_rbsp(sat_str: Literal["a", "b"], mag_field: Literal["T89", "TS04 ) extraction_infos = [ - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Epoch", name_or_column="IsoTime", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Lstar", name_or_column="Lstar", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Alpha_eq", name_or_column="Alpha", unit=u.dimensionless_unscaled, ), - ep.ExtractionInfo( + ep.processing.ExtractionInfo( result_key="Kp", name_or_column="Kp", unit=u.dimensionless_unscaled, ), ] - variables = ep.extract_variables_from_files( + variables = ep.processing.extract_variables_from_files( start_time, end_time, "daily", "tests/comparisons/raw_data", file_name_stem, extraction_infos ) diff --git a/tutorials/1_download_data_and_extracting_variables.ipynb b/tutorials/1_download_data_and_extracting_variables.ipynb index de6f57d..8e2391a 100644 --- a/tutorials/1_download_data_and_extracting_variables.ipynb +++ b/tutorials/1_download_data_and_extracting_variables.ipynb @@ -16,83 +16,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "ce5e0be4", "metadata": {}, "outputs": [ { "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "DATETIME", - "rawType": "object", - "type": "string" - }, - { - "name": "alt(km)", - "rawType": "float64", - "type": "float" - }, - { - "name": "lat(deg)", - "rawType": "float64", - "type": "float" - }, - { - "name": "lon(deg)", - "rawType": "float64", - "type": "float" - } - ], - "ref": "7a65f54c-cea7-49f9-9d6f-cd168fb586ec", - "rows": [ - [ - "0", - "2019-07-30 17:04:00", - "11362.75841", - "11.391923", - "154.268925" - ], - [ - "1", - "2019-07-30 17:09:00", - "11516.45781", - "8.645016", - "156.193306" - ], - [ - "2", - "2019-07-30 17:14:00", - "11650.92313", - "5.916772", - "158.021787" - ], - [ - "3", - "2019-07-30 17:19:00", - "11765.75939", - "3.20951", - "159.778169" - ], - [ - "4", - "2019-07-30 17:24:00", - "11860.64721", - "0.524741", - "161.484253" - ] - ], - "shape": { - "columns": 4, - "rows": 5 - } - }, "text/html": [ "
\n", "