Source code for pyspedas.mth5.load_fdsn

import os

import mth5
import numpy as np
import pandas as pd
import pyspedas

import pyspedas
from .config import CONFIG
from pyspedas.tplot_tools import store_data, options

# MTH5 installation is checked in __init__
from mth5.clients.fdsn import FDSN
from mth5.mth5 import MTH5

from pyspedas.mth5.utilities import mth5_time_str
from pyspedas.mth5.utilities import _list_of_fdsn_channels

from datetime import datetime


def _disable_loguru_warnings(record):
    if record["extra"].get("no_warning"):
        return record["level"].no != mth5.logger.level("WARNING").no
    return True

def _request_df_from_input(trange, network, station):
    """
    Create a request_df from input parameters.

    Parameters
    ----------
        trange : list of str
            Time range of interest.
        network : str
            Network name.
        station : str
            Station name.

    Returns
    -------
    request_df : pandas.DataFrame
        Request dataframe.
    """

    # Get MTH5 file from the server
    # we use "*F*" channel instead of "*" to extract magnetic data
    # TODO: modify according to SEED manual appendix A: https://www.fdsn.org/pdf/SEEDManual_V2.4_Appendix-A.pdf
    channel_list = _list_of_fdsn_channels()
    # channel_list = "*F*"

    # Create request_df from input parameters
    request_df = pd.DataFrame(
        {
            "network": [network],
            "station": [station],
            "location": ["--"],
            "channel": [channel_list],
            "start": [mth5_time_str(trange[0])],  # ["2019-11-14T00:00:00"],
            "end": [mth5_time_str(trange[1])]  # ["2019-11-15T00:00:00"]
        }
    )

    return request_df


def _validate_date_format(date_string):
    """
    Validate if a date string is in the correct format.

    Parameters
    ----------
    date_string : str
        The date string to validate. Expected format: YYYY-MM-DDThh:mm:ss.

    Raises
    ------
    ValueError
        If the date string is not in the expected format.
    """
    try:
        datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S")
    except ValueError:
        raise ValueError(f"Invalid date format: {date_string}. Expected format: YYYY-MM-DDThh:mm:ss")



[docs]
def load_fdsn(trange=None, network=None, station=None,
              nodownload=False, noexception=False, print_request=False,
              nowarnings=False, request_df=None):
    """
    Load FDSN data using MTH5 interface.

    Parameters
    ----------
    trange : list of str
        Time range of interest.
    network : str
        Network name.
    station : str
        Station name.
    nodownload : bool, default=False
        If h5 file is already created do not load another one.
    noexception : bool, default=False
        If true, do not raise an exception produced by FDSN.
    print_request : bool, default=False
        Print request_df structure, which can be useful for debugging the request.
    nowarnings : bool, default=False
        If true, disable loguru warnings.
    request_df : pandas.DataFrame, optional
        Custom request_df dataframe for the `make_mth5_from_fdsn_client` method of the `FDSN` class.
        This parameter is optional, and it is not advised to use it unless the user knows exactly what to do.
        request_df must contain the following:

        - One `station`, one `network`, and one `location` as a list.
        - A list of 3 strings for `channels`.
        - The `start` and `end` date fields must contain a list of one string.

        See `pyspedas.mth5.load_fdsn._request_df_from_input` implementation for reference.

    Returns
    -------
    tplot_variable : str or None
        Tplot variable name. Tplot variable is created if data is loaded successfully, None otherwise.
    """

    # mth5.logger.remove()
    mth5.logger._core.extra["no_warning"] = nowarnings

    # If trange is not specified we don't know what to load
    if trange is None:
        pyspedas.logging.error("trange not specified")
        return

    if not network:
        pyspedas.logging.error("Network not specified")
        return

    if not station:
        pyspedas.logging.error("Station not specified")
        return

    # Handle request_df
    if request_df is None:
        request_df = _request_df_from_input(trange, network, station)
    else:
        network = request_df.network
        station = request_df.station
        trange = [request_df.start[0], request_df.end[0]]

    if print_request:
        print(request_df.to_string())

    # Create time variables that correspond to the request time period
    request_start = datetime.fromisoformat(request_df.start[0])
    request_end = datetime.fromisoformat(request_df.end[0])

    # Validate time period
    _validate_date_format(request_df.start[0])
    _validate_date_format(request_df.end[0])

    if request_end <= request_start:
        pyspedas.logging.error(f'End time {request_end} earlier or equal to start time {request_start}')
        return

    # Determine where data will be stored
    mth5dir = CONFIG['local_data_dir']
    # if os.environ.get('SPEDAS_DATA_DIR'):
    #     mth5dir = os.environ['SPEDAS_DATA_DIR']

    mth5_filename = f"{network}_{station}_{trange[0]}_{trange[1]}.h5".replace(":", "").replace("-", "")
    mth5_pathfile = os.path.join(mth5dir, mth5_filename)

    # Implementation of caching
    if nodownload and os.path.isfile(mth5_pathfile):
        mth5_path = mth5_pathfile
    else:
        # Initialize FDSN object
        fdsn_object = FDSN(mth5_version='0.2.0', client="EARTHSCOPE")
        try:
            mth5_path = fdsn_object.make_mth5_from_fdsn_client(request_df, interact=False, path=mth5dir)
            if os.path.isfile(mth5_pathfile):
                pyspedas.logger.info(f"Deleting cached {mth5_pathfile}")
                os.remove(mth5_pathfile)
            pyspedas.logger.info(f"Creating cached {mth5_pathfile}")
            os.rename(mth5_path, mth5_pathfile)
            mth5_path = mth5_pathfile
        except Exception as e:
            # Hande mth5 object initialization error.
            pyspedas.logger.error(f"Cannot initialize mth5 object:\n{e}")

            # # Check if file was the cache file was created
            # if 'mth5_path' in locals() and os.path.isfile(mth5_path):
            #     pyspedas.logger.info(f"Cache was created and will be deleted: {mth5_path}")
            #     os.remove(mth5_path)

            # Exit code by flag
            if noexception:
                return
            raise
        finally:
            mth5_tmp = fdsn_object.make_filename(request_df)
            mth5_file = os.path.join(mth5dir, mth5_tmp)
            if os.path.isfile(mth5_file):
                pyspedas.logger.info(f"Deleting mth5 temporary h5 file {mth5_tmp}")
                os.remove(mth5_file)

    # Using MTH5 as a context manager
    with MTH5() as mth5_object:
        mth5_object.open_mth5(mth5_path)
        surveys = mth5_object.surveys_group.groups_list

        # mth5_object = MTH5()
        # mth5_object.open_mth5(mth5_path)
        # surveys = mth5_object.surveys_group.groups_list
        # # print(surveys)

        # Create new data for time, x, y and z
        time = np.array([])
        attributes = [('hx', 'bx'), ('hy', 'by'), ('hz', 'bz')]
        variables = {'x': np.array([]), 'y': np.array([]), 'z': np.array([])}

        attr_dict = {'filename': mth5_path}  # metadata with filename
        units = set()  # basic units
        measurements_type = set()  # basic type of measurements

        # loop over surveys and runs
        for survey in surveys:
            station_data = mth5_object.get_station(station_name=station, survey=survey)
            attr_dict["metadata"] = station_data.metadata

            # TODO: Check orientation reference frame
            # "orientation.reference_frame": "geographic"

            # Loop over runs
            for run_name in station_data.metadata.run_list:
                # Get the data
                run_data = station_data.get_run(run_name=run_name)

                # Check run time coverage
                # The times in the metadata may already be strings, or objects that are convertible to strings,
                # depending on which versions of mth5, obspy, and mt-metadata are being used.
                # We'll just call str() on whatever we get back and hope for the best...
                run_start = datetime.fromisoformat(str(run_data.metadata.time_period.start)).replace(tzinfo=None)
                run_end = datetime.fromisoformat(str(run_data.metadata.time_period.end)).replace(tzinfo=None)

                # Skip processing if run is outside requested time range
                if run_start > request_end or run_end < request_start or request_end <= request_start:
                    continue

                # Get the run table summary
                # Time clips according to original times
                run_ts = run_data.to_runts(start=request_df.start[0], end=request_df.end[0])

                # Determine is we have all components
                # can channels_recorded_magnetic be missing from the dataset?
                # TODO: What to do with the runs without 3 components - operate only with 3 components for now...
                if 'channels_recorded_magnetic' in run_ts.dataset.attrs:
                    if len(run_ts.dataset.channels_recorded_magnetic) != 3:
                        pyspedas.logger.info("Run has less than 3 components")
                        continue
                else:
                    pyspedas.logger.warning("Run dataset has no channels_recorded_magnetic attribute")
                    continue

                # Check if the filters are available to calibrate data
                if len(run_ts.filters) == 0:
                    pyspedas.logger.warning("Filters were not added to run_ts")
                    continue

                # Convert counts to physical units
                run_ts = run_ts.calibrate()

                # Copy numpy arrays to ensure that data is in memory after mth5 is closed
                if time.size == 0:
                    time = run_ts.dataset.time.to_numpy()
                else:
                    time = np.append(time, run_ts.dataset.time.to_numpy())

                # Check if all required attributes are present
                all_attributes_present = all(
                    hasattr(run_ts.dataset, attr_h) or hasattr(run_ts.dataset, attr_b)
                    for attr_h, attr_b in attributes
                )
                if all_attributes_present:
                    for (attr_h, attr_b), var_name in zip(attributes, variables.keys()):
                        if hasattr(run_ts.dataset, attr_h):
                            data = getattr(run_ts.dataset, attr_h)
                        elif hasattr(run_ts.dataset, attr_b):
                            data = getattr(run_ts.dataset, attr_b)
                        else:
                            pyspedas.logger.warning(f"Neither {attr_h} nor {attr_b} found in run dataset")
                            continue

                        variables[var_name] = np.append(variables[var_name], data.to_numpy())

                        try:
                            units.add(data.units)
                            measurements_type.add(data.type)
                        except AttributeError:
                            pyspedas.logger.warning(f"Problem with adding {attr_h}/{attr_b} dataset units or type")

                    # After the loop, you can reassign the modified arrays back to x, y, z
                    # x, y, z = variables['x'], variables['y'], variables['z']
                else:
                    pyspedas.logger.warning("Some of the attributes are not present in run_ts dataset")
                    continue

        # TODO: Should data be sorted before saving
        data = {'x': time, 'y': np.vstack((variables['x'], variables['y'], variables['z'])).T}
        tplot_variable = 'fdsn_' + network + '_' + station

        # Handle legends
        legend_names = ['x', 'y', 'z']
        try:
            legend_names = station_data.metadata.channels_recorded
        except AttributeError or UnboundLocalError:
            pyspedas.logger.error("station_data was not defined correctly")
            # Also raise this exception if noexception flag is set
            if not noexception:
                raise

        tplot_variable_created = False
        try:
            store_data(tplot_variable, data=data, attr_dict=attr_dict)
            tplot_variable_created = True
        except IndexError:
            pyspedas.logger.error(f"Cannot create empty tplot variabe {tplot_variable}")
        except Exception as E:
            pyspedas.logger.error(f"Unexpected error {E} while creating tplot variable {tplot_variable}")
            if not noexception:
                raise

        if tplot_variable_created:
            tplot_options = {
                "name": f"FDSN: {network}, station: {station}",  # network and stations
                "ytitle": f"{','.join(list(measurements_type))}",  # measurement
                "ysubtitle": f"{','.join(list(units))}",  # units
                "legend_names": legend_names
            }

            options(tplot_variable, opt_dict=tplot_options)

            # TODO: For potential future implementation
            #     filename : bool, default=False
            #         If true, return filename as a second return parameter

            return tplot_variable