# Source code for pyspedas.tplot_tools.importers.netcdf_to_tplot

import os
import copy
import calendar
import logging
import numpy as np
import xarray as xr
import pyspedas
from pyspedas.tplot_tools import tplot, store_data
from netCDF4 import Dataset, num2date


def change_time_to_unix_time(time_var):
    """
    Convert a netCDF time variable to seconds since the Unix epoch.

    Parameters
    ----------
    time_var : object
        The time variable. It must support ``time_var[:]`` and describe its
        epoch either through a ``units`` attribute (any of the spellings
        ``units``, ``Units``, ``UNITS``) or through a ``long_name`` attribute
        equal to ``"milliseconds since 1970"``.

    Returns
    -------
    list of float
        One Unix time (seconds, with a fractional microsecond part) per
        element of the time variable.

    Raises
    ------
    ValueError
        If no units can be determined from the attributes listed above.
    """
    # Capitalization of variable attributes may vary; first match wins.
    units = None
    for attr_name in ("units", "Units", "UNITS"):
        if hasattr(time_var, attr_name):
            units = getattr(time_var, attr_name)
            break

    # ICON uses nonstandard units strings
    if units == "ms":
        units = "milliseconds since 1970-01-01 00:00:00"
    # Check if the long_name attribute has stored the epoch description (POES/METOP):
    elif getattr(time_var, "long_name", None) == "milliseconds since 1970":
        units = "milliseconds since 1970-01-01 00:00:00"

    if units is None:
        # Fail with an explicit message instead of an UnboundLocalError.
        raise ValueError(
            "Time variable has no 'units' attribute and no recognized "
            "epoch description in 'long_name'."
        )

    # Read the values once; if the result exposes a ``data`` attribute
    # (e.g. a masked array), use the underlying values.
    time_data = time_var[:]
    if hasattr(time_data, "data"):
        time_data = time_data.data

    dates = num2date(time_data, units=units)
    return [
        calendar.timegm(date.timetuple()) + date.microsecond / 1e6
        for date in dates
    ]



# [docs]
def netcdf_to_tplot(
    filenames, time="", prefix="", suffix="", plot=False, merge=False, strict_time=True
):
    """
    Create tplot variables from netCDF files.

    Parameters
    ----------
    filenames : str, or list/tuple of str
        The file names and full paths of netCDF files. Duplicates are removed
        and the files are processed in sorted order.
    time : str, optional
        This is not used anymore. Remains here for backward compatibility.
        Currently, the name of the time variable is found in the netCDF variables themselves.
    prefix : str, optional
        The tplot variable names will be given this prefix.
        By default, no prefix is added.
    suffix : str, optional
        The tplot variable names will be given this suffix.
        By default, no suffix is added.
    plot : bool, optional
        If True, the data is plotted immediately after being generated. All tplot
        variables generated from this function will be on the same plot.
        By default, a plot is not created.
    merge : bool, optional
        If True, then data from 'filenames' will be merged into existing tplot variables.
        If False (default), then data from 'filenames' will overwrite existing tplot variables.
        Data in 'filenames' will always be merged/combined by themselves.
    strict_time : bool, optional
        If True (default), variables will be loaded into tplot variables only if
        their data length matches the time length.
        If False, all variables will be loaded. This is useful because some
        variables may contain general information, like satellite longitude.

    Returns
    -------
    stored_variables : list of str
        List of tplot variables created. Empty if 'filenames' is invalid or
        no file could be read.

    Examples
    --------
    Create tplot variables from a GOES netCDF file:

    >>> import pyspedas
    >>> file = "/Users/user_name/goes_files/g15_epead_a16ew_1m_20171201_20171231.nc"
    >>> pyspedas.netcdf_to_tplot(file)

    Add a prefix, and plot immediately:

    >>> import pyspedas
    >>> file = "/Users/user_name/goes_files/g15_epead_a16ew_1m_20171201_20171231.nc"
    >>> pyspedas.netcdf_to_tplot(file, prefix='goes_prefix_', plot=True)
    """

    stored_variables = []

    if prefix is None:
        prefix = ''
    if suffix is None:
        suffix = ''

    if isinstance(filenames, str):
        filenames = [filenames]
    elif not isinstance(filenames, (list, tuple)):
        logging.error("Invalid filenames input. Must be string or list of strings.")
        return stored_variables

    # Lower-cased attribute names that may be used to flag fill values.
    fillval_atts_lc = ["fillval", "_fillval", "_fillvalue", "fillvalue", "missing_data"]

    for filename in sorted(set(filenames)):

        # Read file
        if not os.path.isfile(filename):
            logging.error("Cannot find file: %s", filename)
            continue
        # NOTE(review): the Dataset is not closed explicitly. Variables that
        # are not NaN-filled below are kept as un-read netCDF variables and
        # handed to store_data, so closing here would need confirmation that
        # store_data copies its input.
        vfile = Dataset(filename)

        # Create a dictionary that contains variables and their attributes.
        vars_and_atts = {}
        for name, variable in vfile.variables.items():
            vars_and_atts[name] = {
                attrname: getattr(variable, attrname)
                for attrname in variable.ncattrs()
            }

        # Fill in missing values for each variable with np.nan (if values are not already nan)
        # and save the masked variables to a new dictionary.
        masked_vars = {}  # Dictionary containing properly masked variables
        for var, atts_dict in vars_and_atts.items():
            reg_var = vfile.variables[var]
            # Read the values once and reuse them for every check below,
            # instead of re-reading the variable each time.
            reg_data = reg_var[:]

            # Check for some attributes that might be used to flag fill values.
            # If several attributes match, the one that comes last in the
            # variable's own attribute order takes precedence.
            var_fill_value = None
            for key in atts_dict:
                if key.lower() in fillval_atts_lc:
                    var_fill_value = atts_dict[key]

            # If the values carry their own fill value, it overrides the attributes.
            if hasattr(reg_data, "get_fill_value"):
                var_fill_value = reg_data.get_fill_value()

            # If var_fill_value is None, or already NaN, there's nothing to do here.
            # Integer arrays can't be NaN-filled, so if var_fill_value is any kind of integer, skip those too.
            # Some missions have strings defined as fill values.  (ICON)
            if (
                var_fill_value is not None
                and not isinstance(var_fill_value, (np.integer, str))
                and not np.isnan(var_fill_value)
            ):
                # We want to force missing values to be nan so that plots don't look strange
                if hasattr(reg_data, "data"):
                    raw_values = reg_data.data
                    # Match the fill value both after a float32 cast and at
                    # its native precision: a float64 fill value that is not
                    # exactly representable in float32 (e.g. -1e31) would
                    # otherwise never compare equal to float64 data.
                    is_fill = (raw_values == np.float32(var_fill_value)) | (
                        raw_values == var_fill_value
                    )
                    var_mask = np.ma.masked_where(is_fill, raw_values)
                    masked_vars[var] = np.ma.filled(var_mask, np.nan)
                else:
                    var_mask = np.ma.masked_where(
                        reg_var == np.float32(var_fill_value), reg_var
                    )
                    masked_vars[var] = np.ma.filled(var_mask, np.nan)
            else:
                masked_vars[var] = reg_var

        # A dictionary with the time variables in this file.
        times_dict = {}

        # Store each netcdf variable as a tplot variable.
        for var in vfile.variables:

            # Make sure that the variables are time-based, otherwise don't store them as tplot variables.
            dimensions = vfile[var].dimensions
            if len(dimensions) == 0 or len(dimensions[0]) == 0:
                continue

            # Find the time dependence of the current variable.
            this_time = dimensions[0]
            if this_time not in vars_and_atts:
                # For GOES satellites, sometimes we get 'record' as time dependence.
                # In that case, we can try 'time' and 'time_tag' as alternatives.
                if "time" in vars_and_atts:
                    this_time = "time"
                elif "time_tag" in vars_and_atts:
                    this_time = "time_tag"

            if this_time not in vars_and_atts:
                # If this_time does not exist, we can't save this as tplot variable.
                continue
            elif this_time == var:
                # The time the variable depends on may not have been set.
                # If 'time' is a variable and the sizes match, we can infer
                # that 'time' is the time variable we want here.
                if 'time' in vars_and_atts and vfile[var].size == vfile['time'].size:
                    this_time = "time"
                else:
                    # Otherwise it probably depends on something else / nothing,
                    # or it is its own time variable: do not save it.
                    continue

            # Find the time values (as unix times), converting each time
            # variable only once per file.
            if this_time in times_dict:
                unix_times = times_dict[this_time]
            else:
                try:
                    time_var = vfile[this_time]
                    unix_times = change_time_to_unix_time(time_var)
                    times_dict[this_time] = unix_times
                except Exception as e:
                    # In this case, we could not handle the time, print an error
                    # and carry on with the remaining variables.
                    logging.error(
                        "Could not process time variable '%s' for the netcdf variable: '%s'",
                        this_time,
                        var,
                    )
                    logging.error("Exception details: %s", e)
                    continue

            if var not in masked_vars:
                # We don't have any values for this variable, skip it.
                continue
            this_masked_var = masked_vars[var]
            if len(this_masked_var.shape) < 1:
                # Values are empty, skip it.
                continue
            if len(unix_times) != this_masked_var.shape[0] and strict_time:
                # If strict_time is true, reject all variables that do not have
                # same length for data and time. These can be inclination and other information
                # saved as netcdf variables.
                # If strict_time is false, pyspedas.store_data will complain about this
                # "lengths of x and y do not match", but it will create the tplot variable.
                # But if we try to plot these variables we may get an error.
                continue

            # Store the data, and merge variables if that was requested.
            var_name = prefix + var + suffix
            to_merge = False
            # Merge if the variable has been saved already in the current group of files.
            # Also merge when the variable is already in tplot and merge is True.
            # NOTE(review): `merge == True` is kept unchanged so that non-bool
            # arguments behave exactly as before.
            if (var_name in stored_variables) or (
                var_name in pyspedas.tplot_tools.data_quants.keys() and (merge == True)
            ):
                # Keep the existing data before store_data overwrites it.
                prev_data_quant = pyspedas.tplot_tools.data_quants[var_name]
                to_merge = True

            tplot_data = {"x": unix_times, "y": this_masked_var}
            store_data(var_name, tplot_data)
            if var_name not in stored_variables:
                stored_variables.append(var_name)

            if to_merge:
                cur_data_quant = pyspedas.tplot_tools.data_quants[var_name]
                # Preserve the attributes of the newly stored variable and
                # re-attach them to the merged result.
                plot_options = copy.deepcopy(pyspedas.tplot_tools.data_quants[var_name].attrs)
                merged_data = [prev_data_quant, cur_data_quant]
                pyspedas.tplot_tools.data_quants[var_name] = xr.concat(
                    merged_data, dim="time"
                ).sortby("time")
                pyspedas.tplot_tools.data_quants[var_name].attrs = plot_options

    # If we are interested in seeing a quick plot of the variables, do it
    if plot:
        tplot(stored_variables)

    return stored_variables