# Source code for pyspedas.tplot_tools.importers.netcdf_to_tplot

import os
import copy
import calendar
import logging
import numpy as np
import xarray as xr
import pyspedas
from pyspedas.tplot_tools import tplot, store_data
from netCDF4 import Dataset, num2date


def change_time_to_unix_time(time_var):
    """
    Convert a netCDF time variable to seconds since the Unix epoch.

    Parameters
    ----------
    time_var : netCDF variable-like object
        Must support ``time_var[:]`` and describe its epoch through a
        ``units`` attribute (``units``, ``Units`` or ``UNITS``), or through
        a ``long_name`` attribute equal to ``"milliseconds since 1970"``.

    Returns
    -------
    list of float
        One Unix time (seconds, with a fractional microsecond part) per
        element of the time variable.

    Raises
    ------
    ValueError
        If no units description can be found on ``time_var``.
    """
    epoch_ms_units = "milliseconds since 1970-01-01 00:00:00"

    # Capitalization of variable attributes may vary...
    units = None
    for attr_name in ("units", "Units", "UNITS"):
        if hasattr(time_var, attr_name):
            units = getattr(time_var, attr_name)
            break

    # ICON uses nonstandard units strings
    if units == "ms":
        units = epoch_ms_units
    # Check if the long_name attribute has stored the epoch description (POES/METOP):
    elif hasattr(time_var, "long_name"):
        if time_var.long_name == "milliseconds since 1970":
            units = epoch_ms_units

    if units is None:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            "Time variable has no 'units' attribute and no recognized 'long_name'"
        )

    # Read the values once; unwrap masked arrays to their underlying data.
    time_data = time_var[:]
    if hasattr(time_data, "data"):
        time_data = time_data.data

    dates = num2date(time_data, units=units)
    # timegm() interprets the time tuple as UTC; the sub-second part is added back.
    return [
        calendar.timegm(date.timetuple()) + date.microsecond / 1e6
        for date in dates
    ]


def netcdf_to_tplot(
    filenames, time="", prefix="", suffix="", plot=False, merge=False, strict_time=True
):
    """
    Create tplot variables from netCDF files.

    Parameters
    ----------
    filenames : str or list of str
        The file names and full paths of netCDF files. Duplicates are removed
        and the files are processed in sorted order.
    time : str, optional
        This is not used anymore. Remains here for backward compatibility.
        Currently, the name of the time variable is found in the netCDF
        variables themselves.
    prefix : str, optional
        The tplot variable names will be given this prefix.
        By default, no prefix is added.
    suffix : str, optional
        The tplot variable names will be given this suffix.
        By default, no suffix is added.
    plot : bool, optional
        If True, the data is plotted immediately after being generated.
        All tplot variables generated from this function will be on the same plot.
        By default, a plot is not created.
    merge : bool, optional
        If True, then data from 'filenames' will be merged into existing tplot variables.
        If False (default), then data from 'filenames' will overwrite existing tplot variables.
        Data in 'filenames' will always be merged/combined by themselves.
    strict_time : bool, optional
        If True (default), variables will be loaded into tplot variables only
        if their data length matches the time length.
        If False, all variables will be loaded.
        This is useful because some variables may contain general information,
        like satellite longitude.

    Returns
    -------
    stored_variables : list of str
        List of tplot variables created. Empty if 'filenames' is neither a
        string nor a list, or if no file could be read.

    Examples
    --------
    Create tplot variables from a GOES netCDF file:

    >>> import pyspedas
    >>> file = "/Users/user_name/goes_files/g15_epead_a16ew_1m_20171201_20171231.nc"
    >>> pyspedas.netcdf_to_tplot(file)

    Add a prefix, and plot immediately:

    >>> import pyspedas
    >>> file = "/Users/user_name/goes_files/g15_epead_a16ew_1m_20171201_20171231.nc"
    >>> pyspedas.netcdf_to_tplot(file, prefix='goes_prefix_', plot=True)
    """
    stored_variables = []

    if prefix is None:
        prefix = ""
    if suffix is None:
        suffix = ""

    if isinstance(filenames, str):
        filenames = [filenames]
    elif not isinstance(filenames, list):
        logging.error("Invalid filenames input. Must be string or list of strings.")
        return stored_variables

    filenames = sorted(set(filenames))

    # Lower-cased attribute names that might be used to flag fill values.
    fillval_atts_lc = ["fillval", "_fillval", "_fillvalue", "fillvalue", "missing_data"]

    for filename in filenames:
        if not os.path.isfile(filename):
            logging.error("Cannot find file: " + filename)
            continue

        # The context manager guarantees the file is closed, even on errors.
        # All store_data() calls for this file happen while it is still open.
        with Dataset(filename) as vfile:
            # Create a dictionary that contains variables and their attributes.
            vars_and_atts = {}
            for name, variable in vfile.variables.items():
                vars_and_atts[name] = {
                    attrname: getattr(variable, attrname)
                    for attrname in variable.ncattrs()
                }

            # Fill in missing values for each variable with np.nan (if values are
            # not already nan) and save the masked variables to a new dictionary.
            masked_vars = {}  # Dictionary containing properly masked variables
            for var, atts_dict in vars_and_atts.items():
                reg_var = vfile.variables[var]
                # Read the values from disk only once per variable.
                var_values = reg_var[:]

                var_fill_value = None
                for key in atts_dict.keys():
                    if key.lower() in fillval_atts_lc:
                        # If multiple matching attributes are found, the last one
                        # (in the variable's attribute order) takes precedence.
                        var_fill_value = atts_dict[key]
                # A fill value reported by the array itself overrides the attributes.
                if hasattr(var_values, "get_fill_value"):
                    var_fill_value = var_values.get_fill_value()

                # If var_fill_value is None, or already NaN, there's nothing to do here.
                # Integer arrays can't be NaN-filled, so if var_fill_value is any kind
                # of integer, skip those too.
                # Some missions have strings defined as fill values. (ICON)
                if (
                    var_fill_value is not None
                    and not isinstance(var_fill_value, np.integer)
                    and not isinstance(var_fill_value, str)
                    and not np.isnan(var_fill_value)
                ):
                    # We want to force missing values to be nan so that plots
                    # don't look strange.
                    if hasattr(var_values, "data"):
                        raw_values = var_values.data
                        var_mask = np.ma.masked_where(
                            raw_values == np.float32(var_fill_value), raw_values
                        )
                    else:
                        var_mask = np.ma.masked_where(
                            reg_var == np.float32(var_fill_value), reg_var
                        )
                    masked_vars[var] = np.ma.filled(var_mask, np.nan)
                else:
                    masked_vars[var] = reg_var

            # A dictionary with the time variables in this file.
            times_dict = {}

            # Store each netcdf variable as a tplot variable.
            for var in vfile.variables:
                # Make sure that the variables are time-based, otherwise don't
                # store them as tplot variables.
                dimensions = vfile[var].dimensions
                if not (len(dimensions) > 0 and len(dimensions[0]) > 0):
                    continue

                # Find the time dependence of the current variable.
                this_time = dimensions[0]
                if this_time not in vars_and_atts.keys():
                    # For GOES satellites, sometimes we get 'record' as time dependence.
                    # In that case, we can try 'time' and 'time_tag' as alternatives.
                    if "time" in vars_and_atts.keys():
                        this_time = "time"
                    elif "time_tag" in vars_and_atts.keys():
                        this_time = "time_tag"

                if this_time not in vars_and_atts.keys():
                    # If this_time does not exist, we can't save this as tplot variable.
                    continue
                elif this_time == var:
                    # The time the variable depends on may not have been set.
                    # Check if time is a variable:
                    if "time" in vars_and_atts.keys():
                        # If it is, check if the sizes match:
                        if vfile[var].size == vfile["time"].size:
                            # If they do, we can infer that the time variable is
                            # the one we want here.
                            this_time = "time"
                        else:
                            # If not, it probably depends on something else / nothing.
                            continue
                    else:
                        # If this_time has the same name as the current variable,
                        # do not save it.
                        continue

                # Find the time values (as unix times), converting each time
                # variable only once per file.
                if this_time in times_dict:
                    unix_times = times_dict[this_time]
                else:
                    try:
                        time_var = vfile[this_time]
                        unix_times = change_time_to_unix_time(time_var)
                        times_dict[this_time] = unix_times
                    except Exception as e:
                        # In this case, we could not handle the time, print an error
                        logging.error(
                            "Could not process time variable '"
                            + this_time
                            + "' for the netcdf variable: '"
                            + var
                            + "'"
                        )
                        logging.error("Exception details: " + str(e))
                        continue

                if var not in masked_vars:
                    # We don't have any values for this variable, skip it.
                    continue

                this_masked_var = masked_vars[var]
                if len(this_masked_var.shape) < 1:
                    # Values are empty, skip it.
                    continue

                if len(unix_times) != this_masked_var.shape[0] and strict_time:
                    # If strict_time is true, reject all variables that do not have
                    # same length for data and time. These can be inclination and
                    # other information saved as netcdf variables.
                    # If strict_time is false, pyspedas.store_data will complain
                    # about this "lengths of x and y do not match", but it will
                    # create the tplot variable.
                    # But if we try to plot these variables we may get an error.
                    continue

                # Store the data, and merge variables if that was requested.
                var_name = prefix + var + suffix
                to_merge = False

                # Merge if the variable has been saved already in the current
                # group of files.
                # Also merge when the variable is already in tplot and merge is True.
                if (var_name in stored_variables) or (
                    var_name in pyspedas.tplot_tools.data_quants.keys() and merge
                ):
                    prev_data_quant = pyspedas.tplot_tools.data_quants[var_name]
                    to_merge = True

                tplot_data = {"x": unix_times, "y": this_masked_var}
                store_data(var_name, tplot_data)
                if var_name not in stored_variables:
                    stored_variables.append(var_name)

                if to_merge:
                    cur_data_quant = pyspedas.tplot_tools.data_quants[var_name]
                    # Keep the plot options of the newly stored variable.
                    plot_options = copy.deepcopy(cur_data_quant.attrs)
                    merged_data = [prev_data_quant, cur_data_quant]
                    pyspedas.tplot_tools.data_quants[var_name] = xr.concat(
                        merged_data, dim="time"
                    ).sortby("time")
                    pyspedas.tplot_tools.data_quants[var_name].attrs = plot_options

    # If we are interested in seeing a quick plot of the variables, do it
    if plot:
        tplot(stored_variables)

    return stored_variables