Source code for pyspedas.tplot_tools.importers.cdf_to_tplot

# Copyright 2020 Regents of the University of Colorado. All Rights Reserved.
# Released under the MIT license.
# This software was developed at the University of Colorado's Laboratory for
# Atmospheric and Space Physics.
# Verify current version before use at: https://github.com/MAVENSDC/PyTplot

import cdflib
import logging
import re
import numpy as np
import xarray as xr
import datetime
from datetime import timedelta
from pyspedas.tplot_tools import store_data
from pyspedas.tplot_tools import tplot
from pyspedas.tplot_tools import options
import pyspedas
import copy
from collections.abc import Iterable


[docs] def cdf_to_tplot(filenames, mastercdf=None, varformat=None, exclude_format=None, get_support_data=False, get_metadata=False, get_ignore_data=False, string_encoding='ascii', prefix='', suffix='', plot=False, merge=False, center_measurement=False, notplot=False, varnames=None): """ This function will automatically create tplot variables from CDF files. In general, the files should be ISTP compliant for this importer to work. Each variable is read into a new tplot variable (a.k.a an xarray DataArray), and all associated file/variable metadata is read into the attrs dictionary. .. note:: Variables must have an attribute named "VAR_TYPE". If the attribute entry is "data" (or "support_data"), then they will be added as tplot variables. Additionally, data variables should have attributes named "DEPEND_TIME" or "DEPEND_0" that describes which variable is x axis. If the data is 2D, then an attribute "DEPEND_1" must describe which variable contains the secondary axis. Parameters: filenames : str/list of str The file names and full paths of CDF files. mastercdf : str The file name of a master CDF to be used, if any varformat : str or list[str] The file variable formats to load into tplot. Wildcard character "*" is accepted. By default, all variables are loaded in. exclude_format : str or list[str] The file variable formats to exclude from loading into tplot. Wildcard character "*" is accepted. By default, no variables are excluded. get_support_data: bool Data with an attribute "VAR_TYPE" with a value of "support_data" will be loaded into tplot. By default, only loads in data with a "VAR_TYPE" attribute of "data". prefix: str The tplot variable names will be given this prefix. By default, no prefix is added. suffix: str The tplot variable names will be given this suffix. By default, no suffix is added. plot: bool The data is plotted immediately after being generated. All tplot variables generated from this function will be on the same plot. 
merge: bool If True, then data from different cdf files will be merged into a single tplot variable. get_ignore_data: bool Data with an attribute "VAR_TYPE" with a value of "ignore_data" will be loaded into tplot. By default, only loads in data with a "VAR_TYPE" attribute of "data". center_measurement: bool If True, the CDF epoch variables are time-shifted to the middle of the accumulation interval by their DELTA_PLUS_VAR and DELTA_MINUS_VAR variable attributes notplot: bool If True, then data are returned in a hash table instead of being stored in tplot variables (useful for debugging, and access to multi-dimensional data products) varnames: str or list of str Load these variables only. If None or [] or ['*'], then load everything. Returns: List of tplot variables created (unless notplot keyword is used). """ stored_variables = [] epoch_cache = {} output_table = {} metadata = {} new_cdflib = False if cdflib.__version__ > "0.4.9": new_cdflib = True logging.debug("Using new version of cdflib (%s)", cdflib.__version__) else: new_cdflib = False logging.debug("Using old version of cdflib (%s)", cdflib.__version__) if prefix is None: prefix = '' if suffix is None: suffix = '' # When doing varname, varformat, or exclude_format checks, do we need to take prefixes or suffixes into account? check_pre_suff = False if prefix != '' or suffix != '': check_pre_suff = True # If nontrivial varnames or varformat are explicitly supplied, load the requested data whether or not # it's marked as support_data in the CDF. if varformat is not None: get_support_data = True if varnames is None: varnames = [] if not isinstance(varnames, list): varnames = [varnames] if len(varnames) > 0: get_support_data = True if '*' in varnames: varnames = [] # pyspedas.tplot_tools.data_quants = {} if isinstance(filenames, str): filenames = [filenames] elif isinstance(filenames, list): pass else: logging.warning("Invalid filenames input. 
Must be string or list of strings.") return stored_variables var_type = ['data'] if varformat is None: varformat = ".*" if get_support_data: var_type.append('support_data') if get_metadata: var_type.append('metadata') if get_ignore_data: var_type.append('ignore_data') # Replace lists with regex alternation if isinstance(varformat, list): varformat = '|'.join(varformat) # Replace spaces with regex alternation if ' ' in varformat: explode=varformat.split() varformat='|'.join(explode) varformat = varformat.replace("*", ".*") var_regex = re.compile(varformat) if exclude_format is not None: # Replace spaces and lists with regex alternation as we did for varformat if isinstance(exclude_format, list): exclude_format = '|'.join(exclude_format) # Replace spaces with regex alternation if ' ' in exclude_format: explode=exclude_format.split() exclude_format='|'.join(explode) exclude_format = exclude_format.replace("*",".*") exclude_regex = re.compile(exclude_format) else: exclude_regex = None # This step may not be appropriate if the lexicographic sort does not correspond to a time sort. (For example, # if filenames contain orbit numbers rather than dates, and no leading zeroes are used.) JWL 2023-03-17 filenames.sort() # Get metadata from master CDF, if provided # In IDL, cdf2tplot uses the first file provided as a de-facto master CDF. # in pyspedas, cdf_to_tplot can do things like loading data from all 4 MMS probes in a single call. # So, we can't always use the first CDF in the list, because it may not apply to other files in the list. # Therefore, we supply a master CDF, if needed, in a separate argument. 
JWL 2023-03-17 if not mastercdf is None: mastercdf_flag = True logging.debug('Processing master CDF %s', mastercdf) master_cdf_file = cdflib.CDF(mastercdf) master_cdf_file.string_encoding = string_encoding master_cdf_info = master_cdf_file.cdf_info() if new_cdflib: master_cdf_variables = master_cdf_info.rVariables + master_cdf_info.zVariables else: master_cdf_variables = master_cdf_info['rVariables'] + master_cdf_info['zVariables'] logging.debug("master_cdf_variables: " + str(master_cdf_variables)) else: mastercdf_flag = False logging.debug("Input filenames: " + str(filenames)) for filename in filenames: logging.debug('Processing filename %s', filename) cdf_file = cdflib.CDF(filename) cdf_file.string_encoding = string_encoding cdf_info = cdf_file.cdf_info() if new_cdflib: all_cdf_variables = cdf_info.rVariables + cdf_info.zVariables else: all_cdf_variables = cdf_info['rVariables'] + cdf_info['zVariables'] logging.debug("all_cdf_variables: " + str(all_cdf_variables)) if not mastercdf_flag: # If not using a master CDF, each CDF is its own master master_cdf_file = cdf_file mastercdf = filename master_cdf_variables = all_cdf_variables # User defined variables. 
if len(varnames) > 0: load_cdf_variables = [value for value in varnames if value in all_cdf_variables] if check_pre_suff: pre_suff = [value for value in all_cdf_variables if prefix+value+suffix in varnames] load_cdf_variables.extend(pre_suff) else: load_cdf_variables = all_cdf_variables try: gatt = master_cdf_file.globalattsget() except: logging.warning('Unable to get global attributes for filename %s', mastercdf) gatt = {} for var in load_cdf_variables: if not re.match(var_regex, var) and (not check_pre_suff or not re.match(var_regex, prefix+var+suffix)): logging.debug("Variable %s does not match varformat, skipping", var) continue elif exclude_regex is not None and (re.match(exclude_regex, var) or (check_pre_suff and re.match(exclude_regex, prefix+var+suffix))): logging.debug("Variable %s matches exclude_format, skipping", var) continue logging.debug('Processing variable attributes for %s', var) try: var_atts = master_cdf_file.varattsget(var) except ValueError: logging.warning("Unable to get variable attributes for %s in file %s, skipping", var, mastercdf) continue if 'VIRTUAL' in var_atts: this_virtual = var_atts['VIRTUAL'].lower() if this_virtual=="true": logging.debug("Skipping virtual variable %s",var) continue elif 'FUNCT' in var_atts and 'COMPONENT_0' in var_atts: logging.info("Variable %s not marked as VIRTUAL, but has FUNCT and COMPONENT_0 attributes; skipping", var) continue if 'VAR_TYPE' in var_atts: this_var_type = var_atts['VAR_TYPE'].lower() elif 'PARAMETER_TYPE' in var_atts: this_var_type = var_atts['PARAMETER_TYPE'].lower() else: # 'VAR_TYPE' and 'PARAMETER_TYPE' not found in the variable attributes logging.info('No VAR_TYPE or PARAMETER_TYPE attributes defined for variable %s, skipping', var) continue if this_var_type in var_type: var_properties = master_cdf_file.varinq(var) var_properties_data_cdf = cdf_file.varinq(var) # Find data name and if it is already in stored variables if 'TPLOT_NAME' in var_atts: var_name = prefix + 
var_atts['TPLOT_NAME'] + suffix else: var_name = prefix + var + suffix # Is this variable marked as non-record-varying? This may # differ between the data and master CDFs. if new_cdflib: rec_vary_data \ = var_properties_data_cdf.Rec_Vary rec_vary_master \ = var_properties.Rec_Vary else: rec_vary_data \ = var_properties_data_cdf["Rec_Vary"] rec_vary_master \ = var_properties["Rec_Vary"] if (rec_vary_master != rec_vary_data): logging.warning("Master and data CDFs have different values for Rec_Vary property on variable %s, using %s from master CDF.", var, rec_vary_master) rec_vary = rec_vary_master nrv_has_times = False if "DEPEND_TIME" in var_atts: x_axis_var = var_atts["DEPEND_TIME"] if not rec_vary: logging.warning("Variable %s is marked non-record-varying, but has DEPEND_TIME attribute",var) nrv_has_times = True elif "DEPEND_0" in var_atts: x_axis_var = var_atts["DEPEND_0"] if not rec_vary: logging.warning("Variable %s is marked non-record-varying, but has DEPEND_0 attribute",var) nrv_has_times = True else: # non-record varying variables (NRVs) # added by egrimes, 13Jan2021 # here we assume if there isn't a DEPEND_TIME or DEPEND_0, there are no other depends logging.debug( 'No DEPEND_TIME or DEPEND_0 attributes found for variable %s, filename %s . Treating as non-record-varying.', var, filename) if rec_vary and ('epoch' not in var.lower()): logging.warning("Variable %s is marked as record-varying, but no DEPEND_TIME or DEPEND_0 attributes found. 
Treating as non-record-varying.",var) try: ydata = cdf_file.varget(var) except: logging.debug('Unable to get ydata for non-record-varying variable %s, filename %s', var, filename) continue if ydata is None: continue # since NRVs don't vary with time, they shouldn't vary across files output_table[var_name] = {'y': ydata} continue if x_axis_var not in master_cdf_variables: logging.warning('Variable %s timestamp variable %s not found, skipping', var, x_axis_var) continue if new_cdflib: data_type_description \ = cdf_file.varinq(x_axis_var).Data_Type_Description else: data_type_description \ = cdf_file.varinq(x_axis_var)["Data_Type_Description"] if epoch_cache.get(filename + x_axis_var) is None: delta_plus_var = 0.0 delta_minus_var = 0.0 delta_time = 0.0 # Skip variables with ValueErrors. try: xdata = cdf_file.varget(x_axis_var) epoch_var_atts = cdf_file.varattsget(x_axis_var) except ValueError: logging.debug('Problem getting data for variable %s, filename %s', var, filename) continue # check for DELTA_PLUS_VAR/DELTA_MINUS_VAR attributes if center_measurement: if 'DELTA_PLUS_VAR' in epoch_var_atts: delta_plus_var = cdf_file.varget(epoch_var_atts['DELTA_PLUS_VAR']) delta_plus_var_att = cdf_file.varattsget(epoch_var_atts['DELTA_PLUS_VAR']) # check if a conversion to seconds is required if 'SI_CONVERSION' in delta_plus_var_att: si_conv = delta_plus_var_att['SI_CONVERSION'] delta_plus_var = delta_plus_var.astype(float) * np.float64(si_conv.split('>')[0]) elif 'SI_CONV' in delta_plus_var_att: si_conv = delta_plus_var_att['SI_CONV'] delta_plus_var = delta_plus_var.astype(float) * np.float64(si_conv.split('>')[0]) if 'DELTA_MINUS_VAR' in epoch_var_atts: delta_minus_var = cdf_file.varget(epoch_var_atts['DELTA_MINUS_VAR']) delta_minus_var_att = cdf_file.varattsget(epoch_var_atts['DELTA_MINUS_VAR']) # check if a conversion to seconds is required if 'SI_CONVERSION' in delta_minus_var_att: si_conv = delta_minus_var_att['SI_CONVERSION'] delta_minus_var = 
delta_minus_var.astype(float) * np.float64(si_conv.split('>')[0]) elif 'SI_CONV' in delta_minus_var_att: si_conv = delta_minus_var_att['SI_CONV'] delta_minus_var = delta_minus_var.astype(float) * np.float64(si_conv.split('>')[0]) # sometimes these are specified as arrays if isinstance(delta_plus_var, np.ndarray) and isinstance(delta_minus_var, np.ndarray): delta_time = (delta_plus_var - delta_minus_var) / 2.0 else: # and sometimes constants if delta_plus_var != 0.0 or delta_minus_var != 0.0: delta_time = (delta_plus_var - delta_minus_var) / 2.0 if epoch_cache.get(filename + x_axis_var) is None: if ('CDF_TIME' in data_type_description) or \ ('CDF_EPOCH' in data_type_description): # the old way: # store the times as unix times, and cache them # xdata = cdfepoch.unixtime(xdata) # epoch_cache[filename+x_axis_var] = np.array(xdata)+delta_time # the new way: # store and cache the datetime objects directly # and delay conversion to unix times until get_data is called # Cluster uses multidimensional DEPEND_0 values on some "caveat" and "dsettings" variables. This will cause the # xdata[0] < 0.0 test to crash if len(xdata.shape) > 1: logging.warning("CDF DEPEND_0 attribute %s for variable %s is multidimensional with shape %s, skipping", var_atts['DEPEND_0'], var, str(xdata.shape),) continue # Cluster apparently uses (-1.0e-31) as time tag fill values?? Better check... # if xdata[0] < 0.0: # logging.warning("CDF time tag %e for variable %s cannot be converted to datetime, skipping",xdata[0],var) # continue # Check for all-fill Cluster times # NOTE: At least for Cluster onboard moments downloaded from CSA, # there is a FILLVAL specified on the time_tags variable, but # it doesn't seem to match the actual fill value we're seeing. 
# The time_tags FILLVAL attribute has -1e+31 (very large negative value) while # the ones we're actually seeing in the data are -1e-31 (very small negative value) is_cluster_fill = xdata == -1.0e-31 if is_cluster_fill.all(): logging.warning("Time variable %s for data variable %s has values all equal to -1.0e-31", x_axis_var, var) # Check if time variable has a FILLVAL attribute defined. if 'FILLVAL' in epoch_var_atts: fillval = epoch_var_atts['FILLVAL'] is_fillval = xdata == fillval if is_fillval.any(): logging.warning("Time variable %s for data variable %s contain at least one time equal to FILLVAL (%e)", x_axis_var, var, fillval) xdata = np.array(cdflib.cdfepoch.to_datetime(xdata)) if isinstance(xdata[0],datetime.datetime): # old cdflib < 1.0.0 returns datetime.datetime objects if isinstance(delta_time, np.ndarray) or isinstance(delta_time, list): delta_t = np.array([timedelta(seconds=dtime) for dtime in delta_time]) else: delta_t = timedelta(seconds=delta_time) else: # new cdflib >= 1.0.0 returns np.datetime64 objects if isinstance(delta_time, np.ndarray) or isinstance(delta_time, list): delta_t = np.array([np.timedelta64(int(dtime*1e9),'ns') for dtime in delta_time]) else: delta_t = np.timedelta64(int(delta_time*1e9),'ns') epoch_cache[filename + x_axis_var] = xdata + delta_t else: xdata = epoch_cache[filename + x_axis_var] try: ydata = cdf_file.varget(var) except: logging.warning('Unable to get ydata for variable %s', var) continue if ydata is None: logging.info('No ydata for variable %s', var) continue elif np.isscalar(ydata): # Cluster sets FILLVAL attributes on scalar quantities (!) so we need to check... # This can happen for density variables in the Cluster onboard moments loaded from CSA. # It may be due to no valid data being available, but CSA makes a CDF with a single # time value and data point that are both fillvals. 
logging.info('ydata for variable %s is a scalar, converting to numpy array',var) # We won't worry here about how many dimensions it's supposed to have. We'll fix that below if needed. For now, we just want to be # sure it's not a scalar. ydata = np.array(ydata) if "FILLVAL" in var_atts: if new_cdflib: thisvar_dtd = var_properties.Data_Type_Description else: thisvar_dtd = var_properties["Data_Type_Description"] fillval = var_atts['FILLVAL'] if (thisvar_dtd == 'CDF_FLOAT' or thisvar_dtd == 'CDF_REAL4' or thisvar_dtd == 'CDF_DOUBLE' or thisvar_dtd == 'CDF_REAL8'): is_fill_cond = ydata == fillval if is_fill_cond.all(): logging.warning("Floating point data values for variable %s are all fillval (%e)",var, fillval) ydata[is_fill_cond] = np.nan elif is_fill_cond.any(): ydata[is_fill_cond] = np.nan else: # No fillvals, nothing to do pass elif thisvar_dtd[:7] == 'CDF_INT': # NaN is only valid for floating point data # but we still need to handle FILLVAL's for # integer data, so we'll just set those to 0 is_fill_cond = ydata == fillval if is_fill_cond.all(): logging.warning("Integer data values for variable %s are all fillval (%d).",var, fillval) ydata[is_fill_cond] = 0 elif is_fill_cond.any(): ydata[is_fill_cond] = 0 else: # No fillvals, nothing to do pass # Check dimensions of ydata to see if a leading time dimension has been lost # This seems to happen with some Cluster CDFs, at least the ones served by CSA, # if a variable only has a single timestamp. For example, the CP_CIS-HIA_ONBOARD_MOMENTS datatype, as seen # in test_load_csa_mom_data. 
num_times = len(xdata) ydims = ydata.shape y_ndims = len(ydata.shape) if num_times == 1: if y_ndims == 0: logging.warning("Restoring missing time dimension for scalar-valued variable %s", var) ydata = ydata.reshape(1) ydims = ydata.shape y_ndims = len(ydata.shape) elif ydims[0] != 1: logging.warning("Restoring missing time dimension for array-valued variable %s", var) ydata = ydata.reshape(1,*ydims) ydims = ydata.shape y_ndims = len(ydata.shape) elif nrv_has_times and (num_times > 2) and (ydims[0] != num_times): # This case is primarily to catch some MMS FEEPS support variables # that's marked NRV, but has a DEPEND_0. Here, we ignore the times, # make the tplot variable from just the Y data, and skip the rest of # the metadata processing for this variable. logging.warning("Ignoring times for probably non-record-varying variable %s", var_name) output_table[var_name] = {'y': ydata} continue tplot_data = {'x': xdata, 'y': ydata} # We want to know if this is a spectrogram or not. If not, don't make # "v", "v1", "v2" entries. Technically, a vector quantity should have a DEPEND_1 # with numeric values. Most of the time, they are provided as strings, or simply # omitted. But if we made a "v" variable for it, it would break a lot of code # that only expects to get (times, data) back from a get_data call on a non-spectral variable. is_spectrogram = False if 'DISPLAY_TYPE' in var_atts: disp_type = var_atts['DISPLAY_TYPE'].lower() if "spect" in disp_type: is_spectrogram = True # Data may depend on other data in the CDF. depend_1 = None depend_2 = None depend_3 = None if "DEPEND_1" in var_atts: if y_ndims < 2: logging.warning("Variable %s has only %d dimension (including time), but has a DEPEND_1 attribute. 
Removing attribute.", var, y_ndims) depend_1 = None elif var_atts["DEPEND_1"] in master_cdf_variables: try: # Check for correct shape, matching time and data dimensions dep_name = var_atts["DEPEND_1"] depend_1 = np.array(master_cdf_file.varget(dep_name)) # String-valued DEPEND_1 handling # This is not strictly ISTP compliant, but it's extremeley common. For # example, vector-valued data will often have DEPEND_1 values that are more like # labels, i.e. ['x', 'y', 'z'] # Previous versions of cdf_to_tplot simply ignored string-valued DEPEND_N. # But some missions (e.g. ERG) really do want string values as 'v1' or 'v2' tags. # # The situation is further complicated by the fact that cdflib seems to introduce # extra leading or trailing dimensions on string-valued variables. # # As of cdflib version 1.3.3, it seems the issue has been corrected. I will leave # the test and warning enabled, in case of a regression later. JWL 2025-04-04 # if depend_1 is not None and depend_1.dtype.type is np.str_: # Get the original array dimensions from the variable properties dp_props = master_cdf_file.varinq(dep_name) if new_cdflib: orig_dimensions = dp_props.Dim_Sizes else: orig_dimensions = dp_props['Dim_Sizes'] reshape_dim = tuple(orig_dimensions) if depend_1.shape != reshape_dim: logging.warning('Variable %s has shape %s. Its DEPEND_1 attribute %s is string-valued, and cdflib returned dimensions %s which do not match original dimensions %s.',var, ydata.shape, dep_name, depend_1.shape, reshape_dim) if len(depend_1.shape) == 1 and depend_1.shape[0] == ydata.shape[1]: logging.warning('Returned dimensions are a better match than original, no reshaping required') elif len(reshape_dim) == 1 and reshape_dim[0] == ydata.shape[1]: logging.warning('Original dimensions are a better match to data dimensions, reshaping. 
Updating to a more recent version of cdflib may get rid of this warning.') depend_1 = np.reshape(depend_1, reshape_dim) else: logging.warning('Neither original nor returned dimensions match data shape, ignoring this DEPEND_1.') depend_1 = None pass dep_dims = depend_1.shape dep_ndims = len(dep_dims) if dep_ndims == 0: logging.warning("Variable %s DEPEND_1 attribute %s is zero-dimensional, Removing attribute.", var, dep_name) depend_1 = None elif dep_ndims == 1: # Not time varying if dep_dims[0] != ydims[1]: logging.warning("Variable %s DEPEND_1 attribute %s has length %d, but corresponding data dimension has length %d. Removing attribute.",var,dep_name,dep_dims[0],ydims[1]) depend_1 = None elif dep_ndims == 2: # time-varying or otherwise multidimensional if dep_dims[0] != num_times: logging.warning("Variable %s is 2-dimensional, but first dimension of DEPEND_1 attribute %s has size %d versus num_times %d. Attribute will be kept (for now).",var,dep_name,dep_dims[0], num_times) if dep_dims[0] == 1 and dep_dims[1] == ydims[1]: # RBSP EMPHISIS HFR_Spectra has this logging.warning("Variable %s DEPEND_1 attribute %s has dimensions 1 x y_dims[1]; reshaping to 1-D array.", var, dep_name) depend_1 = np.reshape(depend_1, (ydims[1],)) # Or, it could be ERG HEP omniflux data with an extra (length-2) dimension as upper/lower bounds. # So for now, we'll allow it. pass if dep_dims[1] != ydims[1]: # ERG XEP seems to make a 9x2 rather than a 2x9 array logging.warning("Variable %s is 2-dimensional, but second dimension of DEPEND_1 attribute %s has data length %d, but corresponding data dimension has length %d. Attribute will be kept (for now).",var,dep_name,dep_dims[1],ydims[1]) #depend_1 = None pass else: # Too many dimensions # ERG LEPE has time dependent DEPEND_1 with an extra dimension for upper/lower limits, so # we need to allow this for now, or at least add a flag to skip this check. logging.warning("Variable %s DEPEND_1 attribute %s has too many dimensions (%d). 
Keeping extra dimensions (for now).",var,dep_name,dep_ndims) #depend_1 = None pass except ValueError: logging.warning('Unable to get DEPEND_1 variable %s while processing %s', var_atts["DEPEND_1"], var) pass if "DEPEND_2" in var_atts: if y_ndims < 3: logging.warning("Variable %s has only %d dimensions (including time), but has a DEPEND_2 attribute. Removing attribute.", var, y_ndims) depend_2 = None elif var_atts["DEPEND_2"] in master_cdf_variables: try: # Check for correct shape, matching time and data dimensions dep_name = var_atts["DEPEND_2"] depend_2 = np.array(master_cdf_file.varget(dep_name)) # String-valued DEPEND_N handling # This is not strictly ISTP compliant, but it's extremeley common. For # example, vector-valued data will often have DEPEND_1 values that are more like # labels, i.e. ['x', 'y', 'z'] # Previous versions of cdf_to_tplot simply ignored string-valued DEPEND_N. # But some missions (e.g. ERG) really do want string values as 'v1' or 'v2' tags. # # The situation is further complicated by the fact that cdflib seems to introduce # extra leading or trailing dimensions on string-valued variables. if depend_2 is not None and depend_2.dtype.type is np.str_: # Get the original array dimensions from the variable properties dp_props = master_cdf_file.varinq(dep_name) if new_cdflib: orig_dimensions = dp_props.Dim_Sizes else: orig_dimensions = dp_props['Dim_Sizes'] reshape_dim = tuple(orig_dimensions) depend_2 = np.reshape(depend_2, reshape_dim) #depend_2 = None pass dep_dims = depend_2.shape dep_ndims = len(dep_dims) if dep_ndims == 0: logging.warning("Variable %s DEPEND_2 attribute %s is zero-dimensional. Removing attribute.", var, dep_name) depend_2 = None elif dep_ndims == 1: # Not time varying if dep_dims[0] != ydims[2]: logging.warning( "Variable %s DEPEND_2 attribute %s has length %d, but corresponding data dimension has length %d. 
Removing attribute.", var, dep_name, dep_dims[0], ydims[2]) depend_2 = None elif dep_ndims == 2: # time-varying or otherwise multidimensional if dep_dims[0] != num_times: logging.warning( "Variable %s multidimensional DEPEND_2 attribute %s has %d elements in first dimension, but data has %d times. Removing attribute.", var, dep_name, dep_dims[0], num_times) depend_2 = None if dep_dims[1] != ydims[2]: logging.warning( "Variable %s multidimensional DEPEND_2 attribute %s has %d elements in second dimension, but corresponding data dimension has length %d. Removing attribute.", var, dep_name, dep_dims[1], ydims[2]) depend_2 = None else: # Too many dimensions logging.warning( "Variable %s DEPEND_2 attribute %s has too many dimensions (%d). Removing attribute.", var, dep_name, dep_ndims) depend_2 = None except ValueError: logging.warning('Unable to get DEPEND_2 variable %s while processing %s', var_atts["DEPEND_2"], var) if "DEPEND_3" in var_atts: if y_ndims < 4: # TWINS imager data has this logging.warning("Variable %s has only %d dimensions (including time), but has a DEPEND_3 attribute. Removing attribute.", var, y_ndims) depend_3 = None elif var_atts["DEPEND_3"] in master_cdf_variables: try: # Check for correct shape, matching time and data dimensions dep_name = var_atts["DEPEND_3"] depend_3 = np.array(master_cdf_file.varget(dep_name)) # String-valued DEPEND_N handling # This is not strictly ISTP compliant, but it's extremeley common. For # example, vector-valued data will often have DEPEND_1 values that are more like # labels, i.e. ['x', 'y', 'z'] # Previous versions of cdf_to_tplot simply ignored string-valued DEPEND_N. # But some missions (e.g. ERG) really do want string values as 'v1' or 'v2' tags. # # The situation is further complicated by the fact that cdflib seems to introduce # extra leading or trailing dimensions on string-valued variables. 
if depend_3 is not None and depend_3.dtype.type is np.str_: # Get the original array dimensions from the variable properties dp_props = master_cdf_file.varinq(dep_name) if new_cdflib: orig_dimensions = dp_props.Dim_Sizes else: orig_dimensions = dp_props['Dim_Sizes'] reshape_dim = tuple(orig_dimensions) depend_3 = np.reshape(depend_3, reshape_dim) #depend_3 = None pass dep_dims = depend_3.shape dep_ndims = len(dep_dims) if dep_ndims == 0: logging.warning("Variable %s DEPEND_3 attribute %s is zero-dimensional. Removing attribute.", var, dep_name) depend_3 = None elif dep_ndims == 1: # Not time varying if dep_dims[0] != ydims[3]: logging.warning( "Variable %s DEPEND_3 attribute %s has length %d, but corresponding data dimension has length %d. Removing attribute.", var, dep_name, dep_dims[0], ydims[3]) depend_3 = None elif dep_ndims == 2: # time-varying, or otherwise multidimensional if dep_dims[0] != num_times: logging.warning( "Variable %s multidimensional DEPEND_3 attribute %s has %d elements in first dimension, but data has %d times. Removing attribute.", var, dep_name, dep_dims[0], num_times) depend_3 = None if dep_dims[1] != ydims[3]: logging.warning( "Variable %s multidimensional DEPEND_3 attribute %s has %d elements in second dimension, but corresponding data dimension has length %d. Removing attribute.", var, dep_name, dep_dims[1], ydims[3]) depend_3 = None else: # Too many dimensions logging.warning( "Variable %s DEPEND_3 attribute %s has too many dimensions (%d). 
Removing attribute.", var, dep_name, dep_ndims) depend_3 = None except ValueError: logging.warning('Unable to get DEPEND_3 variable %s while processing %s', var_atts["DEPEND_3"], var) nontime_varying_depends = [] # Fill in any missing depend_n values (skipping this for now) ndims = len(ydata.shape) if ndims >= 2 and depend_1 is None: # This is so common, we won't bother logging it # depend_1 = np.arange(ydata.shape[1]) pass if ndims >= 3 and depend_2 is None: #logging.warning("Variable %s has %d dimensions, but no DEPEND_2, adding index range for dimension 2", var_name, ndims ) #depend_2 = np.arange(ydata.shape[2]) pass if ndims >= 4 and depend_3 is None: #logging.warning("Variable %s has %d dimensions, but no DEPEND_3, adding index range for dimension 3", var_name, ndims ) #depend_3 = np.arange(ydata.shape[3]) pass if depend_1 is not None and depend_2 is not None and depend_3 is not None: tplot_data['v1'] = depend_1 tplot_data['v2'] = depend_2 tplot_data['v3'] = depend_3 if len(depend_1.shape) == 1: nontime_varying_depends.append('v1') if len(depend_2.shape) == 1: nontime_varying_depends.append('v2') if len(depend_3.shape) == 1: nontime_varying_depends.append('v3') elif depend_1 is not None and depend_2 is not None: tplot_data['v1'] = depend_1 tplot_data['v2'] = depend_2 if len(depend_1.shape) == 1: nontime_varying_depends.append('v1') if len(depend_2.shape) == 1: nontime_varying_depends.append('v2') elif depend_1 is not None: tplot_data['v'] = depend_1 if len(depend_1.shape) == 1: nontime_varying_depends.append('v') elif depend_2 is not None: tplot_data['v2'] = depend_2 if len(depend_2.shape) == 1: nontime_varying_depends.append('v') metadata[var_name] = {'display_type': var_atts.get("DISPLAY_TYPE", "time_series"), 'scale_type': var_atts.get("SCALE_TYP"), 'y_spec_scale_type': None, 'var_attrs': var_atts, 'labels': None, 'file_name': filename, 'global_attrs': gatt} labl_ptr = var_atts.get('LABL_PTR_1') if labl_ptr is not None: try: labl_ptr_arr = 
master_cdf_file.varget(labl_ptr) if labl_ptr_arr is not None: metadata[var_name]['labels'] = labl_ptr_arr.flatten().tolist() except: pass units = filter_greater_than(var_atts.get('UNITS')) if units is None: unit_ptr = var_atts.get('UNIT_PTR') if unit_ptr is not None: try: unit_ptr_array = master_cdf_file.varget(unit_ptr) if unit_ptr_array is not None: units = filter_greater_than(unit_ptr_array.flatten().tolist()) except: pass if isinstance(units, (list,np.ndarray)): # If units is a list or array, and are all the same, replace with the single value # Otherwise, stringify the whole array firstunit=units[0].lower() allsame=True for u in units: if u.lower() != firstunit: allsame=False if allsame: # Return units as a scalar metadata[var_name]['units'] = units[0] else: # Go ahead and stringify the whole mess to force it to bw a scalar # TODO: there must be a better way to handle this! logging.warning(f'Variable {var_name} in file {cdf_file} has non-homogeneous unit values {units}') metadata[var_name]['units'] = str(units) else: metadata[var_name]['units'] = str(units) if metadata[var_name]['scale_type'] is None: alt_scale_type = var_atts.get("SCALETYP", "linear") if alt_scale_type is not None: metadata[var_name]['scale_type'] = alt_scale_type # handle y-axis options for spectra if 'DEPEND_1' in var_atts: if isinstance(var_atts['DEPEND_1'], str): try: depend_1_var_atts = master_cdf_file.varattsget(var_atts['DEPEND_1']) scale_type = depend_1_var_atts.get('SCALETYP') if scale_type is None: scale_type = depend_1_var_atts.get('SCALE_TYP') if scale_type is not None: metadata[var_name]['y_spec_scale_type'] = scale_type depend_1_units = depend_1_var_atts.get('UNITS') if depend_1_units is not None: metadata[var_name]['y_spec_units'] = depend_1_units metadata[var_name]['DEPEND_1_UNITS'] = depend_1_units except ValueError: pass # options for multidimensional variables if 'DEPEND_2' in var_atts: if isinstance(var_atts['DEPEND_2'], str): try: depend_2_var_atts = 
master_cdf_file.varattsget(var_atts['DEPEND_2']) depend_2_units = depend_2_var_atts.get('UNITS') if depend_2_units is not None: metadata[var_name]['DEPEND_2_UNITS'] = depend_2_units except ValueError: # some variables aren't actually available pass if 'DEPEND_3' in var_atts: if isinstance(var_atts['DEPEND_3'], str): try: depend_3_var_atts = master_cdf_file.varattsget(var_atts['DEPEND_3']) depend_3_units = depend_3_var_atts.get('UNITS') if depend_3_units is not None: metadata[var_name]['DEPEND_3_UNITS'] = depend_3_units except ValueError: # some variables aren't actually available pass # Check if the variable already exists in the for loop output if var_name not in output_table: output_table[var_name] = tplot_data else: # If it does, loop though the existing variable's x,y,v,v2,v3,etc var_data = output_table[var_name] for output_var in var_data: if output_var not in nontime_varying_depends: if np.asarray(tplot_data[output_var]).ndim == 0 and np.equal(tplot_data[output_var], None): # If there is nothing in the new variable, then pass pass elif np.asarray(var_data[output_var]).ndim == 0 and np.equal(var_data[output_var], None): # If there is nothing in the old variable, then replace var_data[output_var] = tplot_data[output_var] else: # If they both have something, then concatenate var_data[output_var] = np.concatenate((var_data[output_var], tplot_data[output_var])) if notplot: return output_table for var_name in output_table.keys(): to_merge = False if var_name in pyspedas.tplot_tools.data_quants.keys() and merge: prev_data_quant = pyspedas.tplot_tools.data_quants[var_name] to_merge = True try: attr_dict = {} if metadata.get(var_name) is not None: attr_dict["CDF"] = {} attr_dict["CDF"]["VATT"] = metadata[var_name]['var_attrs'] attr_dict["CDF"]["GATT"] = metadata[var_name]['global_attrs'] attr_dict["CDF"]["FILENAME"] = metadata[var_name]['file_name'] attr_dict["CDF"]["LABELS"] = metadata[var_name]['labels'] # populate data_att; used by PySPEDAS as a common interface to 
# data attributes such as units, coordinate system, etc attr_dict["data_att"] = {"coord_sys": "", #"units": metadata[var_name]['var_attrs'].get('UNITS'), "units": metadata[var_name]['units'], "depend_1_units": metadata[var_name].get('DEPEND_1_UNITS'), "depend_2_units": metadata[var_name].get('DEPEND_2_UNITS'), "depend_3_units": metadata[var_name].get('DEPEND_3_UNITS')} # populate depend_1_units in data_att, if it's not set if attr_dict['data_att']['depend_1_units'] is None and metadata[var_name]['var_attrs'].get('UNITS') is not None: attr_dict['data_att']['depend_1_units'] = metadata[var_name]['var_attrs'].get('UNITS') # extract the coordinate system, if available vatt_keys = list(attr_dict["CDF"]["VATT"].keys()) vatt_lower = [k.lower() for k in vatt_keys] if 'coordinate_system' in vatt_lower: attr_dict['data_att']['coord_sys'] = filter_greater_than( attr_dict["CDF"]["VATT"][vatt_keys[vatt_lower.index('coordinate_system')]]) if 'labels' in vatt_lower: if attr_dict["CDF"]["VATT"].get('labels') is not None: if isinstance(attr_dict["CDF"]["VATT"]['labels'], str): # check for line separators # this fixes the legend for RBSP L3 EFW data if '\\n' in attr_dict["CDF"]["VATT"]['labels']: attr_dict["CDF"]["VATT"]['labels'] = attr_dict["CDF"]["VATT"]['labels'].split('\\n') if '\\N' in attr_dict["CDF"]["VATT"]['labels']: attr_dict["CDF"]["VATT"]['labels'] = attr_dict["CDF"]["VATT"]['labels'].split('\\N') store_data(var_name, data=output_table[var_name], attr_dict=attr_dict) except (TypeError, ValueError) as err: logging.warning("Exception of type %s raised during store_data call for variable %s", str(type(err)), var_name) logging.warning("Exception message: %s",str(err)) continue if var_name not in stored_variables: stored_variables.append(var_name) if metadata.get(var_name) is not None: if metadata[var_name]['display_type'].lower() == "spectrogram": options(var_name, 'spec', 1) if metadata[var_name]['scale_type'] == 'log': if metadata[var_name]['display_type'].lower() == 
"spectrogram": options(var_name, 'zlog', 1) else: options(var_name, 'ylog', 1) if metadata[var_name].get('y_spec_scale_type') is not None: if metadata[var_name]['y_spec_scale_type'] == 'log': options(var_name, 'ylog', 1) if metadata[var_name].get('y_spec_units') is not None: options(var_name, 'ysubtitle', '[' + metadata[var_name].get('y_spec_units') + ']') if metadata[var_name].get('var_attrs') is not None: if metadata[var_name]['var_attrs'].get('LABLAXIS') is not None: options(var_name, 'ytitle', metadata[var_name]['var_attrs']['LABLAXIS']) if metadata[var_name]['var_attrs'].get('UNITS') is not None: unitsstr = filter_greater_than(metadata[var_name]['var_attrs']['UNITS']) if metadata[var_name]['display_type'].lower() == 'spectrogram': options(var_name, 'ztitle', f'[{unitsstr}]') else: options(var_name, 'ysubtitle', f'[{unitsstr}]') # Gather up all options in the variable attribute section, toss them into options and see what sticks # JWL 2025-03-06: We want options() to warn about unrecognized option names. If used here, it will # spam the logs with warnings. There is also the possibility that someone could use an attribute name # for some other purpose, that collides with the PySPEDAS interpretation. #options(var_name, opt_dict=metadata[var_name]['var_attrs']) if to_merge is True: cur_data_quant = pyspedas.tplot_tools.data_quants[var_name] if isinstance(pyspedas.tplot_tools.data_quants[var_name], dict): # non-record varying variable, shouldn't be merged continue plot_options = copy.deepcopy(pyspedas.tplot_tools.data_quants[var_name].attrs) pyspedas.tplot_tools.data_quants[var_name] = xr.concat([prev_data_quant, cur_data_quant], dim='time').sortby('time') pyspedas.tplot_tools.data_quants[var_name].attrs = plot_options if notplot: return output_table if plot: tplot(stored_variables) return stored_variables
def filter_greater_than_single(attr):
    """
    Return any text to the left of the first '>' in a single attribute value
    (e.g., coordinate systems, units).

    Trailing whitespace is stripped from the returned text.

    Args:
        attr: A single attribute value.

    Returns:
        The truncated string if ``attr`` is a str; otherwise ``attr`` is
        returned unchanged (None, numbers, bytes, etc.).
    """
    if not isinstance(attr, str):
        return attr
    # Only the text before the first '>' is kept.
    return attr.partition('>')[0].rstrip()


def filter_greater_than(attr):
    """
    Strip CDF comments from attribute values (single value or array/list of values)

    Returns any text to the left of '>'

    Args:
        attr: A str, a non-str scalar, or an iterable (list, tuple, array, ...)
            of such values.

    Returns:
        - str, if ``attr`` is a str
        - list, if ``attr`` is a non-string iterable; each element is filtered
          individually and non-str elements are kept as they are
        - ``attr`` unchanged for anything else (None, numbers, bytes, bytearray)
    """
    # str, bytes and bytearray are all Iterable, but must be handled as single
    # values: iterating a str yields characters and iterating bytes yields
    # integers, neither of which is a meaningful attribute value.
    if isinstance(attr, (str, bytes, bytearray)) or not isinstance(attr, Iterable):
        return filter_greater_than_single(attr)
    return [filter_greater_than_single(item) for item in attr]