# Copyright 2020 Regents of the University of Colorado. All Rights Reserved.
# Released under the MIT license.
# This software was developed at the University of Colorado's Laboratory for Atmospheric and Space Physics.
# Verify current version before use at: https://github.com/MAVENSDC/PyTplot
import pandas as pd
import numpy as np
import datetime
import logging
from pyspedas.tplot_tools import del_data, tplot_rename, get_y_range, replace_metadata
import pyspedas
import xarray as xr
import copy
import warnings
from pyspedas import is_timezone_aware
# Module-level value initialised to 1. No function visible in this file reads
# or updates it -- NOTE(review): confirm whether external code still relies on
# this name before removing it.
tplot_num = 1
[docs]
def store_data(name, data=None, delete=False, newname=None, attr_dict={}):
    """
    Create a "Tplot Variable" (similar to the IDL SPEDAS concept) from the inputs
    and store it in memory (``pyspedas.tplot_tools.data_quants``).

    Depending on the arguments, this routine can also delete a variable, rename
    a variable, replace a variable's metadata, or build a 'pseudovariable' that
    overlays several existing variables.

    Parameters
    ----------
    name : str
        Name of the tplot variable that will be created (or deleted / renamed /
        updated).
    data : dict, list[str] or str, optional
        A dictionary describing a single variable, or a list of existing
        variable names (or a single space-separated string of names) to combine
        into a pseudovariable.  The dictionary passed in is not modified.

        'x' is the time axis.  Numeric values are interpreted as seconds since
            the Unix epoch (January 1st 1970); non-finite values are replaced
            by 0.  A pandas Series, datetime.datetime objects (naive or
            timezone-aware), numpy.datetime64 values, or strings are also
            accepted.  If 'x' is absent, the variable is stored as a
            non-record-varying quantity containing only 'y'.
        'y' holds the data values.  This can be 2-dimensional if multiple lines
            or a spectrogram are desired.
        'dy' is optional and holds error values for 'y'.
        'v' is optional and is only used for spectrogram plots: the list of
            bins to be used.  If provided, 'y' should have dimensions x by z.
        'v1/v2/v3' are optional and act as coordinates for 'y' when 'y' has
            more than two dimensions.  By default 'v2' is plotted in
            spectrogram plots.
    delete : bool, optional
        If True, deletes the tplot variable matching ``name``.
        Default: False
    newname : str, optional
        If set (and ``delete`` is not True), renames the variable ``name`` to
        this name; ``data`` is ignored in that case.
        Default: None
    attr_dict : dict, optional
        A dictionary of attributes stored alongside the data.  If ``data`` and
        ``newname`` are both None, the metadata of the existing variable is
        replaced with this dictionary.  If ``attr_dict`` is explicitly None
        together with ``data`` and ``newname``, an error is logged.
        Default: {} (empty dictionary)

    .. note::
        If you want to combine multiple tplot variables into one, simply supply
        the list of tplot variables to the "data" parameter.  This will cause
        the data to overlay when plotted.

    Returns
    -------
    bool
        True if a variable was created, renamed, or had its metadata replaced.
        False if nothing could be done or the variable could not be created.
        Note that a deletion request (``delete=True``) also returns False.

    Examples
    --------
    >>> # Store a single line
    >>> import pyspedas
    >>> x_data = [1,2,3,4,5]
    >>> y_data = [1,2,3,4,5]
    >>> pyspedas.store_data("Variable1", data={'x':x_data, 'y':y_data})

    >>> # Store two lines
    >>> x_data = [1,2,3,4,5]
    >>> y_data = [[1,5],[2,4],[3,3],[4,2],[5,1]]
    >>> pyspedas.store_data("Variable2", data={'x':x_data, 'y':y_data})

    >>> # Store a spectrogram
    >>> x_data = [1,2,3]
    >>> y_data = [ [1,2,3] , [4,5,6], [7,8,9] ]
    >>> v_data = [1,2,3]
    >>> pyspedas.store_data("Variable3", data={'x':x_data, 'y':y_data, 'v':v_data})

    >>> # Combine two different line plots
    >>> pyspedas.store_data("Variable1and2", data=['Variable1', 'Variable2'])

    >>> # Rename TVar
    >>> pyspedas.store_data('a', data={'x':[0,4,8,12,16], 'y':[1,2,3,4,5]})
    >>> pyspedas.store_data('a',newname='f')
    """
    create_time = datetime.datetime.now()

    # If delete is specified, we are just deleting the variable.
    # The False return value is kept for backward compatibility.
    if delete is True:
        del_data(name)
        return False

    if data is None and newname is None and attr_dict is None:
        logging.error('store_data: data array, newname, and attr_dict all unspecified, nothing to do.')
        return False

    # Only metadata supplied: replace the metadata of the existing variable
    if data is None and newname is None and attr_dict is not None:
        replace_metadata(name,attr_dict)
        return True

    # If newname is specified, we are just renaming the variable
    if newname is not None:
        tplot_rename(name, newname)
        return True

    # A space-separated string of names is treated like a list of names
    if isinstance(data, str):
        data = data.split(' ')

    # If the data is a list instead of a dictionary, user is looking to overplot
    if isinstance(data, list):
        base_data = _get_base_tplot_vars(name,data)
        if len(base_data) == 0:
            logging.warning("store_data: None of the base variables exist to construct pseudovariable %s",name)
            return False
        # Copying the first variable to use all of its plot options
        # However, we probably want each overplot to retain its original plot option
        pyspedas.tplot_tools.data_quants[name] = copy.deepcopy(pyspedas.tplot_tools.data_quants[base_data[0]])
        pyspedas.tplot_tools.data_quants[name].attrs = copy.deepcopy(pyspedas.tplot_tools.data_quants[base_data[0]].attrs)
        pyspedas.tplot_tools.data_quants[name].name = name
        pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['overplots'] = base_data[1:]
        pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['overplots_mpl'] = base_data
        # These sets of options should default to the sub-variables' options, not simply
        # copied from the first variable in the list.  These options can be still be set
        # on the pseudovariable, and they will override the sub-variable options.
        pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['yaxis_opt'] = {}
        pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['zaxis_opt'] = {}
        pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['line_opt'] = {}
        pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['extras'] = {}
        return True

    # Keys are popped from / added to the dictionary below.  Work on a shallow
    # copy so the caller's dictionary is left exactly as it was passed in.
    data = dict(data)

    # if the data table doesn't contain an 'x', assume this is a non-record varying variable
    if 'x' not in data:
        values = np.array(data.pop('y'))
        pyspedas.tplot_tools.data_quants[name] = {'data': values, 'name': name}
        return True

    times = data.pop('x')
    # Building an array from ragged input can emit warnings; silence them here
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        values = np.array(data.pop('y'))

    if 'dy' in data:
        err_values = np.array(data.pop('dy'))
        if len(err_values) != len(times):
            logging.warning('store_data: Warning: %s: length of error values (%d) does not match length of time values (%d)',name,len(err_values),
                            len(times))
    else:
        err_values = None

    # The type dispatch below inspects times[0], so an empty time axis has to be
    # rejected up front instead of failing with an IndexError.
    if len(times) == 0:
        logging.warning('store_data: %s has empty x (time) component, cannot create variable',name)
        return False

    # Convert input time representation to np.datetime64 objects, if needed
    if isinstance(times, pd.Series):
        # if it is pandas series, convert to numpy array
        datetimes = times.to_numpy(dtype='datetime64[ns]')
    elif isinstance(times[0],datetime.datetime):
        # datetime objects: do explicit conversion to np.datetime64[ns] and ensure container is a numpy array
        if is_timezone_aware(times):
            # Numpy will complain if it is given timezone-aware datetimes to convert.
            # So we convert to UTC first, then drop the timezone entirely
            tz_aware_utc = [aware_dt.astimezone(datetime.timezone.utc) for aware_dt in times]
            tz_naive = [aware_dt.replace(tzinfo=None) for aware_dt in tz_aware_utc]
            datetimes = np.array(tz_naive,dtype='datetime64[ns]')
        elif isinstance(times,np.ndarray):
            datetimes = times.astype('datetime64[ns]')
        else:
            datetimes = np.array(times,dtype='datetime64[ns]')
    elif isinstance(times[0],np.datetime64):
        # np.datetime64, use as-is, but we might have to convert the container to a numpy array
        if isinstance(times,np.ndarray):
            datetimes = times
        else:
            datetimes = np.array(times)
        # We want the np.datetime64 resolution to be ns.  If it already is, do nothing, otherwise,
        # convert to ns.  In the future, we might support storing times in any resolution,
        # and dealing with the conversion in get_data or in client code.
        if datetimes.dtype.name != 'datetime64[ns]':
            datetimes = datetimes.astype('datetime64[ns]')
    elif isinstance(times[0],(int,np.integer,float,np.float64)):
        # Assume seconds since Unix epoch, convert to np.datetime64 with nanosecond precision
        # Make sure we have a numpy array
        if not isinstance(times,np.ndarray):
            times = np.array(times)
        # Replace any NaN or inf values with 0.  Copy first so that an array
        # supplied by the caller is never modified in place.
        non_finite = np.logical_not(np.isfinite(times))
        if non_finite.any():
            times = times.copy()
            times[non_finite] = 0
        datetimes = np.array(times*1e09,dtype='datetime64[ns]')
    elif isinstance(times[0],str):
        # Interpret strings as timestamps, convert to np.datetime64 with nanosecond precision
        datetimes = np.array(times,dtype='datetime64[ns]')
    else:
        # Hope it's convertable to a numpy array!  This case will get hit for an xarray DataArray.
        datetimes = np.array(times).astype('datetime64[ns]')

    times = datetimes
    # At this point, times should be a numpy array of datetime or np.datetime64 objects

    if len(values.shape) == 0:
        # This can happen for Cluster variables with only a single sample, as they can
        # be incorrectly marked as NRV and lose their leading (time) dimension.
        logging.warning("store_data: Data array for %s appears to be a zero-dimensional array; converting to 1-D array.",name)
        if len(times) == 1:
            logging.warning("store_data: This is possibly due to the leading array dimension being lost in a scalar variable with a single timestamp.")
        values = np.array([values])

    if len(values) == 0:
        logging.warning('store_data: %s has empty y component, cannot create variable',name)
        return False

    if len(times) != len(values):
        # This happens for a few MMS and other data sets.  Rather than quitting immediately, go ahead and create
        # the variable, but give an informational message about the mismatch.  The fix would probably be for the
        # data provider to mark the variable as non-record-variant, and avoid giving it a DEPEND_0 or DEPEND_TIME
        # attribute.
        logging.info("store_data: %s: lengths of x (%d) and y (%d) do not match! Mislabeled NRV variable?",name,len(times),len(values))

    if not isinstance(times,np.ndarray):
        logging.warning("store_data: times was not converted to a numpy array.  This should not happen.")
        times = np.array(times)

    # assumes monotonically increasing time series
    if isinstance(times[0], datetime.datetime):
        # This may be dead code now?
        trange = [times[0].replace(tzinfo=datetime.timezone.utc).timestamp(),
                  times[-1].replace(tzinfo=datetime.timezone.utc).timestamp()]
    elif isinstance(times[0], np.datetime64):
        trange = np.float64([times[0], times[-1]]) / 1e9
    else:
        trange = [times[0], times[-1]]

    # Special case if y is 1-dimensional and 'v' or 'v1' is present
    # This can happen if split_data is called on a vector-valued variable that has a DEPEND_1.
    # We can't use v as a coordinate, or we'll get a ValueError creating the xarray object,
    # so we'll save its value here, then after the xarray object is created, stash it in a different
    # attribute.  Then join_vec can find it and restore the depend_1 array from split-out components.
    extra_v_values = None
    if len(values.shape) == 1:
        if 'v' in data:
            extra_v_values = np.array(data.pop('v'))
        elif 'v1' in data:
            extra_v_values = np.array(data.pop('v1'))

    # Figure out the 'v' data
    # This seems to be conflating specplot bins with general DEPEND_N attributes.
    # Maybe only do this stuff if it's marked as a spectrum?  But what if it's from
    # a NetCDF rather than a CDF?
    #
    # spec_bins is initialised here so that it is always bound, even when a Vn
    # key is missing and no usable bins can be identified.
    spec_bins = None
    spec_bins_dimension = None
    spec_bins_time_varying = False
    spec_bins_exist = False
    if 'v' in data or 'v1' in data or 'v2' in data or 'v3' in data:
        # Generally the data is 1D, but occasionally
        # the bins will vary in time.
        spec_bins_exist = True
        if 'v' in data:
            spec_bins = data['v']
            spec_bins_dimension = 'v'
        elif ("v1" in data) and ("v2" in data):
            # With or without 'v3', 'v2' supplies the bins
            spec_bins = data['v2']
            spec_bins_dimension = 'v2'
        else:
            # At least one vn is missing.
            logging.warning("At least one Vn tag is missing, cannot create spec_bins from variable %s.", name)
            spec_bins_exist = False

        if spec_bins_exist and not isinstance(spec_bins, pd.DataFrame):
            try:
                spec_bins = pd.DataFrame(spec_bins)
            except Exception:
                # Bins could not be put into a DataFrame; fall back to 1..N bin numbers
                if spec_bins_dimension=='v':
                    spec_bins = np.arange(1, len(values[0])+1)
                elif spec_bins_dimension=="v2":
                    spec_bins = np.arange(1, len(values[0][0]) + 1)
                spec_bins = pd.DataFrame(spec_bins)

        if spec_bins_exist and len(spec_bins.columns) != 1:
            # The spec_bins are time varying
            # Or maybe they're just DEPEND_N and nothing to do with spectra?
            spec_bins_time_varying = True
            if len(spec_bins) != len(times):
                # Maybe it's not a spectrum at all?
                # Cluster pressure tensor variables have a DEPEND_1 that's 2-D, 1x3 [['x','y','z']]
                logging.error("store_data: Length of spec_bins (%d) and times (%d) do not match for variable %s.",len(spec_bins),len(times),name)
                spec_bins = None
                spec_bins_exist = False
        elif spec_bins_exist:
            spec_bins = spec_bins.transpose()
            spec_bins_time_varying = False
    else:
        # No bins supplied: provide placeholder dimensions if values are more than 1 dimension
        if len(values.shape) == 2:
            data['v'] = None
        if len(values.shape) > 2:
            data['v1'] = None
            data['v2'] = None
        if len(values.shape) > 3:
            data['v3'] = None

    # Set up xarray dimension and coordinates
    coordinate_list = sorted(data.keys())
    dimension_list = [d + '_dim' for d in coordinate_list]

    if len(coordinate_list) < len(values.shape)-1:
        logging.warning("store_data: Data array for variable %s has %d dimensions, but only %d v_n keys plus time.  Adding empty v_n keys.", name, len(values.shape), len(coordinate_list))
        n_dims = len(values.shape)
        if n_dims == 2:
            data['v'] = None
        elif n_dims in (3, 4):
            # ERG LEPI 3dflux quality flags have this issue (4-D case)
            if 'v' in data:
                data['v1'] = data.pop('v')
            needed_keys = ('v1', 'v2') if n_dims == 3 else ('v1', 'v2', 'v3')
            for key in needed_keys:
                data.setdefault(key, None)
        coordinate_list = sorted(data.keys())
        dimension_list = [d + '_dim' for d in coordinate_list]
        # Don't try to use these dimensions as coordinates
        spec_bins_exist = False
        spec_bins = None

    temp = None
    # Ignore warnings about cdflib non-nanosecond precision timestamps for now
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore",message="^.*non-nanosecond precision.*$")
        try:
            temp = xr.DataArray(values, dims=['time']+dimension_list,
                                coords={'time': ('time', times)})
        except ValueError as err:
            logging.warning("store_data: ValueError trying to set xarray coordinates for variable %s: %s", name, str(err))
            spec_bins_exist = False
            spec_bins = None
            if len(times) == 1:
                logging.warning("store_data: This is possibly due to the leading data dimension being lost in an array-valued or vector-valued variable with a single timestamp.")
            # If data is 1-dimensional, ignore any DEPEND_N supplied
            elif (len(values.shape) == 1) and len(dimension_list) > 0:
                logging.warning("store_data: variable %s is 1-dimensional, but has additional keys defined: %s. Dropping redundant coordinate(s).",name, dimension_list)
                temp = xr.DataArray(values, dims=['time'], coords={'time': ('time', times)})
                coordinate_list=[]
                dimension_list=[]
            else:
                logging.warning("Giving up on this variable.")
                return False

    if temp is None:
        # This can happen with mismatched times/data values, and no valid DEPEND_N.
        # For example, POLAR MFE data, variable MF_Num
        logging.warning("store_data: Unable to create xarray object for variable %s, giving up.", name)
        return False

    if spec_bins_exist:
        try:
            if spec_bins_time_varying:
                temp.coords['spec_bins'] = (('time', spec_bins_dimension+'_dim'), spec_bins.values)
            else:
                temp.coords['spec_bins'] = (spec_bins_dimension+'_dim', np.squeeze(spec_bins.values))
        except ValueError as err:
            logging.warning('store_data: conflicting size for at least one dimension for variable %s', name)
            logging.warning('store_data: ValueError exception text: %s',str(err))

    for d in coordinate_list:
        # Placeholder dimensions carry no coordinate values
        if data[d] is None:
            continue
        try:
            d_dimension = pd.DataFrame(data[d])
            if len(d_dimension.columns) != 1:
                # Time-varying coordinate: needs one row per timestamp
                if len(d_dimension) != len(times):
                    logging.warning("store_data: Length of %s (%d) and time (%d) do not match.  Cannot create coordinate for %s.",d,len(d_dimension),len(times),name)
                    continue
                temp.coords[d] = (('time', d+'_dim'), d_dimension.values)
            else:
                d_dimension = d_dimension.transpose()
                if d_dimension.size == 1:
                    # np.squeeze() would collapse a length-1 dimension to a scalar,
                    # causing a ValueError, so index the single row instead
                    logging.warning("store_data: Dimension %s of variable %s has length 1",d,name)
                    temp.coords[d] = (d+'_dim', d_dimension.values[0])
                else:
                    temp.coords[d] = (d+'_dim', np.squeeze(d_dimension.values))
        except ValueError as err:
            logging.warning("store_data: Could not create coordinate %s_dim for variable %s",d, name)
            logging.warning("store_data: ValueError exception text: %s", str(err))

    # Set up Attributes Dictionaries
    xaxis_opt = dict(axis_label='')
    yaxis_opt = dict(axis_label=name) if (spec_bins is None) else dict(axis_label='')
    zaxis_opt = dict(axis_label='Z-Axis') if (spec_bins is None) else dict(axis_label=name)
    xaxis_opt['crosshair'] = 'X'
    yaxis_opt['crosshair'] = 'Y'
    zaxis_opt['crosshair'] = 'Z'
    xaxis_opt['x_axis_type'] = 'linear'
    yaxis_opt['y_axis_type'] = 'linear'
    zaxis_opt['z_axis_type'] = 'linear'
    line_opt = {}
    time_bar = []
    extras = dict(panel_size=1, border=True)
    links = {}

    # Add dicts to the xarray attrs.  The caller's attr_dict is deep-copied so
    # it is never shared with the stored variable; an explicit None is treated
    # as "no attributes".
    temp.name = name
    temp.attrs = copy.deepcopy(attr_dict) if attr_dict is not None else {}
    if extra_v_values is not None:
        temp.attrs['extra_v_values'] = extra_v_values
    if 'plot_options' not in temp.attrs:
        temp.attrs['plot_options'] = {}
    plot_options = temp.attrs['plot_options']
    plot_options['xaxis_opt'] = xaxis_opt
    plot_options['yaxis_opt'] = yaxis_opt
    plot_options['zaxis_opt'] = zaxis_opt
    plot_options['line_opt'] = line_opt
    plot_options['trange'] = trange
    plot_options['time_bar'] = time_bar
    plot_options['extras'] = extras
    plot_options['create_time'] = create_time
    plot_options['links'] = links
    plot_options['overplots'] = []
    plot_options['overplots_mpl'] = []
    plot_options['interactive_xaxis_opt'] = {}
    plot_options['interactive_yaxis_opt'] = {}
    plot_options['error'] = err_values

    pyspedas.tplot_tools.data_quants[name] = temp
    pyspedas.tplot_tools.data_quants[name].attrs['plot_options']['yaxis_opt']['y_range'] = get_y_range(temp)

    return True
def _get_base_tplot_vars(name,data):
    """
    Resolve the component names of a pseudovariable to a flat list of names.

    Parameters
    ----------
    name : str
        Name of the pseudovariable being built; only used in log messages.
    data : list or object
        A list of component variable names.  Anything that is not a list is
        treated as a single component.

    Returns
    -------
    list
        Component names found in ``pyspedas.tplot_tools.data_quants``, in input
        order.  Components that are not found are skipped with a warning.  A
        component whose stored ``.data`` attribute is itself a list is expanded
        recursively.
    """
    components = data if isinstance(data, list) else [data]
    resolved = []
    for component in components:
        if component not in pyspedas.tplot_tools.data_quants:
            logging.warning('store_data: Pseudovariable %s component %s not found, skipping', name, component)
            continue
        stored = pyspedas.tplot_tools.data_quants[component].data
        if isinstance(stored, list):
            # The component is itself a list of names: expand it in place
            resolved.extend(_get_base_tplot_vars(name, stored))
        else:
            resolved.append(component)
    return resolved
def _check_spec_bins_ordering(times, spec_bins):
    """
    Determine whether spectrogram bins are in ascending or descending order.

    This is a private function intended to be run during object creation.

    Parameters
    ----------
    times : sequence
        The time axis; only its length is used, to decide whether
        ``spec_bins`` holds one row of bins per timestamp.
    spec_bins : pandas.DataFrame or None
        The bin values.  If it has as many rows as there are timestamps, the
        first row containing two adjacent finite values decides the ordering.
        Otherwise the first two values of the first row are compared.

    Returns
    -------
    bool or None
        True if the bins are ascending, False if they are not.  None if
        ``spec_bins`` is None or the ordering cannot be determined (no row with
        two adjacent finite values, or fewer than two bins).
    """
    if spec_bins is None:
        return None

    if len(spec_bins) == len(times):
        # One row of bins per timestamp
        for _, row in spec_bins.iterrows():
            if row.isnull().values.all():
                continue
            bins = row.to_numpy()
            # Pair each bin with its right-hand neighbour; pairing by position
            # means the last bin is never looked up past the end of the row.
            for left, right in zip(bins[:-1], bins[1:]):
                if np.isfinite(left) and np.isfinite(right):
                    return bool(left < right)
        # No row offered two adjacent finite values
        return None

    # Bins are not time varying: compare the first two bins of the first row
    if spec_bins.shape[0] < 1 or spec_bins.shape[1] < 2:
        return None
    return bool(spec_bins.iloc[0, 0] < spec_bins.iloc[0, 1])
def store(name, data=None, delete=False, newname=None, metadata={}):
    """
    Create tplot variables by delegating to store_data.

    This is a thin wrapper: every argument is forwarded unchanged, with
    'metadata' passed along as store_data's 'attr_dict'.  This wrapper will
    likely be removed in a future release.

    Parameters:
        name : str
            Name of the tplot variable that will be created
        data : dict
            A python dictionary object.

            'x' should be a 1-dimensional array that represents the data's x axis.  Typically this data is time,
                represented in seconds since epoch (January 1st 1970)

            'y' should be the data values. This can be 2 dimensions if multiple lines or a spectrogram are desired.

            'v' is optional, and is only used for spectrogram plots.  This will be a list of bins to be used.  If this
                is provided, then 'y' should have dimensions of x by z.

            'v1/v2/v3/etc' are also optional, and are only used for spectrogram plots.  These will act as the
                coordinates for 'y' if 'y' has numerous dimensions.  By default, 'v2' is plotted in spectrogram plots.

        delete : bool, optional
            Deletes the tplot variable matching the "name" parameter
        newname: str
            Renames TVar to new name
        metadata: dict
            A dictionary object of attributes (these do not affect routines in pyspedas, this is merely to keep
            metadata alongside the file)

    .. note::
        If you want to combine multiple tplot variables into one, simply supply the list of tplot variables to the
        "data" parameter.  This will cause the data to overlay when plotted.

    Returns:
        The value returned by store_data for the same arguments.

    Examples:
        >>> # Store a single line
        >>> import pyspedas
        >>> x_data = [1,2,3,4,5]
        >>> y_data = [1,2,3,4,5]
        >>> pyspedas.store("Variable1", data={'x':x_data, 'y':y_data})

        >>> # Store two lines
        >>> x_data = [1,2,3,4,5]
        >>> y_data = [[1,5],[2,4],[3,3],[4,2],[5,1]]
        >>> pyspedas.store("Variable2", data={'x':x_data, 'y':y_data})

        >>> # Store a spectrogram
        >>> x_data = [1,2,3]
        >>> y_data = [ [1,2,3] , [4,5,6], [7,8,9] ]
        >>> v_data = [1,2,3]
        >>> pyspedas.store("Variable3", data={'x':x_data, 'y':y_data, 'v':v_data})

        >>> # Combine two different line plots
        >>> pyspedas.store("Variable1and2", data=['Variable1', 'Variable2'])

        >>> # Rename TVar
        >>> pyspedas.store('a', data={'x':[0,4,8,12,16], 'y':[1,2,3,4,5]})
        >>> pyspedas.store('a',newname='f')
    """
    # Map this wrapper's keyword names onto store_data's keyword names
    forwarded = {
        'data': data,
        'delete': delete,
        'newname': newname,
        'attr_dict': metadata,
    }
    outcome = store_data(name, **forwarded)
    return outcome