"""
Creates a new tplot variable as the time average of original.
Notes
-----
Similar to avg_data.pro in IDL SPEDAS.
"""
import logging
import numpy as np
from pyspedas.tplot_tools import store_data, get_data, tnames, time_double
[docs]
def avg_data(names, trange=None, res=None, width=None,
newname=None, suffix=None, overwrite=False):
"""
Get a new tplot variable with averaged data.
Parameters
----------
names: str/list of str
List of pytplot names.
trange: list of float, optional
Start time, end time.
If empty, the data start and end time will be used.
res: float, optional
Time resolution in seconds for averaging data.
It can be less than 1 sec.
Default is 60 sec.
width: int, optional
Number of values for the averaging window.
If res is set, then width is ignored.
newname: str/list of str, optional
List of new names for tplot variables.
If not given, then a suffix is applied.
suffix: str, optional
A suffix to apply.
Default is '-avg'.
overwrite: bool, optional
Replace the existing tplot name.
Default is False.
Returns
-------
n_names: str/list of str
List of new pytplot names.
"""
old_names = tnames(names)
if res is None and width is None:
res = 60 # Default is 60 sec
if names is None or len(old_names) < 1:
logging.error('avg_data: No valid tplot names were provided.')
return
if suffix is None:
# IDL SPEDAS default suffix is '_avg'
suffix = '-avg'
if overwrite:
n_names = old_names
elif newname is None:
n_names = [s + suffix for s in old_names]
else:
n_names = newname
if isinstance(n_names, str):
n_names = [n_names]
if len(n_names) != len(old_names):
n_names = [s + suffix for s in old_names]
for old_idx, old in enumerate(old_names):
new = n_names[old_idx]
# Get times and data
d = get_data(old)
metadata = get_data(old, metadata=True)
time = d[0]
time = np.array(time_double(time))
time_len = len(time)
data = np.array(d[1])
dim = data.shape
dim0 = dim[0]
if dim0 != time_len:
logging.error('avg_data: Data and time length mismatch.')
continue
if len(dim) < 2:
dim1 = 1
else:
dim1 = dim[1]
# Data may also contain v, v1, v2, v3
process_energies = []
retain_energies = []
for i, lend in enumerate(d):
if i > 1:
if len(lend) == len(time):
process_energies.append(i)
else:
# These will retained in the results as-is
retain_energies.append(i)
process_v = {}
for i in process_energies:
process_v[d._fields[i]] = []
# Find start and end times
if trange is not None and len(trange) == 2 and trange[0] < trange[1]:
time_start = time_double(trange[0])
time_end = time_double(trange[1])
elif res is not None:
# Define start and end times similar to IDL
time_start = np.floor(time[0] / res) * res
time_end = np.floor(time[-1] / res + 1) * res
else:
time_start = time[0]
time_end = time[-1]
# Check for empty set
count_in_range = len(time[(time >= time_start) & (time <= time_end)])
if time_end <= time_start or count_in_range < 2:
logging.error('avg_data: No time values in provided time range.')
continue
# Find time bins
time_duration = time_end - time_start
if res is not None:
# Given the resolution, compute bins
dt = res
bin_count = int(time_duration / dt)
ind = np.floor((time - time_start) / dt)
else:
# Given the width, compute bins
bins = np.arange(count_in_range)
ind = np.floor(bins / width)
bin_count = int(count_in_range / width)
dt = time_duration / bin_count
if bin_count < 2:
msg = 'avg_data: too few bins. Bins=' + str(bin_count) \
+ ', Data points=' + str(count_in_range)
logging.error(msg)
continue
# Split time into bins
mdt = (time_end - time_start) / dt
if (mdt - int(mdt) >= 0.5):
max_ind = np.ceil(mdt)
else:
max_ind = np.floor(mdt)
w1 = np.asarray(ind < 0).nonzero()
ind[w1] = -1
w2 = np.asarray(ind >= max_ind).nonzero()
ind[w2] = -1
# Find new times
mx = np.max(ind) + 1
new_times = (np.arange(mx) + 0.5) * dt + time_start
# Find new data
new_data = []
for i in range(int(max_ind)):
if i < 0:
continue
idx0 = np.asarray(ind == i).nonzero()
isempty = True if len(idx0) < 1 else False
if dim1 < 2:
nd0 = np.nan if isempty else np.nanmean(data[idx0])
else:
nd0 = []
for j in range(dim1):
tmp = np.nan if isempty else np.nanmean(data[idx0, j])
nd0.append(tmp)
new_data.append(nd0)
for ii in process_energies:
# The following processes v, v1, v2, v3
dime1 = len(d[ii][0])
if dime1 < 2:
nd1 = np.nan if isempty else np.nanmean(d[ii][idx0])
else:
nd1 = []
for j in range(dime1):
tmp = np.nan if isempty else np.nanmean(d[ii][idx0, j])
nd1.append(tmp)
process_v[d._fields[ii]].append(nd1)
# Create the new tplot variable
data_dict = {'x': new_times, 'y': new_data}
for i in retain_energies:
data_dict[d._fields[i]] = d[i]
for i in process_energies:
data_dict[d._fields[i]] = process_v[d._fields[i]]
store_data(new, data=data_dict, attr_dict=metadata)
logging.info('avg_data was applied to: %s', new)
return n_names