"""
Get information and download files from CDAWeb using cdasws.
For cdasws documentation, see:
https://pypi.org/project/cdasws/
https://cdaweb.gsfc.nasa.gov/WebServices/REST/py/cdasws/index.html
"""
import logging
import os
import re
from cdasws import CdasWs
from pyspedas.tplot_tools import cdf_to_tplot, netcdf_to_tplot, time_clip as tclip
from pyspedas.utilities.download import download
from pyspedas.cdagui_tools.config import CONFIG
class CDAWeb:
    """Get information and download files from CDAWeb using cdasws.

    The instance wraps a ``CdasWs`` client created against the endpoint
    stored in ``CONFIG["cdas_endpoint"]``; every query method goes through
    that client.
    """

    def __init__(self):
        """Create the cdasws client used by all other methods."""
        self.cdas = CdasWs(endpoint=CONFIG["cdas_endpoint"])

    @staticmethod
    def _valid_names(entries):
        """Return the stripped ``"Name"`` of each entry, skipping placeholders.

        Names that are at most one character long, and the literal
        ``"(null)"``, are dropped. ``None`` is treated as an empty list.
        """
        names = (entry["Name"].strip() for entry in entries or [])
        return [name for name in names if len(name) > 1 and name != "(null)"]

    @staticmethod
    def _to_cdas_time(time_str, default_time):
        """Convert a ``'YYYY-MM-DD[ HH:MM:SS]'`` string to the cdas format.

        The first space is replaced by ``T``. A date-only string (exactly
        10 characters) gets ``default_time`` appended; longer strings get a
        trailing ``Z`` unless they already end with one. Shorter strings are
        returned unchanged apart from stripping.
        """
        stamp = time_str.strip().replace(" ", "T", 1)
        if len(stamp) == 10:
            stamp += "T" + default_time + "Z"
        elif len(stamp) > 10 and not stamp.endswith("Z"):
            stamp += "Z"
        return stamp

    @staticmethod
    def _load_files(loader, loader_name, kind, files, **kwargs):
        """Load ``files`` with ``loader`` and return the loaded variable names.

        A ``ValueError`` raised by the loader is logged and results in an
        empty list, so one unreadable file type does not abort the download.
        """
        logging.info("Downloaded %d %s files.", len(files), kind)
        try:
            loaded = loader(files, **kwargs)
        except ValueError as err:
            msg = loader_name + " could not load " + str(files)
            msg += "\n\n"
            msg += "Error from pytplot: " + str(err)
            logging.error(msg)
            return []
        return [] if loaded is None else list(loaded)

    def get_observatories(self):
        """Return a list of strings CDAWeb uses to designate missions or mission groups.

        Returns
        -------
        list of str
            Observatory group names, stripped of surrounding whitespace.
            Names of one character or less and "(null)" are omitted.

        Examples
        --------
        >>> from pyspedas import CDAWeb
        >>> cdaweb_obj = CDAWeb()
        >>> obs_names = cdaweb_obj.get_observatories()
        """
        return self._valid_names(self.cdas.get_observatory_groups())

    def get_instruments(self):
        """Return a list of strings CDAWeb uses to designate instrument or dataset types.

        Returns
        -------
        list of str
            Instrument type names, stripped of surrounding whitespace.
            Names of one character or less and "(null)" are omitted.

        Examples
        --------
        >>> from pyspedas import CDAWeb
        >>> cdaweb_obj = CDAWeb()
        >>> instr_names = cdaweb_obj.get_instruments()
        """
        return self._valid_names(self.cdas.get_instrument_types())

    def clean_time_str(self, t):
        """Remove the time part from a datetime string.

        Everything from the first ``T`` through the last ``Z`` is removed,
        e.g. ``'2007-02-26T00:00:00.000Z'`` becomes ``'2007-02-26'``.
        """
        return re.sub(r"T.+Z", "", t)

    def get_datasets(self, mission_list, instrument_list):
        """Return a list of datasets recognized by CDAWeb, given lists of missions and instruments.

        Parameters
        ----------
        mission_list: list of str
            List of mission names, as obtained from get_observatories()
        instrument_list: list of str
            List of instrument names, as obtained from get_instruments()

        Returns
        -------
        list of str
            A list of available datasets for the given missions and instruments.
            Each entry has the form ``'<dataset id> (<start date> to <end date>)'``.

        Examples
        --------
        >>> from pyspedas import CDAWeb
        >>> cdaweb_obj = CDAWeb()
        >>> dataset_list = cdaweb_obj.get_datasets(['ARTEMIS'],['Electric Fields (space)'])
        """
        datasets = self.cdas.get_datasets(
            observatoryGroup=mission_list, instrumentType=instrument_list
        )
        dnames = []
        for dataset in datasets or []:
            dataset_id = dataset["Id"].strip()
            if not dataset_id or dataset_id == "(null)":
                continue
            interval = dataset["TimeInterval"]
            start = self.clean_time_str(interval["Start"].strip())
            end = self.clean_time_str(interval["End"].strip())
            dnames.append(dataset_id + " (" + start + " to " + end + ")")
        return dnames

    def get_filenames(self, dataset_list, t0, t1):
        """Return a list of urls for a dataset between dates t0 and t1.

        Example: get_filenames(['THB_L2_FIT (2007-02-26 to 2020-01-17)'],
            '2010-01-01 00:00:00', '2010-01-10 00:00:00')

        Parameters
        ----------
        dataset_list: list of str
            A list of dataset names, as obtained from get_datasets().
            Only the part before the first space (the dataset id) is used.
        t0: str
            Start time for data to be retrieved. A date-only value
            ('YYYY-MM-DD') is expanded to 00:00:01 of that day.
        t1: str
            End time for data to be retrieved. A date-only value
            ('YYYY-MM-DD') is expanded to 23:59:59 of that day.

        Returns
        -------
        list of str
            A list of URLs for the given dataset and time range

        Examples
        --------
        >>> from pyspedas import CDAWeb
        >>> cdaweb_obj = CDAWeb()
        >>> urllist = cdaweb_obj.get_filenames(['THB_L2_FIT (2007-02-26 to 2020-01-17)'], '2010-01-01 00:00:00', '2010-01-10 00:00:00')
        """
        # Set times to cdas format
        t0 = self._to_cdas_time(t0, "00:00:01")
        t1 = self._to_cdas_time(t1, "23:59:59")

        remote_url = []
        # For each dataset, find the url of files
        for dataset in dataset_list:
            dataset_id = dataset.strip().split(" ", 1)[0]
            if not dataset_id:
                # Nothing to query for an empty entry.
                continue
            status, result = self.cdas.get_data_file(dataset_id, [], t0, t1)
            if status != 200 or result is None:
                continue
            for description in result.get("FileDescription") or []:
                name = description.get("Name")
                # Entries without a name cannot be downloaded; skip them.
                if name:
                    remote_url.append(name)
        return remote_url

    def cda_download(
        self,
        remote_files,
        local_dir=None,
        download_only=False,
        varformat=None,
        get_support_data=False,
        prefix="",
        suffix="",
        varnames=None,
        notplot=False,
        merge=False,
        trange=None,
        time_clip=False,
        force_download=False,
    ):
        """Download data files and (by default) load the data into tplot variables

        Parameters
        ----------
        remote_files : list of str
            List of remote file URLs, as obtained from function get_filenames().
        local_dir : str
            Local directory to save the data in.
            If None, CONFIG["local_data_dir"] is used.
        download_only : bool
            If True, download the data, but do not load it into tplot variables.
        varformat: str
            If set, specifies a pattern for which CDF or NetCDF variables to load.
        get_support_data: bool
            If True, load CDF variables marked as 'support_data'.
        prefix: str
            If set, prepend this string to the variable name when creating the tplot variables.
        suffix: str
            If set, append this string to the variable name when creating the tplot variables.
        varnames: list of str
            If set, specifies a list of variables to load from the data files.
            If None or [] or ['*'], load all variables.
        notplot: bool
            If True, return data directly as tplot data structures, rather than a list of tplot names.
        merge: bool
            If True, merge the data with existing tplot variables.
            If False (the default), overwrite existing tplot variables.
        trange: list of str
            If set, clip the time range of the data to these values.
        time_clip: bool
            If True, clip the time range of the data to the values in trange.
            Clipping is only attempted when files were downloaded and
            download_only is False.
        force_download: bool
            If True, download the data even if it already exists locally.

        Returns
        -------
        tuple
            A tuple containing the number of (distinct) files downloaded, the number of variables loaded, and a list of the tplot variables loaded.

        Examples
        --------
        >>> from pyspedas import CDAWeb
        >>> from pyspedas import tplot
        >>> cdaweb_obj = CDAWeb()
        >>> urllist = cdaweb_obj.get_filenames(['THB_L2_FIT (2007-02-26 to 2020-01-17)'], '2010-01-01 00:00:00', '2010-01-10 00:00:00')
        >>> result = cdaweb_obj.cda_download(urllist,local_dir="/tmp")
        >>> tplot('thb_fgs_gsm')
        """
        # Return quantities
        no_of_variables = 0
        loaded_vars = []

        # Set the local and remote directories
        remotehttp = CONFIG["remote_data_dir"]
        if local_dir is None:
            local_dir = CONFIG["local_data_dir"]

        # Download the files; a set drops duplicates so each local file is
        # counted and loaded once.
        downloaded = set()
        for remotef in remote_files:
            relative = remotef.strip().replace(remotehttp, "", 1)
            # Plain concatenation (not os.path.join) so that a leading
            # separator in ``relative`` cannot discard ``local_dir``.
            localf = os.path.normpath(local_dir + os.path.sep + relative)
            localfiles = download(
                remote_file=remotef,
                local_file=localf,
                force_download=force_download,
            )
            if localfiles is None:
                continue
            downloaded.update(os.path.normpath(path) for path in localfiles if path)

        all_files = sorted(downloaded)
        no_of_files = len(all_files)
        if no_of_files == 0 or download_only:
            return (no_of_files, no_of_variables, loaded_vars)

        # Separate cdf and netcdf files. All other files cannot be loaded into tplot.
        cdf_files = []
        netcdf_files = []
        for path in all_files:
            if path.endswith(".cdf"):
                cdf_files.append(path)
            elif path.endswith(".nc"):
                netcdf_files.append(path)
            else:
                logging.warning("File type not supported: %s", path)

        # Load the data into tplot variables
        if cdf_files:
            loaded_vars.extend(
                self._load_files(
                    cdf_to_tplot,
                    "cdf_to_tplot",
                    "CDF",
                    cdf_files,
                    prefix=prefix,
                    suffix=suffix,
                    get_support_data=get_support_data,
                    varformat=varformat,
                    varnames=varnames,
                    notplot=notplot,
                    merge=merge,
                )
            )
        if netcdf_files:
            loaded_vars.extend(
                self._load_files(
                    netcdf_to_tplot,
                    "netcdf_to_tplot",
                    "NetCDF",
                    netcdf_files,
                    prefix=prefix,
                    suffix=suffix,
                    merge=merge,
                )
            )

        # Unique variable names, in a deterministic order.
        loaded_vars = sorted(set(loaded_vars))
        no_of_variables = len(loaded_vars)
        logging.info("Number of tplot variables loaded: %d", no_of_variables)

        if time_clip:
            if trange is None:
                logging.warning("Warning: No trange specified for time_clip")
            elif trange[0] >= trange[1]:
                logging.warning(
                    "trange values equal or out of order, no time clipping performed"
                )
            else:
                tclip(loaded_vars, trange[0], trange[1], suffix="", overwrite=True)

        return (no_of_files, no_of_variables, loaded_vars)