Source code for pyspedas.projects.cluster.load_csa

"""
Load data from the Cluster Science Archive.

This loading function uses the Cluster Science Archive:
    https://csa.esac.esa.int/
It is a web service: we build the query, and the service responds
with a CDF file packaged as a tar.gz archive.

We download the tar.gz file directly, without using pyspedas.download().
"""
import logging
from pyspedas.tplot_tools import time_clip as tclip
from pyspedas.tplot_tools import time_string
from pyspedas.tplot_tools import time_double
from pyspedas.tplot_tools import cdf_to_tplot

import requests
import sys
import tarfile
import os
from pathlib import Path
from typing import List
from .config import CONFIG

from pyspedas.utilities.download import is_fsspec_uri
import fsspec

def cl_master_datatypes():
    """Return the list of Cluster dataset type names known to this module.

    A new list is built on every call, so callers may modify the result
    without affecting later calls.
    """
    return [
        'CE_WBD_WAVEFORM_CDF',
        'CP_AUX_POSGSE_1M',
        'CP_CIS-CODIF_HS_H1_MOMENTS',
        'CP_CIS-CODIF_HS_He1_MOMENTS',
        'CP_CIS-CODIF_HS_O1_MOMENTS',
        'CP_CIS-CODIF_PAD_HS_H1_PF',
        'CP_CIS-CODIF_PAD_HS_He1_PF',
        'CP_CIS-CODIF_PAD_HS_O1_PF',
        'CP_CIS-HIA_ONBOARD_MOMENTS',
        'CP_CIS-HIA_PAD_HS_MAG_IONS_PF',
        'CP_EDI_AEDC',
        'CP_EDI_MP',
        'CP_EDI_SPIN',
        'CP_EFW_L2_E3D_INERT',
        'CP_EFW_L2_P',
        'CP_EFW_L2_V3D_INERT',
        'CP_EFW_L3_E3D_INERT',
        'CP_EFW_L3_P',
        'CP_EFW_L3_V3D_INERT',
        'CP_FGM_5VPS',
        'CP_FGM_FULL',
        'CP_FGM_SPIN',
        'CP_PEA_MOMENTS',
        'CP_PEA_PITCH_SPIN_DEFlux',
        'CP_PEA_PITCH_SPIN_DPFlux',
        'CP_PEA_PITCH_SPIN_PSD',
        'CP_RAP_ESPCT6',
        'CP_RAP_ESPCT6_R',
        'CP_RAP_HSPCT',
        'CP_RAP_HSPCT_R',
        'CP_RAP_ISPCT_CNO',
        'CP_RAP_ISPCT_He',
        'CP_STA_CS_HBR',
        'CP_STA_CS_NBR',
        'CP_STA_CWF_GSE',
        'CP_STA_CWF_HBR_ISR2',
        'CP_STA_CWF_NBR_ISR2',
        'CP_STA_PSD',
        'CP_WBD_WAVEFORM',
        'CP_WHI_ELECTRON_DENSITY',
        'CP_WHI_NATURAL',
        'JP_AUX_PMP',
        'JP_AUX_PSE',
    ]


def cl_master_probes():
    """Return the names of the four Cluster probes, 'C1' through 'C4'."""
    return ['C' + str(number) for number in range(1, 5)]


def cl_format_time(s):
    """Return a time string formatted for the Cluster web services.

    The input is first converted with ``time_double`` and then rendered
    with ``time_string`` using the pattern ``YYYY-MM-DDThh:mm:ssZ``.
    """
    csa_time_format = "%Y-%m-%dT%H:%M:%SZ"
    as_double = time_double(s)
    return time_string(as_double, csa_time_format)


def _cl_open(local_fs, path, mode):
    """Open ``path`` through ``local_fs`` when given, else with builtin ``open``."""
    if local_fs is not None:
        return local_fs.open(path, mode)
    return open(path, mode)


def _cl_is_safe_member(name):
    """Return True if a tar member name cannot escape the extraction directory."""
    normalized = name.replace('\\', '/')
    if normalized.startswith('/'):
        return False
    return '..' not in normalized.split('/')


def _cl_extract_archive(out_gz, local_path, local_fs):
    """Extract the regular files of the archive ``out_gz`` under ``local_path``.

    Returns the member names (relative paths) of the files that were written.
    """
    extracted = []
    # For fsspec targets tarfile needs an already opened byte stream.
    raw = local_fs.open(out_gz, "rb") if local_fs is not None else None
    try:
        if raw is not None:
            tar = tarfile.open(fileobj=raw)
        else:
            tar = tarfile.open(out_gz, "r:gz")
        with tar:
            for member in tar.getmembers():
                if not member.isfile():
                    continue
                if not _cl_is_safe_member(member.name):
                    # The archive comes from a remote service; never write
                    # outside of the configured data directory.
                    logging.warning("Skipping unsafe path in archive: "
                                    + member.name)
                    continue
                target = '/'.join([local_path, member.name])
                if local_fs is None:
                    os.makedirs(str(Path(target).parent), exist_ok=True)
                # note: the member is read into memory before it is written
                with tar.extractfile(member) as src, \
                        _cl_open(local_fs, target, "wb") as dest:
                    dest.write(src.read())
                extracted.append(member.name)
    finally:
        if raw is not None:
            raw.close()
    return extracted


def load_csa(trange:List[str]=['2001-02-01', '2001-02-03'],
             probes:List[str]=['C1'],
             datatypes:List[str]=['CP_CIS-CODIF_HS_H1_MOMENTS'],
             downloadonly:bool=False,
             time_clip:bool=True,
             prefix:str='',
             suffix:str='',
             get_support_data:bool=False,
             varformat:str=None,
             varnames:List[str]=[],
             notplot:bool=False) -> List[str]:
    """Load data using the Cluster Science Data archive.

    Parameters
    ----------
    trange : list of str
        Time range [start, end].
        Default: ['2001-02-01', '2001-02-03']
    probes : list of str
        List of Cluster probes.
        Valid options: 'C1','C2','C3','C4', '*' to load all probes
        Default: ['C1']
    datatypes : list of str
        List of Cluster data types.
        Valid options::

            'CE_WBD_WAVEFORM_CDF', 'CP_AUX_POSGSE_1M',
            'CP_CIS-CODIF_HS_H1_MOMENTS', 'CP_CIS-CODIF_HS_He1_MOMENTS',
            'CP_CIS-CODIF_HS_O1_MOMENTS', 'CP_CIS-CODIF_PAD_HS_H1_PF',
            'CP_CIS-CODIF_PAD_HS_He1_PF', 'CP_CIS-CODIF_PAD_HS_O1_PF',
            'CP_CIS-HIA_ONBOARD_MOMENTS', 'CP_CIS-HIA_PAD_HS_MAG_IONS_PF',
            'CP_EDI_AEDC', 'CP_EDI_MP', 'CP_EDI_SPIN', 'CP_EFW_L2_E3D_INERT',
            'CP_EFW_L2_P', 'CP_EFW_L2_V3D_INERT', 'CP_EFW_L3_E3D_INERT',
            'CP_EFW_L3_P', 'CP_EFW_L3_V3D_INERT', 'CP_FGM_5VPS', 'CP_FGM_FULL',
            'CP_FGM_SPIN', 'CP_PEA_MOMENTS', 'CP_PEA_PITCH_SPIN_DEFlux',
            'CP_PEA_PITCH_SPIN_DPFlux', 'CP_PEA_PITCH_SPIN_PSD', 'CP_RAP_ESPCT6',
            'CP_RAP_ESPCT6_R', 'CP_RAP_HSPCT', 'CP_RAP_HSPCT_R',
            'CP_RAP_ISPCT_CNO', 'CP_RAP_ISPCT_He', 'CP_STA_CS_HBR',
            'CP_STA_CS_NBR', 'CP_STA_CWF_GSE', 'CP_STA_CWF_HBR_ISR2',
            'CP_STA_CWF_NBR_ISR2', 'CP_STA_PSD', 'CP_WBD_WAVEFORM',
            'CP_WHI_ELECTRON_DENSITY', 'CP_WHI_NATURAL', 'JP_AUX_PMP', 'JP_AUX_PSE'

        Default: ['CP_CIS-CODIF_HS_H1_MOMENTS']
    downloadonly: bool
        If true, do not use cdf_to_tplot; the list of downloaded files
        is returned instead.
        Default: False
    time_clip: bool
        If true, apply time clip to data.
        Default: True
    prefix: str
        The tplot variable names will be given this prefix.
        Default: ''
    suffix: str
        The tplot variable names will be given this suffix.
        Default: ''
    get_support_data: bool
        If True, data with an attribute "VAR_TYPE" with a value of "support_data"
        will be loaded into tplot.
        Default: False
    varformat : str
        The file variable formats to load into tplot.
        Wildcard character "*" is accepted.
        Default: None (all variables will be loaded)
    varnames: str or list of str
        Load these variables only. If [] or ['*'], then load everything.
        Default: []
    notplot: bool
        If True, then data are returned in a hash table instead of
        being stored in tplot variables (useful for debugging, and
        access to multi-dimensional data products)
        Default: False

    Returns
    -------
    list of str
        List of tplot variables created (unless notplot keyword is used).
        With ``downloadonly`` the sorted list of extracted files is returned.
        An empty list is returned when the download fails or when
        ``probes`` or ``datatypes`` is empty.

    Examples
    --------
    >>> import pyspedas
    >>> from pyspedas import tplot
    >>> fgm_vars = pyspedas.projects.cluster.load_csa(trange=['2008-11-01','2008-11-02'],datatypes=['CP_FGM_FULL'])
    >>> tplot(['B_vec_xyz_gse__C1_CP_FGM_FULL','B_mag__C1_CP_FGM_FULL'])

    """
    # Empty output in case of errors.
    tvars = []

    if prefix is None:
        prefix = ''
    if suffix is None:
        suffix = ''

    # Start and end dates
    start_date = cl_format_time(trange[0])
    end_date = cl_format_time(trange[1])

    # Delivery format
    delivery_format = 'CDF_ISTP'
    # Delivery interval
    delivery_interval = 'ALL'

    if not probes:
        return tvars
    if not datatypes:
        return tvars

    if not isinstance(probes, list):
        probes = [probes]
    if not isinstance(datatypes, list):
        datatypes = [datatypes]

    # TODO: Create a function that can resolve wildcards
    # similar to IDL spedas ssl_check_valid_name
    # my_datatypes=ssl_check_valid_name(uc_datatypes,master_datatypes)
    # my_probes=ssl_check_valid_name(uc_probes,master_probes)
    if probes[0] == '*':  # load all probes
        probes = cl_master_probes()

    # Construct the query string
    base_url = 'https://csa.esac.esa.int/csa-sl-tap/data?'
    query_string = ('retrieval_type=PRODUCT&START_DATE=' + start_date
                    + '&END_DATE=' + end_date
                    + '&DELIVERY_FORMAT=' + delivery_format
                    + '&DELIVERY_INTERVAL=' + delivery_interval
                    + '&NON_BROWSER')
    query_string += ''.join('&DATASET_ID=' + p + '_' + d
                            for p in probes for d in datatypes)
    url = base_url + query_string

    # Download before touching the file system, so that a failed request
    # leaves neither an open handle nor an empty temporary file behind.
    logging.info("Downloading Cluster data, please wait....")
    try:
        r = requests.get(url, allow_redirects=True)
        r.raise_for_status()
    except requests.exceptions.HTTPError as err:
        logging.error("Download HTTP error: " + str(err))
        return tvars
    except requests.exceptions.RequestException as e:
        logging.error("Download error: " + str(e))
        return tvars
    logging.info("Download complete.")

    local_path = CONFIG['local_data_dir']  # could be URI
    if is_fsspec_uri(local_path):
        local_protocol = local_path.split("://", 1)[0]
        local_fs = fsspec.filesystem(local_protocol, anon=False)
        sep = "/"
        # Temp file name
        out_gz = '/'.join([local_path, 'temp_cluster_file.tar.gz'])
    else:
        local_fs = None
        sep = os.path.sep
        Path(local_path).mkdir(parents=True, exist_ok=True)
        # Temp file name
        out_gz = os.path.join(local_path, 'temp_cluster_file.tar.gz')

    try:
        # Save the downloaded archive, then extract it.
        with _cl_open(local_fs, out_gz, 'wb') as w:
            w.write(r.content)
        member_names = _cl_extract_archive(out_gz, local_path, local_fs)
    finally:
        # Remove the tar.gz file but keep the extracted files; this also
        # runs when writing or extracting fails.
        if local_fs is not None:
            if local_fs.exists(out_gz):
                local_fs.delete(out_gz)
        elif os.path.exists(out_gz):
            os.remove(out_gz)

    # Unique, sorted file list with full path.
    out_files = sorted(sep.join([local_path, name])
                       for name in set(member_names))

    if downloadonly:
        return out_files

    # Load data into tplot
    tvars = cdf_to_tplot(out_files,
                         prefix=prefix,
                         suffix=suffix,
                         get_support_data=get_support_data,
                         varformat=varformat,
                         varnames=varnames,
                         notplot=notplot)

    if notplot:
        return tvars

    if time_clip:
        for new_var in tvars:
            tclip(new_var, trange[0], trange[1], suffix='')

    return tvars