Source code for pyspedas.projects.cluster.load_csa

"""
Load data from the Cluster Science Archive.

This loading function uses the Cluster Science Archive:
    https://csa.esac.esa.int/
It is a web service: we build the query, and the service responds
with one or more CDF files packaged as a tar.gz archive.

We download the tar.gz file directly, without using pyspedas.download().
"""
import logging
from pyspedas.tplot_tools import time_clip as tclip
from pyspedas.tplot_tools import time_string
from pyspedas.tplot_tools import time_double
from pyspedas.tplot_tools import cdf_to_tplot

import requests
import sys
import tarfile
import os
from pathlib import Path
from typing import List
from .config import CONFIG

from pyspedas.utilities.download import is_fsspec_uri
import fsspec

def cl_master_datatypes():
    """Return the master list of Cluster dataset type names.

    A fresh list is built on every call, so callers may modify the
    result without affecting later calls.
    """
    return [
        'CE_WBD_WAVEFORM_CDF',
        'CP_AUX_POSGSE_1M',
        'CP_CIS-CODIF_HS_H1_MOMENTS',
        'CP_CIS-CODIF_HS_He1_MOMENTS',
        'CP_CIS-CODIF_HS_O1_MOMENTS',
        'CP_CIS-CODIF_PAD_HS_H1_PF',
        'CP_CIS-CODIF_PAD_HS_He1_PF',
        'CP_CIS-CODIF_PAD_HS_O1_PF',
        'CP_CIS-HIA_ONBOARD_MOMENTS',
        'CP_CIS-HIA_PAD_HS_MAG_IONS_PF',
        'CP_EDI_AEDC',
        'CP_EDI_MP',
        'CP_EDI_SPIN',
        'CP_EFW_L2_E3D_INERT',
        'CP_EFW_L2_P',
        'CP_EFW_L2_V3D_INERT',
        'CP_EFW_L3_E3D_INERT',
        'CP_EFW_L3_P',
        'CP_EFW_L3_V3D_INERT',
        'CP_FGM_5VPS',
        'CP_FGM_FULL',
        'CP_FGM_SPIN',
        'CP_PEA_MOMENTS',
        'CP_PEA_PITCH_SPIN_DEFlux',
        'CP_PEA_PITCH_SPIN_DPFlux',
        'CP_PEA_PITCH_SPIN_PSD',
        'CP_RAP_ESPCT6',
        'CP_RAP_ESPCT6_R',
        'CP_RAP_HSPCT',
        'CP_RAP_HSPCT_R',
        'CP_RAP_ISPCT_CNO',
        'CP_RAP_ISPCT_He',
        'CP_STA_CS_HBR',
        'CP_STA_CS_NBR',
        'CP_STA_CWF_GSE',
        'CP_STA_CWF_HBR_ISR2',
        'CP_STA_CWF_NBR_ISR2',
        'CP_STA_PSD',
        'CP_WBD_WAVEFORM',
        'CP_WHI_ELECTRON_DENSITY',
        'CP_WHI_NATURAL',
        'JP_AUX_PMP',
        'JP_AUX_PSE',
    ]


def cl_master_probes():
    """Return the names of the four Cluster probes, 'C1' through 'C4'."""
    return ['C' + str(number) for number in (1, 2, 3, 4)]


def cl_format_time(s):
    """Return a time string formatted for the Cluster web services.

    The input is passed through ``time_double`` and then rendered by
    ``time_string`` using the pattern YYYY-MM-DDThh:mm:ssZ.
    """
    csa_time_format = "%Y-%m-%dT%H:%M:%SZ"
    as_double = time_double(s)
    return time_string(as_double, csa_time_format)



[docs]
def load_csa(trange:List[str]=['2001-02-01', '2001-02-03'],
             probes:List[str]=['C1'],
             datatypes:List[str]=['CP_CIS-CODIF_HS_H1_MOMENTS'],
             downloadonly:bool=False,
             time_clip:bool=True,
             prefix:str='',
             suffix:str='',
             get_support_data:bool=False,
             varformat:str=None,
             varnames:List[str]=[],
             notplot:bool=False) -> List[str]:
    """Load data using the Cluster Science Data archive.

    A single request is sent to the archive for every probe/datatype
    combination. The response is saved as a temporary tar.gz file under
    ``CONFIG['local_data_dir']`` (a local directory or an fsspec URI),
    its regular files are extracted there, and the temporary archive is
    then removed.

    Parameters
    ----------
        trange : list of str
            Time range [start, end].
            Default: ['2001-02-01', '2001-02-03']

        probes : list of str
            List of Cluster probes. Valid options: 'C1','C2','C3','C4', '*' to load all probes
            Default: ['C1']

        datatypes : list of str
            List of Cluster data types. Valid options::
              'CE_WBD_WAVEFORM_CDF', 'CP_AUX_POSGSE_1M',
              'CP_CIS-CODIF_HS_H1_MOMENTS', 'CP_CIS-CODIF_HS_He1_MOMENTS',
              'CP_CIS-CODIF_HS_O1_MOMENTS', 'CP_CIS-CODIF_PAD_HS_H1_PF',
              'CP_CIS-CODIF_PAD_HS_He1_PF', 'CP_CIS-CODIF_PAD_HS_O1_PF',
              'CP_CIS-HIA_ONBOARD_MOMENTS', 'CP_CIS-HIA_PAD_HS_MAG_IONS_PF',
              'CP_EDI_AEDC', 'CP_EDI_MP', 'CP_EDI_SPIN', 'CP_EFW_L2_E3D_INERT',
              'CP_EFW_L2_P', 'CP_EFW_L2_V3D_INERT', 'CP_EFW_L3_E3D_INERT',
              'CP_EFW_L3_P', 'CP_EFW_L3_V3D_INERT', 'CP_FGM_5VPS', 'CP_FGM_FULL',
              'CP_FGM_SPIN', 'CP_PEA_MOMENTS', 'CP_PEA_PITCH_SPIN_DEFlux',
              'CP_PEA_PITCH_SPIN_DPFlux', 'CP_PEA_PITCH_SPIN_PSD', 'CP_RAP_ESPCT6',
              'CP_RAP_ESPCT6_R', 'CP_RAP_HSPCT', 'CP_RAP_HSPCT_R',
              'CP_RAP_ISPCT_CNO', 'CP_RAP_ISPCT_He', 'CP_STA_CS_HBR',
              'CP_STA_CS_NBR', 'CP_STA_CWF_GSE', 'CP_STA_CWF_HBR_ISR2',
              'CP_STA_CWF_NBR_ISR2', 'CP_STA_PSD', 'CP_WBD_WAVEFORM',
              'CP_WHI_ELECTRON_DENSITY', 'CP_WHI_NATURAL', 'JP_AUX_PMP', 'JP_AUX_PSE'

            Default: ['CP_CIS-CODIF_HS_H1_MOMENTS']

        downloadonly: bool
            If true, do not use cdf_to_tplot.
            Default: False

        time_clip: bool
            If true, apply time clip to data.
            Default: True

        prefix: str
            The tplot variable names will be given this prefix.
            None is treated as ''.
            Default: ''

        suffix: str
            The tplot variable names will be given this suffix.
            None is treated as ''.
            Default: ''

        get_support_data: bool
            If True, data with an attribute "VAR_TYPE" with a value of "support_data"
            will be loaded into tplot.
            Default: False

        varformat : str
            The file variable formats to load into tplot.  Wildcard character
            "*" is accepted.
            Default: None (all variables will be loaded)

        varnames: str or list of str
            Load these variables only. If [] or ['*'], then load everything.
            Default: []

        notplot: bool
            If True, then data are returned in a hash table instead of
            being stored in tplot variables (useful for debugging, and
            access to multi-dimensional data products)
            Default: False

    Returns
    -------
        list of str
            List of tplot variables created (unless notplot keyword is used).
            If downloadonly is set, the sorted list of extracted file paths
            is returned instead. An empty list is returned when probes or
            datatypes is empty, or when the download fails.

    Examples
    --------

    >>> import pyspedas
    >>> from pyspedas import tplot
    >>> fgm_vars = pyspedas.projects.cluster.load_csa(trange=['2008-11-01','2008-11-02'],datatypes=['CP_FGM_FULL'])
    >>> tplot(['B_vec_xyz_gse__C1_CP_FGM_FULL','B_mag__C1_CP_FGM_FULL'])
    """
    # Empty output in case of errors.
    tvars = []

    # Nothing to request: return before doing any other work.
    if not probes or not datatypes:
        return tvars

    if prefix is None:
        prefix = ''
    if suffix is None:
        suffix = ''

    if not isinstance(probes, list):
        probes = [probes]
    if not isinstance(datatypes, list):
        datatypes = [datatypes]

    # TODO: Create a function that can resolve wildcards
    # similar to IDL spedas ssl_check_valid_name
    if '*' in probes:  # load all probes, wherever the wildcard appears
        probes = cl_master_probes()

    # Start and end dates
    start_date = cl_format_time(trange[0])
    end_date = cl_format_time(trange[1])

    # Construct the query string: fixed options first, then one
    # DATASET_ID entry per probe/datatype combination.
    base_url = 'https://csa.esac.esa.int/csa-sl-tap/data?'
    query_parts = ['retrieval_type=PRODUCT',
                   'START_DATE=' + start_date,
                   'END_DATE=' + end_date,
                   'DELIVERY_FORMAT=CDF_ISTP',
                   'DELIVERY_INTERVAL=ALL',
                   'NON_BROWSER']
    query_parts.extend('DATASET_ID=' + p + '_' + d
                       for p in probes for d in datatypes)
    url = base_url + '&'.join(query_parts)

    local_path = CONFIG['local_data_dir']  # could be URI
    use_fsspec = is_fsspec_uri(local_path)
    if use_fsspec:
        # Only the protocol part is needed to pick the filesystem.
        local_protocol = local_path.split("://", 1)[0]
        local_fs = fsspec.filesystem(local_protocol, anon=False)
        out_gz = '/'.join([local_path, 'temp_cluster_file.tar.gz'])  # Temp file name
    else:
        local_fs = None
        Path(local_path).mkdir(parents=True, exist_ok=True)
        out_gz = os.path.join(local_path, 'temp_cluster_file.tar.gz')  # Temp file name

    # Download first; the temp file is only created once there is
    # something to write, so a failed request leaves nothing behind.
    logging.info("Downloading Cluster data, please wait....")
    try:
        r = requests.get(url, allow_redirects=True)
        r.raise_for_status()
    except requests.exceptions.HTTPError as err:
        logging.error("Download HTTP error: " + str(err))
        return tvars
    except requests.exceptions.RequestException as e:
        logging.error("Download error: " + str(e))
        return tvars
    logging.info("Download complete.")

    # Save the downloaded archive.
    opener = local_fs.open if use_fsspec else open
    with opener(out_gz, 'wb') as w:
        w.write(r.content)

    try:
        member_names = _csa_extract_files(out_gz, local_path, local_fs)
    finally:
        # Remove the tar.gz file but keep the extracted files, even if
        # the extraction itself failed.
        if use_fsspec:
            local_fs.delete(out_gz)
        else:
            os.remove(out_gz)

    # Unique, sorted list of extracted files with full path.
    sep = "/" if use_fsspec else os.path.sep
    out_files = sorted({sep.join([local_path, s]) for s in member_names})

    if downloadonly:
        return out_files

    # Load data into tplot
    tvars = cdf_to_tplot(out_files,
                         prefix=prefix,
                         suffix=suffix,
                         get_support_data=get_support_data,
                         varformat=varformat,
                         varnames=varnames,
                         notplot=notplot)

    if notplot:
        return tvars

    if time_clip:
        for new_var in tvars:
            tclip(new_var, trange[0], trange[1], suffix='')

    return tvars


def _csa_is_safe_member(name):
    """Return True if a tar member path cannot escape the extraction directory."""
    parts = name.replace('\\', '/').split('/')
    is_absolute = name.startswith(('/', '\\')) or os.path.isabs(name)
    return not (is_absolute or '..' in parts)


def _csa_extract_files(archive, local_path, local_fs=None):
    """Extract the regular files of a tar archive below local_path.

    Parameters
    ----------
        archive : str
            Path (or fsspec URI) of the downloaded tar.gz file.
        local_path : str
            Directory (or fsspec URI) the members are written under.
        local_fs : fsspec filesystem or None
            Filesystem used to read the archive and write the members;
            None means the local filesystem.

    Returns
    -------
        list of str
            Member paths (relative to local_path) that were written.
            Directory entries and members with unsafe paths are skipped.
    """
    # For fsspec the archive is read through a byte stream, which has to
    # be closed explicitly once the tar file is done with it.
    stream = local_fs.open(archive, "rb") if local_fs is not None else None
    try:
        if stream is not None:
            tar = tarfile.open(fileobj=stream)
        else:
            tar = tarfile.open(archive, "r:gz")

        written = []
        with tar:
            for member in tar.getmembers():
                if not member.isfile():
                    continue
                # The archive comes from a remote service: never write
                # outside of local_path.
                if not _csa_is_safe_member(member.path):
                    logging.warning("Skipping unsafe path in Cluster archive: %s",
                                    member.path)
                    continue

                target = '/'.join([local_path, member.path])
                if local_fs is not None:
                    member_opener = local_fs.open
                else:
                    os.makedirs(str(Path(target).parent), exist_ok=True)
                    member_opener = open

                # note: data is written after file is read into memory
                with tar.extractfile(member) as src, member_opener(target, "wb") as dst:
                    dst.write(src.read())
                written.append(member.path)
        return written
    finally:
        if stream is not None:
            stream.close()