Source code for pyspedas.projects.kyoto.load_dst

import logging
import re
from pyspedas.tplot_tools import time_double, store_data, options, time_clip as tclip
from pyspedas import download, dailynames
from .kyoto_config import CONFIG


def parse_dst_html(html_text, year=None, month=None):
    """
    Extract the hourly Dst values from one monthly Kyoto Dst HTML page.

    The table is located between the "Hourly Equatorial Dst Values" heading
    and the "yyyymm_part3.html" HTML comment. The first five lines of that
    region are skipped as header lines; every following line is scanned for
    integers, where the first integer is the day of month and the remaining
    ones are the hourly values.

    Parameters
    ----------
    html_text : str
        The HTML content to parse.
    year : str or int
        Year of the page, used to build the timestamps. Needed as soon as the
        page contains at least one value.
    month : str or int
        Month of the page, used to build the timestamps. Needed as soon as the
        page contains at least one value.

    Returns
    -------
    tuple of (list, list)
        ``(times, data)``: ``times`` holds the values returned by
        ``time_double`` for "<year>-<month>-<day>/<hour>:30" and ``data``
        holds the matching Dst values as floats. Both lists are empty if the
        table heading is not found or the table has no usable values.

    Raises
    ------
    TypeError
        If a value has to be time-stamped but ``year`` or ``month`` is None.
    """
    times = []
    data = []

    # remove all of the HTML before the table; without the heading there is
    # nothing to parse
    table_start = html_text.find("Hourly Equatorial Dst Values")
    if table_start < 0:
        return (times, data)
    html_data = html_text[table_start:]

    # remove all of the HTML after the table; if the end marker is absent keep
    # everything (slicing with find()'s -1 would silently drop the last character)
    table_end = html_data.find("<!-- vvvvv S yyyymm_part3.html vvvvv -->")
    if table_end >= 0:
        html_data = html_data[:table_end]

    def _timestamp(day, hour):
        # Built lazily so that a page without any value can still be parsed
        # when year/month were not given.
        if year is None or month is None:
            raise TypeError(
                "parse_dst_html: year and month are required to build timestamps"
            )
        return time_double(f"{year}-{month}-{day}/{hour}:30")

    number_pattern = re.compile(r'[-+]?\d+')

    # loop over days (the first five lines of the region are header lines)
    for day_str in html_data.split("\n")[5:]:
        # the first number is the day, the rest are the hourly Dst values
        numbers = number_pattern.findall(day_str)
        if len(numbers) < 2:
            continue
        day = numbers[0]
        hourly_values = numbers[1:]

        # the data is complete for a whole day: one number per hour
        if len(hourly_values) == 24:
            for hour, dst_value in enumerate(hourly_values):
                times.append(_timestamp(day, hour))
                data.append(float(dst_value))
            continue

        # The data is not complete for a whole day (typically the case for
        # real time data). The kyoto website uses a 4 digit format and fills
        # missing hours with '9999', so the last real value of the day is glued
        # to the fill digits that follow it.
        # NOTE: a fully missing day consists of fill-only numbers, which are
        # skipped one by one below. A day is deliberately NOT discarded just
        # because it has three numbers: a day whose only available value is
        # the first hour also produces exactly three numbers.
        for hour, dst_value in enumerate(hourly_values):
            remainder = len(dst_value) % 4
            if remainder > 0:
                # either the regular case '-23' or the glued case '-159999';
                # the leading `remainder` characters are the real value
                value_str = dst_value[:remainder]
            elif dst_value[:4] != '9999':
                # a 4 character value glued to fill digits, e.g. '-1599999'
                value_str = dst_value[:4]
            else:
                # nothing but fill digits: missing data
                continue
            value = float(value_str)
            times.append(_timestamp(day, hour))
            data.append(value)

    return (times, data)

def dst(
    trange=None,
    datatypes=None,
    time_clip=True,
    remote_data_dir="http://wdc.kugi.kyoto-u.ac.jp/",
    prefix="",
    suffix="",
    no_download=False,
    local_data_dir="",
    download_only=False,
    force_download=False,
):
    """
    Loads Dst index data from the Kyoto servers.

    Parameters
    ----------
    trange : list of str, required
        Time range of interest with the format ['YYYY-MM-DD','YYYY-MM-DD'] or
        to specify more or less than a day
        ['YYYY-MM-DD/hh:mm:ss','YYYY-MM-DD/hh:mm:ss'].
    datatypes : list of str or str, optional
        Dst data types to try, in order of preference. The first type that
        yields any data is used. Defaults to
        ["final", "provisional", "realtime"].
    time_clip : bool, optional
        Time clip the variables to exactly the range specified in the trange
        keyword. Defaults to True.
    remote_data_dir : str, optional
        The remote directory from where to load the Dst index data. An empty
        string selects CONFIG["remote_data_dir_dst"].
        Defaults to "http://wdc.kugi.kyoto-u.ac.jp/".
    prefix : str, optional
        The tplot variable names will be given this prefix.
        By default, no prefix is added.
    suffix : str, optional
        The tplot variable names will be given this suffix.
        By default, no suffix is added.
    no_download : bool, optional
        Passed through to the download routine. Defaults to False.
    local_data_dir : str, optional
        Local directory for the downloaded pages. An empty string selects
        CONFIG["local_data_dir"].
    download_only : bool, optional
        If True, the pages are downloaded but not parsed, no tplot variable is
        created and an empty list is returned. Defaults to False.
    force_download : bool, optional
        Download file even if local version is more recent than server version.
        Defaults to False.

    Returns
    -------
    list of str
        List of tplot variables created.

    Notes
    -----
    There are three types of Dst data available: final, provisional, and
    realtime. Usually, only one type is available for a particular month.
    This function tries to download final data, if this is not available then
    it downloads provisional data, and if this is not available then it
    downloads realtime data.

    Examples
    --------
    >>> from pyspedas.projects.kyoto import dst
    >>> dst_data = dst(trange=['2015-01-01', '2015-01-02'])
    >>> print(dst_data)
    ['kyoto_dst']
    """
    created_vars = []  # list of tplot variables created

    if trange is None or len(trange) != 2:
        logging.error("dst: Keyword trange with two datetimes is required to download data.")
        return created_vars

    trange_dbl = time_double(trange)
    earliest_data = time_double('1957-01-01')
    if trange_dbl[0] >= trange_dbl[1]:
        logging.error(f"dst: Invalid time range. End time {trange[1]} must be greater than start time {trange[0]}.")
        return created_vars
    if trange_dbl[1] < earliest_data:
        logging.error(f"dst: Invalid time range: specified end date {trange[1]} is earlier than 1957-01-01")
        return created_vars

    # Honour the caller's choice of data types; None means "try all of them"
    # in order of preference. A single string is accepted as a convenience.
    if datatypes is None:
        datatypes = ["final", "provisional", "realtime"]
    elif isinstance(datatypes, str):
        datatypes = [datatypes]
    else:
        datatypes = list(datatypes)

    if local_data_dir == "":
        local_data_dir = CONFIG["local_data_dir"]
    if local_data_dir and not local_data_dir.endswith("/"):
        local_data_dir += "/"

    if remote_data_dir == "":
        remote_data_dir = CONFIG["remote_data_dir_dst"]
    if remote_data_dir and not remote_data_dir.endswith("/"):
        remote_data_dir += "/"

    try:
        file_names = dailynames(file_format="%Y%m/index.html", trange=trange)
    except Exception as e:
        logging.error("Error occurred while getting file names: " + str(e))
        return created_vars

    # Keep unique file names only. They start with YYYYMM, so sorting them
    # keeps the months (and therefore the stored samples) in time order.
    file_names = sorted(set(file_names))

    ack = """
        ******************************
        The DST data are provided by the World Data Center for Geomagnetism, Kyoto, and
        are not for redistribution (http://wdc.kugi.kyoto-u.ac.jp/). Furthermore, we thank
        the geomagnetic observatories (Kakioka [JMA], Honolulu and San Juan [USGS], Hermanus
        [RSA], Alibag [IIG]), NiCT, INTERMAGNET, and many others for their cooperation to
        make the Dst index available.
        ******************************
    """

    times = []
    data = []
    loaded_datatype = None

    for datatype in datatypes:
        for filename in file_names:
            yyyymm = ""
            if len(filename) > 6:
                yyyymm = filename[:6]
            url = remote_data_dir + "dst_" + datatype + "/" + filename
            local_path = local_data_dir + "dst_" + datatype + "/" + yyyymm + "/"

            fname = download(
                url,
                local_path=local_path,
                no_download=no_download,
                text_only=True,
                force_download=force_download,
            )

            if download_only:
                continue  # skip to the next file

            if fname is None or len(fname) < 1:
                logging.error("Error occurred while downloading: " + url)
                continue  # skip to the next file

            try:
                with open(fname[0], "r") as file:
                    html_text = file.read()
                file_times, file_data = parse_dst_html(
                    html_text, year=filename[:4], month=filename[4:6]
                )
                times.extend(file_times)
                data.extend(file_data)
            except Exception as e:
                logging.error(
                    "Error occurred while parsing " + filename + ": " + str(e)
                )
                continue  # skip to the next file

        # At this point, we have all data for one datatype.
        # If we have data, we can stop, otherwise we try the next datatype.
        if len(times) != 0:
            loaded_datatype = datatype
            break

    if download_only:
        # Nothing was parsed on purpose, so this is not a "no data" error.
        return created_vars

    if len(times) == 0:
        logging.error("No data found.")
        return created_vars

    varname = prefix + "kyoto_dst" + suffix
    store_data(varname, data={"x": times, "y": data})
    options(varname, "ytitle", "Dst (" + loaded_datatype + ")")
    created_vars.append(varname)

    if time_clip:
        tclip(varname, trange[0], trange[1], overwrite=True)

    logging.info(ack)

    return created_vars