# Source code for stglib.rsk.rsk2cdf

import sqlite3
import warnings

import numpy as np
import pandas as pd
import xarray as xr

from ..core import utils


def rsk_to_cdf(metadata):
    """
    Load data from an RSK file and save it to a raw .cdf (netCDF) file.

    Deprecated: emits a ``DeprecationWarning`` pointing to ``csv_to_cdf``.

    Parameters
    ----------
    metadata : dict
        Configuration passed straight through to ``rsk_to_xr``.

    Returns
    -------
    xarray.Dataset
        The Dataset built by ``rsk_to_xr``. It is also written to
        ``<ds.attrs["filename"]>-raw.cdf``, so the ``filename`` attribute
        must be present on the Dataset.
    """

    # stacklevel=2 attributes the warning to the caller, not to this module
    warnings.warn(
        "The use of rsk_to_cdf is deprecated. Use csv_to_cdf instead. Refer to the stglib documentation for more details.",
        DeprecationWarning,
        stacklevel=2,
    )

    ds = rsk_to_xr(metadata)

    print("Writing to raw netCDF")

    ds.to_netcdf(ds.attrs["filename"] + "-raw.cdf")

    print("Done")

    return ds


def init_connection(rskfile):
    """
    Open the sqlite3 database at ``rskfile`` and return a new cursor on it.

    Only the cursor is returned; the connection that owns it is not handed
    back separately (it stays reachable through the cursor's ``connection``
    attribute).
    """

    return sqlite3.connect(rskfile).cursor()


def _sort_into_bursts(rows, samplingcount):
    """Sort two-column (time, value) rows by time and fold them into bursts.

    Trailing rows that do not fill a complete burst of ``samplingcount``
    samples are dropped. Returns ``(stamps, values)``, each shaped
    ``(number_of_bursts, samplingcount)``.
    """
    # rows are not guaranteed to come out of the database in time order
    order = np.argsort(rows[:, 0])

    nbursts = rows.shape[0] // samplingcount
    keep = nbursts * samplingcount
    shape = (nbursts, samplingcount)

    stamps = rows[order, 0][:keep].reshape(shape)
    values = rows[order, 1][:keep].reshape(shape)
    return stamps, values


def rsk_to_xr(metadata):
    """
    Load data from an RSK file and generate an xarray Dataset.

    Parameters
    ----------
    metadata : dict
        Must contain ``"basefile"`` (path of the RSK file without the
        ``.rsk`` extension). The remaining entries are handed to
        ``utils.write_metadata``. The caller's dict is left unmodified.

    Returns
    -------
    xarray.Dataset
        Holds ``P_1`` (pressure) on dimensions ``("time", "sample")``,
        ``T_28`` (temperature) as well when ``instrument_type`` is
        ``"rbr_duo"``, plus ``time``, ``sample``, ``latitude`` and
        ``longitude``.

    Raises
    ------
    ValueError
        If ``instrument_type`` is given but is neither ``"rbr_duo"`` nor
        ``"rbr_virtuoso"``, or if ``recording_type`` is neither
        ``"continuous"`` nor ``"burst"``.

    Notes
    -----
    After the metadata and reader steps, ``ds.attrs`` must provide
    ``samples_per_burst``, ``initial_instrument_height``, ``serial_number``,
    ``latitude`` and ``longitude``.
    """

    # Work on a shallow copy so popping "basefile" does not alter the
    # caller's dict (a second call with the same dict would otherwise fail).
    metadata = dict(metadata)
    rskfile = metadata.pop("basefile") + ".rsk"

    ds = xr.Dataset()

    ds = utils.write_metadata(ds, metadata)

    print(
        "Loading from sqlite file %s; this may take a while for large datasets"
        % rskfile
    )

    temperature_rows = None

    if "instrument_type" not in ds.attrs:
        # Assume RBRvirtuoso in burst mode if no attrs
        pressure_rows, ds = read_virtuoso_burst(rskfile, ds)
    else:
        readers = {
            "rbr_duo": {
                "continuous": read_duo_continuous,
                "burst": read_duo_burst,
            },
            "rbr_virtuoso": {
                "continuous": read_virtuoso_continuous,
                "burst": read_virtuoso_burst,
            },
        }

        instrument = ds.attrs["instrument_type"]
        if instrument not in readers:
            raise ValueError(
                "instrument_type in config file, {:s}, is invalid".format(instrument)
            )

        recording = ds.attrs["recording_type"]
        if recording not in readers[instrument]:
            raise ValueError(
                "recording_type in config file, {:s}, is invalid".format(recording)
            )

        result = readers[instrument][recording](rskfile, ds)
        if instrument == "rbr_duo":
            # duo readers also return the temperature rows
            pressure_rows, temperature_rows, ds = result
        else:
            pressure_rows, ds = result

    samplingcount = ds.attrs["samples_per_burst"]

    # Pressure
    stamps, pressure = _sort_into_bursts(pressure_rows, samplingcount)

    # the first sample of each burst supplies that burst's time stamp
    times = pd.to_datetime(stamps[:, 0], unit="ms")
    samples = np.arange(samplingcount)

    ds["P_1"] = xr.DataArray(
        pressure,
        coords=[times, samples],
        dims=("time", "sample"),
        name="Pressure",
        attrs={
            "long_name": "Uncorrected pressure",
            "units": "dbar",
            "epic_code": 1,
            "height_depth_units": "m",
            "initial_instrument_height": ds.attrs["initial_instrument_height"],
            "serial_number": ds.attrs["serial_number"],
        },
    )

    # If duo, also process and save temperature
    if temperature_rows is not None:
        _, temperature = _sort_into_bursts(temperature_rows, samplingcount)

        ds["T_28"] = xr.DataArray(
            temperature,
            coords=[times, samples],
            dims=("time", "sample"),
            name="Temperature",
            attrs={
                "units": "degree_C",
                "long_name": "Temperature",
                "epic_code": 28,
                "serial_number": ds.attrs["serial_number"],
            },
        )

    ds["time"] = xr.DataArray(times, dims="time", name="time")

    ds["sample"] = xr.DataArray(samples, dims="sample", name="sample")

    ds["latitude"] = xr.DataArray(
        [ds.attrs["latitude"]],
        dims="latitude",
        attrs={"units": "degree_north", "standard_name": "latitude", "axis": "Y"},
    )

    ds["longitude"] = xr.DataArray(
        [ds.attrs["longitude"]],
        dims="longitude",
        attrs={"units": "degree_east", "standard_name": "longitude", "axis": "X"},
    )

    # need to add time attrs after DataArrays have been combined into Dataset
    ds["time"].attrs.update(
        {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"}
    )

    return ds


def read_virtuoso_burst(rskfile, ds):
    """
    Read burst-mode data and instrument metadata for an RBR Virtuoso.

    Parameters
    ----------
    rskfile : str
        Path to the RSK (sqlite) file.
    ds : xarray.Dataset
        Dataset whose ``attrs`` receive the sampling info,
        ``serial_number`` and ``INST_TYPE``.

    Returns
    -------
    tuple
        ``(d, ds)`` where ``d`` is a numpy array built from the
        ``(tstamp, channel01)`` rows of the burst data table.
    """
    cur = init_connection(rskfile)
    # Closing a cursor does not close the connection that owns it, so keep a
    # handle on the connection and close both when done.
    db = cur.connection

    try:
        try:
            cur.execute(
                "SELECT tstamp, channel01 FROM burstdata"
            )  # sometimes maybe is case sensitive?
        except sqlite3.OperationalError:
            cur.execute("SELECT tstamp, channel01 FROM burstData")  # note capital "D"
        data = cur.fetchall()
        print("Done fetching pressure data")
        d = np.asarray(data)

        # Read sampling meta info
        ds = utils.read_samplingrates_burst(ds, cur)

        # Get instr meta
        ds.attrs["serial_number"] = str(
            cur.execute("select serialID from instruments").fetchall()[0][0]
        )
        ds.attrs["INST_TYPE"] = "RBR Virtuoso d|wave"
    finally:
        cur.close()
        db.close()

    return (d, ds)


def read_virtuoso_continuous(rskfile, ds):  # UNTESTED
    """
    Read continuous-mode data and instrument metadata for an RBR Virtuoso.

    Parameters
    ----------
    rskfile : str
        Path to the RSK (sqlite) file.
    ds : xarray.Dataset
        Dataset whose ``attrs`` receive the sampling info,
        ``serial_number`` and ``INST_TYPE``.

    Returns
    -------
    tuple
        ``(d, ds)`` where ``d`` is a numpy array built from the
        ``(tstamp, channel01)`` rows of the ``data`` table.
    """
    cur = init_connection(rskfile)
    # Closing a cursor does not close the connection that owns it, so keep a
    # handle on the connection and close both when done.
    db = cur.connection

    try:
        cur.execute("SELECT tstamp, channel01 FROM data")
        data = cur.fetchall()
        print("Done fetching pressure data")
        d = np.asarray(data)

        # Read sampling meta info
        ds = utils.read_samplingrates_continuous(ds, cur)

        # Get meta
        ds.attrs["serial_number"] = str(
            cur.execute("select serialID from instruments").fetchall()[0][0]
        )
        ds.attrs["INST_TYPE"] = "RBR Virtuoso d|wave"
    finally:
        cur.close()
        db.close()

    return (d, ds)


def read_duo_continuous(rskfile, ds):
    """
    Read continuous-mode data and instrument metadata for an RBR Duo.

    Parameters
    ----------
    rskfile : str
        Path to the RSK (sqlite) file.
    ds : xarray.Dataset
        Dataset whose ``attrs`` receive the sampling info,
        ``serial_number`` and ``INST_TYPE``.

    Returns
    -------
    tuple
        ``(d, t, ds)`` where ``d`` holds the ``(tstamp, channel02)`` rows
        (pressure) and ``t`` the ``(tstamp, channel01)`` rows (temperature)
        of the ``data`` table, both as numpy arrays.
    """
    cur = init_connection(rskfile)
    # Closing a cursor does not close the connection that owns it, so keep a
    # handle on the connection and close both when done.
    db = cur.connection

    try:
        # First load in pressure
        cur.execute("SELECT tstamp, channel02 FROM data")
        data = cur.fetchall()
        print("Done fetching pressure data")
        d = np.asarray(data)

        # Second load in temperature
        cur.execute("SELECT tstamp, channel01 FROM data")
        data = cur.fetchall()
        print("Done fetching temperature data")
        t = np.asarray(data)

        # Read sampling meta info
        ds = utils.read_samplingrates_continuous(ds, cur)

        # Get instr meta
        ds.attrs["serial_number"] = str(
            cur.execute("select serialID from instruments").fetchall()[0][0]
        )
        ds.attrs["INST_TYPE"] = "RBR Duo d|wave"
    finally:
        cur.close()
        db.close()

    return (d, t, ds)


def read_duo_burst(rskfile, ds):
    """
    Read burst-mode data and instrument metadata for an RBR Duo.

    Parameters
    ----------
    rskfile : str
        Path to the RSK (sqlite) file.
    ds : xarray.Dataset
        Dataset whose ``attrs`` receive the sampling info,
        ``serial_number`` and ``INST_TYPE``.

    Returns
    -------
    tuple
        ``(d, t, ds)`` where ``d`` holds the ``(tstamp, channel02)`` rows
        (pressure) of the ``burstdata`` table and ``t`` the
        ``(tstamp, channel01)`` rows (temperature) of the ``data`` table,
        both as numpy arrays.
    """
    cur = init_connection(rskfile)
    # Closing a cursor does not close the connection that owns it, so keep a
    # handle on the connection and close both when done.
    db = cur.connection

    try:
        # First pressure
        cur.execute("SELECT tstamp, channel02 FROM burstdata")
        data = cur.fetchall()
        print("Done fetching pressure data")
        d = np.asarray(data)

        # Second load in temperature
        # NOTE(review): temperature is read from "data" while pressure comes
        # from "burstdata" -- confirm this is intended for burst deployments.
        cur.execute("SELECT tstamp, channel01 FROM data")
        data = cur.fetchall()
        print("Done fetching temperature data")
        t = np.asarray(data)

        # Read sampling meta info
        ds = utils.read_samplingrates_burst(ds, cur)

        # Get instr meta
        ds.attrs["serial_number"] = str(
            cur.execute("select serialID from instruments").fetchall()[0][0]
        )
        # This reader handles the Duo, so label it as such (matches
        # read_duo_continuous).
        ds.attrs["INST_TYPE"] = "RBR Duo d|wave"
    finally:
        cur.close()
        db.close()

    return (d, t, ds)

    # # TODO: add the following??
    # # {'positive','down';
    # #                'long_name', 'Depth';
    # #                'axis','z';
    # #                'units', 'm';
    # #                'epic_code', 3};
    #
    # Pressid = rg.createVariable('Pressure', 'f', ('time','sample',),
    # Pressid.units = 'dbar'
    # Pressid.long_name = 'Pressure (dbar)'
    # Pressid.generic_name = 'press'
    # Pressid.note = 'raw pressure from instrument, not corrected for...
    # Pressid.epic_code = 1
    # Pressid.height_depth_units = 'm'