# Source code for stglib.eco

import numpy as np
import pandas as pd
import xarray as xr

from .core import qaqc, utils


def read_par(filnam, spb=False, skiprows=None, skipfooter=0, encoding=None):
    """Read data from a WET Labs PAR csv file into an xarray
    Dataset.

    Parameters
    ----------
    filnam : string
        The filename
    spb : int or False, optional
        Samples per burst if using burst sampling. Default False
        (no burst sampling)
    skiprows : int, optional
        How many header rows to skip. Default None
    skipfooter : int, optional
        How many footer rows to skip. Default 0
    encoding : string, optional
        Text encoding used to decode the file, e.g. "mac-roman".
        Default None (the pandas default)

    Returns
    -------
    xarray.Dataset
        An xarray Dataset of the PAR data
    """

    names = ["date", "time", "counts"]

    par = read_eco_csv(
        filnam, names, skiprows=skiprows, skipfooter=skipfooter, encoding=encoding
    )

    return eco_pd_to_xr(par, spb=spb)


def read_ntu(filnam, spb=False, skiprows=None, skipfooter=0, encoding=None):
    """Read data from a WET Labs NTU csv file into an xarray
    Dataset.

    Parameters
    ----------
    filnam : string
        The filename
    spb : int or False, optional
        Samples per burst if using burst sampling. Default False
        (no burst sampling)
    skiprows : int, optional
        How many header rows to skip. Default None
    skipfooter : int, optional
        How many footer rows to skip. Default 0
    encoding : string, optional
        Text encoding used to decode the file, e.g. "mac-roman".
        Default None (the pandas default)

    Returns
    -------
    xarray.Dataset
        An xarray Dataset of the NTU data
    """

    # The file has five tab-separated columns; they are read under the
    # placeholder names "a" and "b" alongside date, time and counts.
    names = ["date", "time", "a", "counts", "b"]

    ntu = read_eco_csv(
        filnam, names, skiprows=skiprows, skipfooter=skipfooter, encoding=encoding
    )

    return eco_pd_to_xr(ntu, spb=spb)


def read_eco_csv(filnam, names, skiprows=None, skipfooter=0, encoding=None):
    """Read a tab-separated ECO file into a pandas DataFrame.

    Parameters
    ----------
    filnam : string
        The filename
    names : list of string
        Column names to assign, in file order. Must include "date" and
        "time"
    skiprows : int, optional
        How many header rows to skip. Default None
    skipfooter : int, optional
        How many footer rows to skip. Default 0
    encoding : string, optional
        Text encoding used to decode the file. Default None (the pandas
        default)

    Returns
    -------
    pandas.DataFrame
        The file contents with the "date" and "time" columns replaced by a
        single leading "date_time" datetime column
    """
    raw = pd.read_csv(
        filnam,
        sep="\t",
        names=names,
        # keep both pieces as text so they can be joined before parsing
        dtype={"date": str, "time": str},
        # the python engine is required for skipfooter
        engine="python",
        skiprows=skiprows,
        skipfooter=skipfooter,
        encoding=encoding,
    )

    # Combine date and time explicitly; the nested-list form of parse_dates
    # that used to do this inside read_csv is deprecated in pandas.
    stamps = pd.to_datetime(raw["date"] + " " + raw["time"])

    data = raw.drop(columns=["date", "time"])
    data.insert(0, "date_time", stamps)

    return data


def eco_pd_to_xr(df, spb=False):
    """Convert a parsed ECO DataFrame into an xarray Dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with at least "date_time" and "counts" columns
    spb : int or False, optional
        Samples per burst. When truthy, the flat record is reshaped into
        bursts of this many samples, so the number of rows must be a
        multiple of it. Default False

    Returns
    -------
    xarray.Dataset
        Without bursts, "counts" is indexed by "time". With bursts,
        "counts" is indexed by ("time", "sample") and "time" holds one
        timestamp per burst
    """
    if not spb:
        # continuous sampling: one count per timestamp
        return xr.Dataset(
            {"time": ("time", df["date_time"]), "counts": ("time", df["counts"])}
        )

    # burst sampling: fold the flat record into one row per burst
    stamps_by_burst = df["date_time"].values.reshape((-1, spb))
    # get middle time
    burst_times = stamps_by_burst[:, int(spb / 2)]
    burst_counts = df["counts"].values.reshape((-1, spb))
    sample_index = range(spb)

    return xr.Dataset(
        {
            "time": ("time", burst_times),
            "counts": (["time", "sample"], burst_counts),
            "sample": ("sample", sample_index),
        }
    )


def csv_to_cdf(metadata):
    """
    Process ECO .csv file to a raw .cdf file

    Parameters
    ----------
    metadata : dict
        Processing configuration. Must contain "basefile" (the csv file to
        read) and "instrument_type" (or "INST_TYPE", which is renamed to
        "instrument_type") whose value contains "par" or "ntu",
        case-insensitively. "spb", "skiprows" and "skipfooter" are optional
        and fall back to the readers' defaults. The dict is modified in
        place: "INST_TYPE", "skiprows" and "skipfooter" are removed.

    Returns
    -------
    The Dataset that was written to "<filename>-raw.cdf"

    Raises
    ------
    ValueError
        If the instrument type is neither a PAR nor an NTU instrument
    """

    basefile = metadata["basefile"]

    if "INST_TYPE" in metadata:
        metadata["instrument_type"] = metadata.pop("INST_TYPE")

    instrument = metadata["instrument_type"].lower()
    if "par" in instrument:
        reader = read_par
    elif "ntu" in instrument:
        reader = read_ntu
    else:
        # fail here with a clear message rather than on an unbound name below
        raise ValueError(
            "Unrecognized instrument_type %r: expected it to contain "
            "'par' or 'ntu'" % metadata["instrument_type"]
        )

    kwargs = {
        "spb": metadata.get("spb", False),
        "skiprows": metadata.get("skiprows"),
        "skipfooter": metadata.get("skipfooter", 0),
    }
    try:
        ds = reader(basefile, **kwargs)
    except UnicodeDecodeError:
        # try reading as Mac OS Western for old versions of Mac Excel
        # NOTE(review): this retry only works if the reader accepts an
        # ``encoding`` keyword -- confirm against read_par / read_ntu
        ds = reader(basefile, encoding="mac-roman", **kwargs)

    # reader-only options; keep them out of the file attributes
    metadata.pop("skiprows", None)
    metadata.pop("skipfooter", None)

    # write out metadata first, then deal exclusively with xarray attrs
    ds = utils.write_metadata(ds, metadata)

    # drop the local name so later code cannot use the dict by accident
    del metadata

    ds = utils.ensure_cf(ds)

    # configure file
    cdf_filename = ds.attrs["filename"] + "-raw.cdf"

    ds.to_netcdf(cdf_filename, unlimited_dims=["time"])

    print("Finished writing data to %s" % cdf_filename)

    return ds


def cdf_to_nc(cdf_filename):
    """
    Load a raw .cdf file and generate a processed .nc file

    The raw file must hold burst data: "counts" is averaged over its
    "sample" dimension, and both "counts" and "sample" are dropped from the
    output.

    Parameters
    ----------
    cdf_filename : string
        The raw .cdf file to load. Its global attributes must include
        "instrument_type", "filename" and "Conventions". PAR instruments
        also need "Im", "a0" and "a1". NTU instruments are only converted
        to turbidity when "user_ntucal_coeffs" is present.

    Returns
    -------
    xarray.Dataset
        The processed Dataset that was written to "<filename>-a.nc"
    """

    # Load raw .cdf data
    ds = xr.open_dataset(cdf_filename)

    instrument = ds.attrs["instrument_type"].lower()

    # definition of PAR is
    # PAR = Im * 10 ^ ((x-a0)/a1)
    # Where
    # Im is the immersion coefficient
    # a1 is the scaling factor
    # a0 is the voltage offset, typically 0
    # x is the voltage
    # The manufacturer calculates PAR in units of μmol photons m-2 s-1
    # from Sea-Bird Scientific, ECO PAR User Manual
    # Document No. par170706, 2017-07-06, Version B
    # https://www.seabird.com/asset-get.download.jsa?id=54627862518

    if "par" in instrument:
        burst_mean = ds["counts"].mean(dim="sample")
        ds["PAR_905"] = ds.attrs["Im"] * 10 ** (
            (burst_mean - ds.attrs["a0"]) / ds.attrs["a1"]
        )
        ds["PAR_905"].attrs["units"] = "umol m-2 s-1"
        ds["PAR_905"].attrs["long_name"] = "Photosynthetically active radiation"

    # NOTE: without user calibration coefficients no turbidity variable
    # is created for an NTU instrument.
    if "ntu" in instrument and "user_ntucal_coeffs" in ds.attrs:
        # apply the polynomial calibration once; the burst mean and the
        # burst standard deviation are both derived from it
        calibrated = xr.DataArray(
            np.polyval(ds.attrs["user_ntucal_coeffs"], ds["counts"]),
            dims=["time", "sample"],
        )

        ds["Turb"] = calibrated.mean(dim="sample")
        ds["Turb"].attrs["units"] = "1"
        ds["Turb"].attrs["long_name"] = "Turbidity (NTU)"
        ds["Turb"].attrs["standard_name"] = "sea_water_turbidity"
        ds["Turb"].attrs["comments"] = "Nephelometric turbidity units (NTU)"

        ds["Turb_std"] = calibrated.std(dim="sample")
        ds["Turb_std"].attrs["units"] = "1"
        ds["Turb_std"].attrs["long_name"] = "Turbidity burst standard deviation (NTU)"
        ds["Turb_std"].attrs["standard_name"] = "sea_water_turbidity"
        ds["Turb_std"].attrs["comments"] = "Nephelometric turbidity units (NTU)"
        ds["Turb_std"].attrs["cell_methods"] = "time: standard_deviation"

    # the raw counts are no longer needed once the derived variables exist
    ds = ds.drop_vars(["counts", "sample"])

    # Clip data to in/out water times or via good_ens
    ds = utils.clip_ds(ds)

    ds = eco_qaqc(ds)

    # assign min/max:
    ds = utils.add_min_max(ds)

    ds = utils.add_start_stop_time(ds)

    ds = utils.add_delta_t(ds)

    # add lat/lon coordinates
    ds = utils.ds_add_lat_lon(ds)

    ds = ds_add_attrs(ds)

    ds = utils.create_z(ds)

    ds = utils.ds_coord_no_fillvalue(ds)

    # Write to .nc file
    print("Writing cleaned/trimmed data to .nc file")
    nc_filename = ds.attrs["filename"] + "-a.nc"

    ds.to_netcdf(
        nc_filename, unlimited_dims=["time"], encoding={"time": {"dtype": "i4"}}
    )
    utils.check_compliance(nc_filename, conventions=ds.attrs["Conventions"])
    print("Done writing netCDF file", nc_filename)

    return ds


def ds_add_attrs(ds):
    """Update Dataset attributes for EPIC and STG compliance.

    Passes the Dataset through ``utils.ds_coord_no_fillvalue`` and sets the
    "standard_name", "axis" and "long_name" attributes of the "time"
    variable.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset containing a "time" variable

    Returns
    -------
    xarray.Dataset
        The Dataset returned by ``utils.ds_coord_no_fillvalue`` with the
        time attributes updated
    """
    ds = utils.ds_coord_no_fillvalue(ds)

    time_attrs = {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"}
    time_var = ds["time"]
    time_var.attrs.update(time_attrs)

    # NOTE: a disabled helper used to sit here. It would have copied
    # "initial_instrument_height" from the Dataset attributes onto each
    # non-coordinate variable, together with "height_depth_units" = "m".
    # It was never executed, so no per-variable height attributes are set.
    return ds


def eco_qaqc(ds):
    """Run the QA/QC trimming steps on ECO data.

    Only NTU instruments are processed, i.e. those whose "instrument_type"
    attribute contains "ntu" case-insensitively. Any other Dataset is
    returned untouched.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with an "instrument_type" global attribute

    Returns
    -------
    xarray.Dataset
        The Dataset after every trimming step has been applied
    """
    if "ntu" not in ds.attrs["instrument_type"].lower():
        return ds

    # Checks applied to turbidity, in the order they must run
    turb_checks = (
        qaqc.trim_min_diff,
        qaqc.trim_min_diff_pct,
        qaqc.trim_max_diff,
        qaqc.trim_max_diff_pct,
        qaqc.trim_med_diff,
        qaqc.trim_med_diff_pct,
        qaqc.trim_maxabs_diff_2d,
        qaqc.trim_maxabs_diff,
        qaqc.trim_max_std,
        qaqc.trim_min,
        qaqc.trim_max,
        qaqc.trim_bad_ens,
        qaqc.trim_std_ratio,
    )
    for check in turb_checks:
        ds = check(ds, "Turb")

    # after check for masking vars by others
    for name in ("Turb", "Turb_std"):
        ds = qaqc.trim_mask(ds, name)

    return ds