Source code for stglib.rsk.rsk2cdf

import sqlite3
import warnings

import numpy as np
import pandas as pd
import xarray as xr

from ..core import utils


def rsk_to_cdf(metadata):
    """
    Deprecated: load data from an RSK file and save it to a raw .cdf file.

    Emits a ``DeprecationWarning`` pointing users at ``csv_to_cdf``, builds
    the Dataset with ``rsk_to_xr`` and writes it to
    ``<ds.attrs["filename"]>-raw.cdf``.

    Parameters
    ----------
    metadata : dict
        Passed straight through to ``rsk_to_xr``.

    Returns
    -------
    The Dataset returned by ``rsk_to_xr`` (after it has been written).
    """
    warnings.warn(
        "The use of rsk_to_cdf is deprecated. Use csv_to_cdf instead. "
        "Refer to the stglib documentation for more details.",
        DeprecationWarning,
        # stacklevel=2 attributes the warning to the caller of rsk_to_cdf
        stacklevel=2,
    )

    raw = rsk_to_xr(metadata)

    print("Writing to raw netCDF")
    outfile = raw.attrs["filename"] + "-raw.cdf"
    raw.to_netcdf(outfile)
    print("Done")

    return raw
def init_connection(rskfile):
    """Initialize an sqlite3 connection and return a cursor

    Only the cursor is returned. The underlying connection stays reachable
    through the cursor's ``connection`` attribute, which is what the readers
    in this module use to close the database when they are done.
    """
    return sqlite3.connect(rskfile).cursor()


def _load_raw(rskfile, ds):
    """Select the reader for the configured instrument and run it.

    Returns ``(d, d2, ds)`` where ``d`` is the pressure array, ``d2`` is the
    temperature array (``None`` unless the instrument is an ``rbr_duo``) and
    ``ds`` is the Dataset returned by the reader.
    """
    # Assume RBRvirtuoso in burst mode if no attrs
    if "instrument_type" not in ds.attrs:
        d, ds = read_virtuoso_burst(rskfile, ds)
        return d, None, ds

    # Else, check for duo or virtuoso, and recording mode
    instrument = ds.attrs["instrument_type"]
    if instrument not in ("rbr_duo", "rbr_virtuoso"):
        raise ValueError(
            "instrument_type in config file, {:s}, is invalid".format(instrument)
        )

    recording = ds.attrs["recording_type"]
    if recording not in ("continuous", "burst"):
        raise ValueError(
            "recording_type in config file, {:s}, is invalid".format(recording)
        )

    if instrument == "rbr_duo":
        reader = read_duo_continuous if recording == "continuous" else read_duo_burst
        d, d2, ds = reader(rskfile, ds)
        return d, d2, ds

    reader = (
        read_virtuoso_continuous if recording == "continuous" else read_virtuoso_burst
    )
    d, ds = reader(rskfile, ds)
    return d, None, ds


def _sort_and_reshape(d, samplingcount):
    """Sort a two-column (time, value) array by time and fold it into bursts.

    Trailing rows that do not fill a complete burst are dropped. Returns the
    pair ``(unixtime, values)``, each of shape ``(nbursts, samplingcount)``.
    """
    unixtime = d[:, 0].copy()
    values = d[:, 1].copy()

    # sort by time (not sorted for some reason)
    order = np.argsort(unixtime)
    unixtime = unixtime[order]
    values = values[order]

    # keep only the samples up to the end of the final complete burst;
    # integer division avoids a round trip through float
    nbursts = unixtime.shape[0] // samplingcount
    datlength = nbursts * samplingcount
    shape = (nbursts, samplingcount)

    return unixtime[:datlength].reshape(shape), values[:datlength].reshape(shape)


def rsk_to_xr(metadata):
    """
    Load data from RSK file and generate an xarray Dataset

    Parameters
    ----------
    metadata : dict
        Must contain ``"basefile"`` (path of the RSK file without the
        ``.rsk`` extension). That key is popped from the dict in place; the
        remaining entries are handed to ``utils.write_metadata``.
        After loading, the Dataset attrs must provide ``samples_per_burst``,
        ``initial_instrument_height``, ``serial_number``, ``latitude`` and
        ``longitude``; ``serial_number`` is set by the readers in this
        module, the others are presumably supplied by the metadata or by the
        ``utils.read_samplingrates_*`` helpers -- TODO confirm.

    Returns
    -------
    xarray.Dataset
        With ``P_1`` (pressure) on dims ``("time", "sample")``, ``T_28``
        (temperature) as well when ``instrument_type`` is ``"rbr_duo"``, and
        the ``time``, ``sample``, ``latitude`` and ``longitude`` variables.

    Raises
    ------
    ValueError
        If ``instrument_type`` or ``recording_type`` is not recognized.
    """
    rskfile = metadata.pop("basefile") + ".rsk"

    ds = xr.Dataset()
    ds = utils.write_metadata(ds, metadata)

    print(
        ("Loading from sqlite file %s; " "this may take a while for large datasets")
        % rskfile
    )

    d, d2, ds = _load_raw(rskfile, ds)

    samplingcount = ds.attrs["samples_per_burst"]
    is_duo = ds.attrs.get("instrument_type") == "rbr_duo"

    # Pressure
    ptime, pres = _sort_and_reshape(d, samplingcount)

    # If duo, also process temperature
    # NOTE(review): temperature is placed on the pressure time axis below, so
    # it is assumed to yield the same number of bursts as pressure.
    if is_duo:
        _, temp = _sort_and_reshape(d2, samplingcount)

    # one timestamp per burst: the time of the first sample in that burst
    times = pd.to_datetime(ptime[:, 0], unit="ms")
    samples = np.arange(samplingcount)

    ds["P_1"] = xr.DataArray(
        pres,
        coords=[times, samples],
        dims=("time", "sample"),
        name="Pressure",
        attrs={
            "long_name": "Uncorrected pressure",
            "units": "dbar",
            "epic_code": 1,
            "height_depth_units": "m",
            "initial_instrument_height": ds.attrs["initial_instrument_height"],
            "serial_number": ds.attrs["serial_number"],
        },
    )

    # If duo, also save temp
    if is_duo:
        ds["T_28"] = xr.DataArray(
            temp,
            coords=[times, samples],
            dims=("time", "sample"),
            name="Temperature",
            attrs={
                "units": "degree_C",
                "long_name": "Temperature",
                "epic_code": 28,
                "serial_number": ds.attrs["serial_number"],
            },
        )

    ds["time"] = xr.DataArray(times, dims="time", name="time")

    ds["sample"] = xr.DataArray(samples, dims="sample", name="sample")

    ds["latitude"] = xr.DataArray(
        [ds.attrs["latitude"]],
        dims="latitude",
        attrs={"units": "degree_north", "standard_name": "latitude", "axis": "Y"},
    )

    ds["longitude"] = xr.DataArray(
        [ds.attrs["longitude"]],
        dims="longitude",
        attrs={"units": "degree_east", "standard_name": "longitude", "axis": "X"},
    )

    # need to add time attrs after DataArrays have been combined into Dataset
    ds["time"].attrs.update(
        {"standard_name": "time", "axis": "T", "long_name": "time (UTC)"}
    )

    return ds


def _fetch_array(cur, query):
    """Execute *query* on cursor *cur* and return all rows as a NumPy array."""
    cur.execute(query)
    return np.asarray(cur.fetchall())


def _read_serial_number(cur):
    """Return the serialID of the first row of the instruments table as str."""
    return str(cur.execute("select serialID from instruments").fetchall()[0][0])


def read_virtuoso_burst(rskfile, ds):
    """Read burst pressure data and instrument metadata for an RBR Virtuoso.

    Returns ``(d, ds)``: ``d`` holds the (tstamp, channel01) rows of the
    burst table; ``ds`` has sampling info, ``serial_number`` and
    ``INST_TYPE`` added to its attrs.
    """
    cur = init_connection(rskfile)
    try:
        try:
            cur.execute(
                "SELECT tstamp, channel01 FROM burstdata"
            )  # sometimes maybe is case sensitive?
        except sqlite3.OperationalError:
            cur.execute("SELECT tstamp, channel01 FROM burstData")  # note capital "D"
        d = np.asarray(cur.fetchall())
        print("Done fetching pressure data")

        # Read sampling meta info
        ds = utils.read_samplingrates_burst(ds, cur)

        # Get instr meta
        ds.attrs["serial_number"] = _read_serial_number(cur)
        ds.attrs["INST_TYPE"] = "RBR Virtuoso d|wave"
    finally:
        # closing only the cursor would leave the database connection open
        cur.connection.close()

    return (d, ds)


def read_virtuoso_continuous(rskfile, ds):
    """Read continuous pressure data and metadata for an RBR Virtuoso.

    Returns ``(d, ds)`` with ``d`` holding (tstamp, channel01) rows of the
    ``data`` table.
    """
    # UNTESTED
    cur = init_connection(rskfile)
    try:
        d = _fetch_array(cur, "SELECT tstamp, channel01 FROM data")
        print("Done fetching pressure data")

        # Read sampling meta info
        ds = utils.read_samplingrates_continuous(ds, cur)

        # Get meta
        ds.attrs["serial_number"] = _read_serial_number(cur)
        ds.attrs["INST_TYPE"] = "RBR Virtuoso d|wave"
    finally:
        # closing only the cursor would leave the database connection open
        cur.connection.close()

    return (d, ds)


def read_duo_continuous(rskfile, ds):
    """Read continuous pressure and temperature data for an RBR Duo.

    Returns ``(d, t, ds)``: ``d`` holds (tstamp, channel02) rows (pressure)
    and ``t`` holds (tstamp, channel01) rows (temperature) of the ``data``
    table.
    """
    cur = init_connection(rskfile)
    try:
        # First load in pressure
        d = _fetch_array(cur, "SELECT tstamp, channel02 FROM data")
        print("Done fetching pressure data")

        # Second load in temperature
        t = _fetch_array(cur, "SELECT tstamp, channel01 FROM data")
        print("Done fetching temperature data")

        # Read sampling meta info
        ds = utils.read_samplingrates_continuous(ds, cur)

        # Get instr meta
        ds.attrs["serial_number"] = _read_serial_number(cur)
        ds.attrs["INST_TYPE"] = "RBR Duo d|wave"
    finally:
        # closing only the cursor would leave the database connection open
        cur.connection.close()

    return (d, t, ds)


def read_duo_burst(rskfile, ds):
    """Read burst pressure and temperature data for an RBR Duo.

    Returns ``(d, t, ds)``: ``d`` holds (tstamp, channel02) rows of the
    ``burstdata`` table (pressure) and ``t`` holds (tstamp, channel01) rows
    of the ``data`` table (temperature).
    """
    cur = init_connection(rskfile)
    try:
        # First pressure
        d = _fetch_array(cur, "SELECT tstamp, channel02 FROM burstdata")
        print("Done fetching pressure data")

        # Second load in temperature
        # NOTE(review): temperature comes from "data" while pressure comes
        # from "burstdata" -- confirm this is intended.
        t = _fetch_array(cur, "SELECT tstamp, channel01 FROM data")
        print("Done fetching temperature data")

        # Read sampling meta info
        ds = utils.read_samplingrates_burst(ds, cur)

        # Get instr meta
        ds.attrs["serial_number"] = _read_serial_number(cur)
        # This reader handles the Duo, so label it like read_duo_continuous
        # does (it previously carried the Virtuoso label).
        ds.attrs["INST_TYPE"] = "RBR Duo d|wave"
    finally:
        # closing only the cursor would leave the database connection open
        cur.connection.close()

    return (d, t, ds)


# # TODO: add the following??
# # {'positive','down';
# #  'long_name', 'Depth';
# #  'axis','z';
# #  'units', 'm';
# #  'epic_code', 3};
#
# Pressid = rg.createVariable('Pressure', 'f', ('time','sample',),
# Pressid.units = 'dbar'
# Pressid.long_name = 'Pressure (dbar)'
# Pressid.generic_name = 'press'
# Pressid.note = 'raw pressure from instrument, not corrected for...
# Pressid.epic_code = 1
# Pressid.height_depth_units = 'm'