import numpy as np
import pandas as pd
import xarray as xr
from ..core import utils
from . import aqdutils
def wad_to_cdf(metadata, writefile=True):
    """Load Aquadopp waves data and create raw netCDF file

    Reads the instrument header, the .whd burst headers and the .wad burst
    samples that share ``metadata["basefile"]``, merges them into a single
    Dataset and optionally writes it to ``<filename>wv-raw.cdf``.

    Parameters
    ----------
    metadata : dict
        Dictionary of required metadata. Must contain ``"basefile"``.
        Note that an ``"instmeta"`` entry holding the parsed header is added
        to this dictionary in place.
    writefile : bool, optional
        Flag to write raw .cdf file. Default True

    Returns
    -------
    xarray.Dataset
        Raw waves data in an xarray Dataset
    """
    basefile = metadata["basefile"]

    # get instrument metadata from the HDR file
    instmeta = aqdutils.read_aqd_hdr(basefile)
    metadata["instmeta"] = instmeta

    ds = load_whd(metadata)

    # write out metadata first, then deal exclusively with xarray attrs
    ds = utils.write_metadata(ds, metadata)

    # Drop the local names so the rest of the function cannot accidentally
    # read the dicts instead of ds.attrs.
    del metadata
    del instmeta

    ds = utils.ensure_cf(ds)

    ds = load_wad(ds)

    # Deal with metadata peculiarities
    ds = aqdutils.check_attrs(ds, waves=True)

    ds.attrs["center_first_bin"] = ds["cellpos"][0].values

    ds = aqdutils.create_bindist(ds, waves=True)

    # Compute time stamps
    # WaveSampleRate is a string whose first whitespace-separated token is
    # the numeric rate.
    fs = float(ds.attrs["WaveSampleRate"].split()[0])
    ds.attrs["sample_interval"] = 1 / fs
    ds.attrs["samples_per_burst"] = ds.attrs["WaveNumberOfSamples"]

    ds = utils.ds_coord_no_fillvalue(ds)

    ds = aqdutils.update_attrs(ds, waves=True)

    # need to drop datetime
    ds = ds.drop_vars("datetime")

    if writefile:
        cdf_filename = ds.attrs["filename"] + "wv-raw.cdf"
        ds.to_netcdf(cdf_filename)
        print("Finished writing data to %s" % cdf_filename)

    return ds
def load_whd(metadata):
    """Load data from .whd file

    Parameters
    ----------
    metadata : dict
        Dictionary of required metadata. Only ``"basefile"`` is read here;
        the file loaded is ``basefile + ".whd"``.

    Returns
    -------
    xarray.Dataset
        Data from .whd file as an xarray Dataset, indexed by ``time``

    Raises
    ------
    ValueError
        If the date/time columns cannot be assembled into valid timestamps
        (raised by ``pandas.to_datetime``).
    """
    whdfile = metadata["basefile"] + ".whd"

    # Raw string: "\s" in a normal string literal is an invalid escape.
    WHD = pd.read_csv(
        whdfile,
        header=None,
        sep=r"\s+",
        usecols=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20],
    )

    # Columns 0-5 hold month, day, year, hour, minute, second. Assemble the
    # timestamp explicitly rather than with the dict form of ``parse_dates``,
    # which pandas has deprecated.
    date_cols = {2: "year", 0: "month", 1: "day", 3: "hour", 4: "minute", 5: "second"}
    timestamps = pd.to_datetime(WHD[list(date_cols)].rename(columns=date_cols))
    WHD = WHD.drop(columns=list(date_cols))
    WHD.insert(0, "datetime", timestamps)

    # rename columns from numeric to human-readable
    WHD = WHD.rename(
        columns={
            6: "burst",
            7: "nrecs",
            8: "cellpos",
            9: "Battery",
            10: "soundspeed",
            11: "Heading",
            12: "Pitch",
            13: "Roll",
            14: "minpressure",
            16: "Temperature",
            17: "cellsize",
            18: "avgamp1",
            19: "avgamp2",
            20: "avgamp3",
        }
    )

    ds = xr.Dataset.from_dataframe(WHD)
    # Replace the default integer row index with the burst timestamps.
    ds = ds.rename({"index": "time"})
    ds["time"] = ds["datetime"]

    ds = ds.drop_vars(["minpressure", "cellsize", "nrecs", "soundspeed"])

    return ds
def load_wad(ds):
    """Load data from .wad file and add to existing xarray Dataset

    Parameters
    ----------
    ds : xarray.Dataset
        Existing xarray Dataset. Its attrs must provide ``basefile``,
        ``WaveNumberOfSamples`` and ``AQDCoordinateSystem``; an optional
        ``num_wave_bursts`` attr overrides the number of bursts to keep.

    Returns
    -------
    xarray.Dataset
        xarray Dataset including data from .wad file, with pressure,
        velocity and amplitude variables on dims ``("time", "sample")``

    Raises
    ------
    ValueError
        If ``AQDCoordinateSystem`` is not one of BEAM, ENU or XYZ.
    """
    wadfile = ds.attrs["basefile"] + ".wad"
    print("Loading wave data from " + wadfile + "; this may take some time")
    # pd.read_csv is ~10x faster than np.loadtxt or np.genfromtxt
    WAD = pd.read_csv(wadfile, header=None, sep=r"\s+").values

    r, c = np.shape(WAD)
    print(wadfile + " has " + str(r) + " rows and " + str(c) + " columns")

    wavensamps = int(ds.attrs["WaveNumberOfSamples"])

    if "num_wave_bursts" in ds.attrs:
        # we can override the number of samples if need be
        print(
            "Overriding number of samples using attr num_wave_bursts of {}".format(
                ds.attrs["num_wave_bursts"]
            )
        )
        nburst = int(ds.attrs["num_wave_bursts"])
        ds = ds.sel(time=ds.time[0:nburst])
    else:
        # Only whole bursts can be reshaped into (burst, sample).
        nburst = r // wavensamps
        if r % wavensamps:
            print(
                "Number of rows read is not a multiple of %d. Truncating data "
                "to last full burst" % ds.attrs["WaveNumberOfSamples"]
            )
            ds = ds.sel(time=ds.time[0:nburst])

    nsamps = nburst * wavensamps

    print(
        "Metadata reports "
        + str(nburst)
        + " bursts, "
        + str(nsamps)
        + " samples, "
        + str(wavensamps)
        + " samples per burst"
    )

    ds["sample"] = xr.DataArray(range(wavensamps), dims="sample")

    # Velocity variable names depend on the coordinate system the instrument
    # recorded in; pressure and amplitude names are always the same.
    velocity_names = {
        "BEAM": ["VEL1", "VEL2", "VEL3"],
        "ENU": ["U", "V", "W"],
        "XYZ": ["X", "Y", "Z"],
    }
    coordsys = ds.attrs["AQDCoordinateSystem"]
    if coordsys not in velocity_names:
        raise ValueError(
            "Unsupported AQDCoordinateSystem %r; expected one of %s"
            % (coordsys, ", ".join(velocity_names))
        )
    thevars = ["Pressure"] + velocity_names[coordsys] + ["AMP1", "AMP2", "AMP3"]

    # Columns of the .wad file holding the variables above, in the same order.
    thecols = [2, 5, 6, 7, 9, 10, 11]

    for var, n in zip(thevars, thecols):
        ds[var] = xr.DataArray(
            np.reshape(WAD[0:nsamps, n], (nburst, wavensamps)), dims=("time", "sample")
        )

    print("Done loading " + wadfile)

    return ds