Commit e1ec0b43 authored by Willi Rath's avatar Willi Rath
Browse files

Add MIMOC_cf

parent 1d4db832
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
# Meta data of the MIMOC_cf data repository.
repo_name: MIMOC_cf

people: Willi Rath (<wrath@geomar.de>)

http_path_remote: https://git.geomar.de/data/MIMOC_cf

git_path_remote: git@git.geomar.de:data/MIMOC_cf.git

repo_description: |
     A CF compliant version of MIMOC (See
     <https://www.pmel.noaa.gov/mimoc/index.html> and
     <https://git.geomar.de/data/MIMOC/>.)

prefixes: data doc

# Commands that fetch the original (non-CF) MIMOC data as a git submodule.
pre_processing:

    # The submodule has to live in `_non_cf_data`: that is the directory the
    # following command changes into and the one util/make_CF_compliant.sh
    # reads its input files from.
    - GIT_LFS_SKIP_SMUDGE=1 git submodule add git@git.geomar.de:data/MIMOC.git _non_cf_data || echo "non-CF submodule exists"

    - cd _non_cf_data; git pull; git lfs pull; git describe

# Commands that convert the fetched data into the CF compliant files.
post_processing:

    - util/make_CF_compliant.sh
+298 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

"""
Make MIMOC data CF compliant.
"""

import netCDF4 as nc
import numpy as np
import sys


# Single-precision value written as both `_FillValue` and `missing_value`
# attribute of every copied data variable.
FILL_VALUE = np.float32(-1.0e34)

# Known spatial dimensions of the input files, keyed by the dimension name
# used in the input.  Each entry names the input variable holding the
# coordinate values ("source_var"), the input dimension ("source_dim"), the
# name used for both dimension and coordinate variable in the output
# ("target_name"), and the attributes attached to the output coordinate
# variable ("nc_atts").
#
# The standard_name values are taken from the CF standard name table;
# "pressure" and "sigma0" are not valid entries of that table, so the
# pressure and density axes use "sea_water_pressure" and
# "sea_water_sigma_theta" instead.
spatial_dims = {
    "LONG": {
        "source_var": "LONGITUDE",
        "source_dim": "LONG",
        "target_name": "longitude",
        "nc_atts": {
            "standard_name": "longitude",
            "long_name": "longitude",
            "units": "degrees_east",
            "axis": "X",
        },
    },
    "LAT": {
        "source_var": "LATITUDE",
        "source_dim": "LAT",
        "target_name": "latitude",
        "nc_atts": {
            "standard_name": "latitude",
            "long_name": "latitude",
            "units": "degrees_north",
            "axis": "Y",
        },
    },
    "PRES": {
        "source_var": "PRESSURE",
        "source_dim": "PRES",
        "target_name": "pressure",
        "nc_atts": {
            "standard_name": "sea_water_pressure",
            "long_name": "pressure",
            "units": "dbar",
            "axis": "Z",
            "positive": "down",
        },
    },
    "SIG": {
        "source_var": "SIGMA_0",
        "source_dim": "SIG",
        "target_name": "sigma0",
        "nc_atts": {
            "standard_name": "sea_water_sigma_theta",
            "long_name": "sigma0",
            "units": "kg/m3",
            "axis": "Z",
            "positive": "down",
        },
    },
}


# Data variables that are carried over to the output file, keyed by their
# name in the input file.  Every entry holds the input name ("source_name"),
# the output name ("target_name") and the attributes attached to the output
# variable ("nc_atts").
def _variable_entry(source_name, target_name, long_name, units=None):
    """Return one `variable_properties` entry.

    The "units" attribute is only included if `units` is given; the fill
    value attributes are always appended last.
    """
    attributes = {"long_name": long_name}
    if units is not None:
        attributes["units"] = units
    attributes["_FillValue"] = FILL_VALUE
    attributes["missing_value"] = FILL_VALUE
    return {
        "source_name": source_name,
        "target_name": target_name,
        "nc_atts": attributes,
    }


variable_properties = {
    entry["source_name"]: entry
    for entry in (
        _variable_entry("PRESSURE", "pressure",
                        "Pressure of sigma0-surface", "dbar"),
        _variable_entry("SUMMED_WEIGHT", "summed_weight",
                        "Sum of raw data weights used for grid point"),
        _variable_entry("YEAR_OF_DATA", "year_of_data",
                        "Mapped year of grid point"),
        _variable_entry("POTENTIAL_TEMPERATURE", "potential_temperature",
                        "Potential Temperature (IPTS-90)", "degC"),
        _variable_entry("CONSERVATIVE_TEMPERATURE",
                        "conservative_temperature",
                        "Conservative Temperature (TEOS-10)", "degC"),
        _variable_entry("POTENTIAL_TEMPERATURE_MIXED_LAYER",
                        "potential_temperature_mixed_layer",
                        "Mixed-Layer Potential Temperature (IPTS-90)",
                        "degC"),
        _variable_entry("CONSERVATIVE_TEMPERATURE_MIXED_LAYER",
                        "conservative_temperature_mixed_layer",
                        "Mixed-Layer Conservative Temperature (TEOS-10)",
                        "degC"),
        _variable_entry("ABSOLUTE_SALINITY", "absolute_salinity",
                        "Absolute Salinity (TEOS-10)", "g/kg"),
        _variable_entry("SALINITY", "practical_salinity",
                        "Practical Salinity (PSS-78)", "psu"),
        _variable_entry("ABSOLUTE_SALINITY_MIXED_LAYER",
                        "absolute_salinity_mixed_layer",
                        "Mixed-Layer Absolute Salinity (TEOS-10)", "g/kg"),
        _variable_entry("SALINITY_MIXED_LAYER",
                        "practical_salinity_mixed_layer",
                        "Mixed-Layer Practical Salinity (PSS-78)", "psu"),
        _variable_entry("DEPTH_MIXED_LAYER", "depth_mixed_layer",
                        "Mixed-Layer Depth", "m"),
    )
}


def _copy_spatial_dimensions(input_data_set, output_data_set):
    """Re-create every input dimension and its coordinate in the output.

    For each dimension of `input_data_set`, the matching `spatial_dims` entry
    provides the output name, the input variable holding the coordinate
    values, and the attributes to attach.  A dimension that has no entry in
    `spatial_dims` raises a KeyError.
    """
    for dim_name, source_dimension in input_data_set.dimensions.items():
        print("... copying dimension {}".format(dim_name))
        properties = spatial_dims[dim_name]
        target_name = properties["target_name"]
        source_variable = input_data_set.variables[properties["source_var"]]

        # dimension and coordinate variable share the same output name
        output_data_set.createDimension(target_name, source_dimension.size)
        target_variable = output_data_set.createVariable(
            target_name, source_variable.dtype, target_name)

        target_variable.setncatts(properties["nc_atts"])
        target_variable[:] = source_variable[:]


def _create_time_dimension(month, output_data_set):
    """Create the time axis and its bounds for one monthly field.

    Adds an unlimited "time" dimension with a single entry located at the
    centre of `month` (counted in days of a 365-day year), and a "time_bnds"
    variable holding the first and last day count of that month.

    Parameters:
        month: Month of the year, 1 (January) to 12 (December).
        output_data_set: Data set the dimensions and variables are added to.

    Raises:
        ValueError: If `month` is outside 1..12.  Without this check, 0 would
            silently select December through negative indexing.
    """
    if not 1 <= month <= 12:
        raise ValueError(
            "month must be in the range 1..12, got {!r}".format(month))

    # days per month of a year without leap day, and derived points in time
    days_per_month = np.array(
        [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], np.double)
    month_ends = np.cumsum(days_per_month)
    month_starts = month_ends - days_per_month
    month_centres = month_ends - days_per_month / 2.0

    # index associated with month
    imonth = month - 1

    # create time dim and add attributes
    output_data_set.createDimension(u"time", None)
    output_data_set.createVariable(u"time", "d", u"time")
    time_variable = output_data_set.variables[u"time"]
    time_variable[0] = month_centres[imonth]
    time_variable.standard_name = "time"
    time_variable.long_name = "time"
    time_variable.calendar = "NOLEAP"
    time_variable.units = "days since 0000-01-01 00:00:00"
    time_variable.bounds = "time_bnds"

    # create bounds dim and store (start, end) of the month
    output_data_set.createDimension(u"tbnds", 2)
    output_data_set.createVariable(u"time_bnds", "d", (u"time", u"tbnds"))
    output_data_set.variables[u"time_bnds"][0, :] = np.array(
        [month_starts[imonth], month_ends[imonth]])


def _copy_variables(input_data_set, output_data_set):
    """Copy the known data variables, prepending a leading time dimension.

    Only variables listed in `variable_properties` are copied.  Each copy
    gets the output name and attributes from its `variable_properties` entry
    and the dimensions ("time", <renamed spatial dimensions>).  After
    writing, NaN values are masked.
    """
    for name, source_variable in input_data_set.variables.items():
        # A one-dimensional PRESSURE is skipped as well -- presumably because
        # it is the pressure coordinate, which shares the output name
        # "pressure" with the data variable.
        if name not in variable_properties or (
                name == "PRESSURE" and len(source_variable.shape) == 1):
            print("... skipping variable {}".format(name))
            continue

        print("... copying variable {}".format(name))
        properties = variable_properties[name]

        # time comes first, followed by the renamed input dimensions
        target_dimensions = ("time",) + tuple(
            spatial_dims[dim]["target_name"]
            for dim in source_variable.dimensions)

        target_variable = output_data_set.createVariable(
            properties["target_name"],
            source_variable.dtype,
            target_dimensions)
        target_variable.setncatts(properties["nc_atts"])

        # write the single time slice, then read it back and mask all NaNs
        target_variable[0, ...] = source_variable[:]
        written = target_variable[:]
        target_variable[:] = np.ma.array(written, mask=np.isnan(written))


def make_cf_compliant(month, input_file_name, output_file_name):
    """Copy the data of one monthly MIMOC file to a CF compliant file.

    Parameters:
        month: Month of the year (1..12) the input file belongs to.
        input_file_name: Path of the netCDF file to read.
        output_file_name: Path of the netCDF file to write.  An existing
            file is overwritten.

    Raises:
        ValueError: If `month` is outside 1..12.  This is checked before the
            output file is opened, so an existing file is not clobbered by a
            call that cannot succeed.
    """
    if not 1 <= month <= 12:
        raise ValueError(
            "month must be in the range 1..12, got {!r}".format(month))

    # status report
    print("month {:02d}, input file {}, output file {}".format(
        month, input_file_name, output_file_name))

    # Open both files as context managers so that they are closed even if one
    # of the copy steps raises.
    with nc.Dataset(input_file_name, "r") as input_data_set, \
            nc.Dataset(output_file_name, "w", clobber=True,
                       format="NETCDF3_CLASSIC") as output_data_set:

        # copy (generalized) spatial dimensions and associated variables,
        # rename to CF compliant, add attributes
        _copy_spatial_dimensions(input_data_set, output_data_set)

        # create time dimension and time_bnds
        _create_time_dimension(month, output_data_set)

        # copy data variables, adding time dimension and attributes, and set
        # valid missing value
        _copy_variables(input_data_set, output_data_set)


if __name__ == "__main__":
    # Standalone use: take month, input file name and output file name from
    # the command line and convert that one file.

    # sys.argv also holds the command itself, hence four entries
    if len(sys.argv) != 4:
        raise Exception("Need exactly three arguments: month, input_file_name,"
                        " output_file_name.")

    month = int(sys.argv[1])
    input_file_name, output_file_name = sys.argv[2:4]

    make_cf_compliant(month, input_file_name, output_file_name)
+12 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# Convert every monthly MIMOC file found in _non_cf_data/data/ into a CF
# compliant file in data/, using util/make_CF_compliant.py.

# Report the revision of the original MIMOC data the conversion is based on.
(cd _non_cf_data/ && echo Using MIMOC "$(git describe)")

for month in {01..12}; do
    echo "==== month" "${month}" "===="
    for infile in _non_cf_data/data/MIMOC*month"${month}".nc; do
        # Without a match, the glob stays a literal pattern.  Do not hand
        # that non-existing file name to the converter.
        if [ ! -e "${infile}" ]; then
            echo "no input file matching ${infile}, skipping" >&2
            continue
        fi
        outfile="data/$(basename "${infile}" .nc)_cf.nc"
        util/make_CF_compliant.py "${month}" "${infile}" "${outfile}"
    done
done