Commit 9dac1110 authored by Willi Rath's avatar Willi Rath
Browse files

Merge branch '42-add-mimoc-cf' into 'develop'

Resolve "Render MIMOC and MIMOC_cf"

See merge request !54
parents 1d4db832 f6de1ff3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ before_script:
test:
  stage: test
  script:
    - pytest --cov=data_repo_renderer --cov-report term-missing --cov-fail-under=90 -v tests/
    - pytest --cov=data_repo_renderer --cov-report term-missing --cov-fail-under=85 -v tests/

style:
  stage: test
+12 −6
Original line number Diff line number Diff line
@@ -461,13 +461,19 @@ def cli_run_renderer(argvec=None):

    GitUpdate(yaml_dict, output_file=update_script)

    # append all data download lines
    # append all data download lines (if any)
    try:
        for data in yaml_dict["data"]:
            data["method"](data, output_file=update_script)
    except KeyError as e:
        pass

    # append all doc download lines
    # append all doc download lines (if any)
    try:
        for doc in yaml_dict["doc"]:
            doc["method"](doc, output_file=update_script)
    except KeyError as e:
        pass

    PostProcessing(yaml_dict, output_file=update_script)
    GitFinalize(yaml_dict, output_file=update_script)
+25 −0
Original line number Diff line number Diff line
repo_name: MIMOC_cf

people: Willi Rath (<wrath@geomar.de>)

http_path_remote: https://git.geomar.de/data/MIMOC_cf

git_path_remote: git@git.geomar.de:data/MIMOC_cf.git

repo_description: |
     A CF compliant version of MIMOC (See
     <https://www.pmel.noaa.gov/mimoc/index.html> and
     <https://git.geomar.de/data/MIMOC/>.)

prefixes: data doc

pre_processing:

    - GIT_LFS_SKIP_SMUDGE=1 git submodule add git@git.geomar.de:data/MIMOC.git _non_cf_data || echo "non-CF submodule exitst"
    - (cd _non_cf_data; git pull; git lfs pull; git describe;)
    - pwd

post_processing:

    - util/make_CF_compliant.sh
    - util/convert_to_netCDF4_classic.sh
+20 −0
Original line number Diff line number Diff line
#!/bin/bash

# Re-pack a single netCDF file in place.
#
# Arguments:
#   $1  path of the netCDF file to convert
#
# nccopy writes a converted copy (-7: netCDF-4 classic model, -s: shuffle,
# -d 1: deflate level 1, -c: chunk lengths per dimension) to a temporary file
# in the current working directory.  The original is replaced only when
# `cdo diff` on the copy and the original succeeds; otherwise the temporary
# file is removed and a non-zero status is returned.
#
# NOTE(review): this relies on `cdo diff` exiting non-zero when the files
# differ -- confirm for the installed CDO version.
function _convert_to_nc4c {
    # keep the variables local and quote every expansion so that paths
    # containing spaces or glob characters survive
    local infile="$1"
    local tmpfile
    tmpfile="_tmp_$(date +%s%N)_$(basename "${infile}")"

    if ! nccopy -s -7 -d 1 \
        -c"sigma0/1,latitude/90,longitude/90" \
        -c"pressure/1,latitude/90,longitude/90" \
        -c"latitude/90,longitude/90" \
        -c"time/1,sigma0/1,latitude/90,longitude/90" \
        -c"time/1,pressure/1,latitude/90,longitude/90" \
        -c"time/1,latitude/90,longitude/90" \
        "${infile}" "${tmpfile}"
    then
        # do not leave a partial copy behind
        rm -f "${tmpfile}"
        return 1
    fi

    if cdo diff "${tmpfile}" "${infile}"; then
        mv "${tmpfile}" "${infile}"
    else
        rm -f "${tmpfile}"
        return 1
    fi
}

# make the function visible to the bash child processes started by xargs
export -f _convert_to_nc4c

# Convert every *.nc file below data/, running up to 8 conversions at a time.
# The file name is handed to the child shell as positional argument "$1"
# instead of being spliced into the command string, so names containing
# spaces or shell metacharacters are passed through unchanged.  (-I already
# runs one command per input item, so no -n1 is needed.)
find data -type f -name \*.nc -print0 | \
    xargs -0 -P8 -I {} bash -c '_convert_to_nc4c "$1"' _ {}
+298 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

"""
Make MIMOC data CF compliant.
"""

import netCDF4 as nc
import numpy as np
import sys


# Module-level constants
# Single-precision sentinel used for both the ``_FillValue`` and the
# ``missing_value`` attribute of the data variables.
FILL_VALUE = np.float32(-1e34)

# Lookup table keyed by the dimension names that may occur in the source
# files.  Every entry names the source coordinate variable and dimension, the
# name used for both in the output file, and the netCDF attributes attached
# to the output coordinate variable.
# NOTE(review): "pressure" and "sigma0" may not be entries of the CF
# standard-name table -- confirm against the CF conventions.
spatial_dims = {
    "LONG": {
        "source_var": "LONGITUDE",
        "source_dim": "LONG",
        "target_name": "longitude",
        "nc_atts": {
            "standard_name": "longitude",
            "long_name": "longitude",
            "units": "degrees_east",
            "axis": "X",
        },
    },
    "LAT": {
        "source_var": "LATITUDE",
        "source_dim": "LAT",
        "target_name": "latitude",
        "nc_atts": {
            "standard_name": "latitude",
            "long_name": "latitude",
            "units": "degrees_north",
            "axis": "Y",
        },
    },
    "PRES": {
        "source_var": "PRESSURE",
        "source_dim": "PRES",
        "target_name": "pressure",
        "nc_atts": {
            "standard_name": "pressure",
            "long_name": "pressure",
            "units": "dbar",
            "axis": "Z",
            "positive": "down",
        },
    },
    "SIG": {
        "source_var": "SIGMA_0",
        "source_dim": "SIG",
        "target_name": "sigma0",
        "nc_atts": {
            "standard_name": "sigma0",
            "long_name": "sigma0",
            "units": "kg/m3",
            "axis": "Z",
            "positive": "down",
        },
    },
}


# Lookup table keyed by the names of the data variables in the source files.
# Every entry holds the source name, the name used in the output file, and
# the netCDF attributes set on the output variable.  The table is generated
# from compact (source name, target name, long name, units) rows; a units
# value of None means that no "units" attribute is set.  All variables share
# FILL_VALUE for "_FillValue" and "missing_value".
variable_properties = {
    source_name: {
        "source_name": source_name,
        "target_name": target_name,
        "nc_atts": {
            "long_name": long_name,
            **({} if units is None else {"units": units}),
            "_FillValue": FILL_VALUE,
            "missing_value": FILL_VALUE,
        },
    }
    for source_name, target_name, long_name, units in (
        ("PRESSURE",
         "pressure",
         "Pressure of sigma0-surface",
         "dbar"),
        ("SUMMED_WEIGHT",
         "summed_weight",
         "Sum of raw data weights used for grid point",
         None),
        ("YEAR_OF_DATA",
         "year_of_data",
         "Mapped year of grid point",
         None),
        ("POTENTIAL_TEMPERATURE",
         "potential_temperature",
         "Potential Temperature (IPTS-90)",
         "degC"),
        ("CONSERVATIVE_TEMPERATURE",
         "conservative_temperature",
         "Conservative Temperature (TEOS-10)",
         "degC"),
        ("POTENTIAL_TEMPERATURE_MIXED_LAYER",
         "potential_temperature_mixed_layer",
         "Mixed-Layer Potential Temperature (IPTS-90)",
         "degC"),
        ("CONSERVATIVE_TEMPERATURE_MIXED_LAYER",
         "conservative_temperature_mixed_layer",
         "Mixed-Layer Conservative Temperature (TEOS-10)",
         "degC"),
        ("ABSOLUTE_SALINITY",
         "absolute_salinity",
         "Absolute Salinity (TEOS-10)",
         "g/kg"),
        ("SALINITY",
         "practical_salinity",
         "Practical Salinity (PSS-78)",
         "psu"),
        ("ABSOLUTE_SALINITY_MIXED_LAYER",
         "absolute_salinity_mixed_layer",
         "Mixed-Layer Absolute Salinity (TEOS-10)",
         "g/kg"),
        ("SALINITY_MIXED_LAYER",
         "practical_salinity_mixed_layer",
         "Mixed-Layer Practical Salinity (PSS-78)",
         "psu"),
        ("DEPTH_MIXED_LAYER",
         "depth_mixed_layer",
         "Mixed-Layer Depth",
         "m"),
    )
}


def _copy_spatial_dimensions(input_data_set, output_data_set):
    """Re-create all dimensions of the input under their target names.

    For every dimension of ``input_data_set`` a dimension of the same size
    and a coordinate variable of the same dtype are created in
    ``output_data_set``, both named after the ``target_name`` listed in
    ``spatial_dims``.  The coordinate variable receives the attributes from
    ``spatial_dims`` and the values of the associated source variable.

    Raises ``KeyError`` if a dimension of the input is not listed in
    ``spatial_dims`` or if its source variable is missing from the input.
    """
    for dim_name, source_dimension in input_data_set.dimensions.items():
        print("... copying dimension {}".format(dim_name))

        props = spatial_dims[dim_name]
        target_name = props["target_name"]
        source_variable = input_data_set.variables[props["source_var"]]

        output_data_set.createDimension(target_name, source_dimension.size)
        # createVariable hands back the newly created variable
        target_variable = output_data_set.createVariable(
            target_name, source_variable.dtype, target_name)
        target_variable.setncatts(props["nc_atts"])
        target_variable[:] = source_variable[:]


def _create_time_dimension(month, output_data_set):

    # sequence of days per month, centered points in time, and bounds
    months = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
                      np.double)
    time = np.cumsum(months) - months / 2.0
    time_bnds = np.array([np.cumsum(months)-months, np.cumsum(months)])

    # index associated with month
    imonth = month - 1

    # create time dim and add attributes
    output_data_set.createDimension(u"time", None)
    output_data_set.createVariable(u"time", "d", u"time")
    output_data_set.variables[u"time"][0] = time[imonth]
    output_data_set.variables[u"time"].standard_name = "time"
    output_data_set.variables[u"time"].long_name = "time"
    output_data_set.variables[u"time"].calendar = "NOLEAP"
    output_data_set.variables[u"time"].units = "days since 0000-01-01 00:00:00"
    output_data_set.variables[u"time"].bounds = "time_bnds"

    # create bounds dim
    output_data_set.createDimension(u"tbnds", 2)
    output_data_set.createVariable(u"time_bnds", "d", (u"time", u"tbnds"))
    output_data_set.variables[u"time_bnds"][0, :] = time_bnds[:, imonth]


def _copy_variables(input_data_set, output_data_set):
    """Copy the known data variables, prepending a ``time`` dimension.

    Variables that are not listed in ``variable_properties`` are skipped, as
    is a one-dimensional ``PRESSURE`` variable.  Every other variable is
    created in ``output_data_set`` under its target name with the dimensions
    ``("time", <renamed source dimensions>)``, gets the attributes from
    ``variable_properties``, and receives the source data at time index 0.
    Finally, NaN values are masked.
    """
    for name, source_variable in input_data_set.variables.items():
        if name not in variable_properties or (
                name == "PRESSURE" and len(source_variable.shape) == 1):
            print("... skipping variable {}".format(name))
            continue

        print("... copying variable {}".format(name))
        props = variable_properties[name]

        # leading time axis followed by the renamed source dimensions
        target_dimensions = ("time",) + tuple(
            spatial_dims[dim]["target_name"]
            for dim in source_variable.dimensions)

        # createVariable hands back the newly created variable
        target_variable = output_data_set.createVariable(
            props["target_name"], source_variable.dtype, target_dimensions)
        target_variable.setncatts(props["nc_atts"])
        target_variable[0, ...] = source_variable[:]

        # read the data back and re-write it with NaNs masked
        # NOTE(review): presumably so that netCDF4 stores them as the fill
        # value -- confirm
        written = target_variable[:]
        target_variable[:] = np.ma.array(written, mask=np.isnan(written))


def make_cf_compliant(month, input_file_name, output_file_name):
    """Copy the data of one input file to a CF compliant output file.

    Parameters
    ----------
    month : int
        Calendar month (1..12) the input file belongs to; used to build the
        time axis of the output file.
    input_file_name : str
        Path of the netCDF file to read.
    output_file_name : str
        Path of the netCDF file to write (``NETCDF3_CLASSIC``).  An existing
        file is overwritten.

    Raises
    ------
    ValueError
        If ``month`` is outside 1..12.  This is checked before any file is
        opened so that an existing output file is not clobbered for nothing.
    """
    if not 1 <= month <= 12:
        raise ValueError(
            "month must be in the range 1..12, got {!r}".format(month))

    # status report
    print("month {:02d}, input file {}, output file {}".format(
        month, input_file_name, output_file_name))

    # open files; the context managers close both data sets even if one of
    # the steps below raises
    with nc.Dataset(input_file_name, "r") as input_data_set, \
            nc.Dataset(output_file_name, "w", clobber=True,
                       format="NETCDF3_CLASSIC") as output_data_set:

        # copy (generalized) spatial dimensions and associated variables,
        # rename to CF compliant, add attributes
        _copy_spatial_dimensions(input_data_set, output_data_set)

        # create time dimension and time_bnds
        _create_time_dimension(month, output_data_set)

        # copy data variables, adding time dimension and attributes, and set
        # valid missing value
        _copy_variables(input_data_set, output_data_set)


if __name__ == "__main__":
    # Standalone entry point: expects month, input file name and output file
    # name on the command line and converts that one file.

    # sys.argv also holds the command itself, hence four entries
    if len(sys.argv) != 4:
        raise Exception("Need exactly three arguments: month, input_file_name,"
                        " output_file_name.")

    # the month arrives as text and is converted to an integer
    month, input_file_name, output_file_name = (
        int(sys.argv[1]), sys.argv[2], sys.argv[3])

    make_cf_compliant(month, input_file_name, output_file_name)
Loading