Loading input_data/MIMOC_cf/meta.yaml 0 → 100644 +24 −0 Original line number Diff line number Diff line repo_name: MIMOC_cf people: Willi Rath (<wrath@geomar.de>) http_path_remote: https://git.geomar.de/data/MIMOC_cf git_path_remote: git@git.geomar.de:data/MIMOC_cf.git repo_description: | A CF compliant version of MIMOC (See <https://www.pmel.noaa.gov/mimoc/index.html> and <https://git.geomar.de/data/MIMOC/>.) prefixes: data doc pre_processing: - GIT_LFS_SKIP_SMUDGE=1 git submodule add git@git.geomar.de:data/MIMOC.git non_cf_data || echo "non-CF submodule exitst" - cd _non_cf_data; git pull; git lfs pull; git describe post_processing: - util/make_CF_compliant.sh input_data/MIMOC_cf/util/make_CF_compliant.py 0 → 100644 +298 −0 Original line number Diff line number Diff line #!/usr/bin/env python3 """ Make MIMOC data CF compliant. """ import netCDF4 as nc import numpy as np import sys # Constants FILL_VALUE = np.float32(-1.e+34) # dict of possible spatial dim names and their target properties spatial_dims = {} spatial_dims["LONG"] = { "source_var": "LONGITUDE", "source_dim": "LONG", "target_name": "longitude", "nc_atts": { "standard_name": "longitude", "long_name": "longitude", "units": "degrees_east", "axis": "X"} } spatial_dims["LAT"] = { "source_var": "LATITUDE", "source_dim": "LAT", "target_name": "latitude", "nc_atts": { "standard_name": "latitude", "long_name": "latitude", "units": "degrees_north", "axis": "Y"} } spatial_dims["PRES"] = { "source_var": "PRESSURE", "source_dim": "PRES", "target_name": "pressure", "nc_atts": { "standard_name": "pressure", "long_name": "pressure", "units": "dbar", "axis": "Z", "positive": "down"} } spatial_dims["SIG"] = { "source_var": "SIGMA_0", "source_dim": "SIG", "target_name": "sigma0", "nc_atts": { "standard_name": "sigma0", "long_name": "sigma0", "units": "kg/m3", "axis": "Z", "positive": "down"} } # dict of variables and their target properties variable_properties = {} variable_properties["PRESSURE"] = { "source_name": 
"PRESSURE", "target_name": "pressure", "nc_atts": { "long_name": "Pressure of sigma0-surface", "units": "dbar", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["SUMMED_WEIGHT"] = { "source_name": "SUMMED_WEIGHT", "target_name": "summed_weight", "nc_atts": { "long_name": "Sum of raw data weights used for grid point", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["YEAR_OF_DATA"] = { "source_name": "YEAR_OF_DATA", "target_name": "year_of_data", "nc_atts": { "long_name": "Mapped year of grid point", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["POTENTIAL_TEMPERATURE"] = { "source_name": "POTENTIAL_TEMPERATURE", "target_name": "potential_temperature", "nc_atts": { "long_name": "Potential Temperature (IPTS-90)", "units": "degC", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["CONSERVATIVE_TEMPERATURE"] = { "source_name": "CONSERVATIVE_TEMPERATURE", "target_name": "conservative_temperature", "nc_atts": { "long_name": "Conservative Temperature (TEOS-10)", "units": "degC", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["POTENTIAL_TEMPERATURE_MIXED_LAYER"] = { "source_name": "POTENTIAL_TEMPERATURE_MIXED_LAYER", "target_name": "potential_temperature_mixed_layer", "nc_atts": { "long_name": "Mixed-Layer Potential Temperature (IPTS-90)", "units": "degC", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["CONSERVATIVE_TEMPERATURE_MIXED_LAYER"] = { "source_name": "CONSERVATIVE_TEMPERATURE_MIXED_LAYER", "target_name": "conservative_temperature_mixed_layer", "nc_atts": { "long_name": "Mixed-Layer Conservative Temperature (TEOS-10)", "units": "degC", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["ABSOLUTE_SALINITY"] = { "source_name": "ABSOLUTE_SALINITY", "target_name": "absolute_salinity", "nc_atts": { "long_name": "Absolute Salinity (TEOS-10)", "units": "g/kg", 
"_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["SALINITY"] = { "source_name": "SALINITY", "target_name": "practical_salinity", "nc_atts": { "long_name": "Practical Salinity (PSS-78)", "units": "psu", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["ABSOLUTE_SALINITY_MIXED_LAYER"] = { "source_name": "ABSOLUTE_SALINITY_MIXED_LAYER", "target_name": "absolute_salinity_mixed_layer", "nc_atts": { "long_name": "Mixed-Layer Absolute Salinity (TEOS-10)", "units": "g/kg", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["SALINITY_MIXED_LAYER"] = { "source_name": "SALINITY_MIXED_LAYER", "target_name": "practical_salinity_mixed_layer", "nc_atts": { "long_name": "Mixed-Layer Practical Salinity (PSS-78)", "units": "psu", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } variable_properties["DEPTH_MIXED_LAYER"] = { "source_name": "DEPTH_MIXED_LAYER", "target_name": "depth_mixed_layer", "nc_atts": { "long_name": "Mixed-Layer Depth", "units": "m", "_FillValue": FILL_VALUE, "missing_value": FILL_VALUE } } def _copy_spatial_dimensions(input_data_set, output_data_set): # cycle throug input dims and create / copy for key in input_data_set.dimensions.keys(): print("... 
copying dimension {}".format(key)) input_dimension = input_data_set.dimensions[key] input_variable = input_data_set.variables[ spatial_dims[key]["source_var"]] output_data_set.createDimension(spatial_dims[key]["target_name"], input_dimension.size) output_data_set.createVariable(spatial_dims[key]["target_name"], input_variable.dtype, spatial_dims[key]["target_name"]) output_variable = output_data_set.variables[ spatial_dims[key]["target_name"]] output_variable.setncatts(spatial_dims[key]["nc_atts"]) output_variable[:] = input_variable[:] def _create_time_dimension(month, output_data_set): # sequence of days per month, centered points in time, and bounds months = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31], np.double) time = np.cumsum(months) - months / 2.0 time_bnds = np.array([np.cumsum(months)-months, np.cumsum(months)]) # index associated with month imonth = month - 1 # create time dim and add attributes output_data_set.createDimension(u"time", None) output_data_set.createVariable(u"time", "d", u"time") output_data_set.variables[u"time"][0] = time[imonth] output_data_set.variables[u"time"].standard_name = "time" output_data_set.variables[u"time"].long_name = "time" output_data_set.variables[u"time"].calendar = "NOLEAP" output_data_set.variables[u"time"].units = "days since 0000-01-01 00:00:00" output_data_set.variables[u"time"].bounds = "time_bnds" # create bounds dim output_data_set.createDimension(u"tbnds", 2) output_data_set.createVariable(u"time_bnds", "d", (u"time", u"tbnds")) output_data_set.variables[u"time_bnds"][0, :] = time_bnds[:, imonth] def _copy_variables(input_data_set, output_data_set): # copy existing vars for key in input_data_set.variables.keys(): if key not in variable_properties.keys(): print("... skipping variable {}".format(key)) continue if key == "PRESSURE" and len(input_data_set.variables[key].shape) == 1: print("... skipping variable {}".format(key)) continue print("... 
copying variable {}".format(key)) input_variable = input_data_set.variables[key] output_dimension_list = [] output_dimension_list.append("time") for dim in list(input_variable.dimensions): output_dimension_list.append(spatial_dims[dim]["target_name"]) output_data_set.createVariable(variable_properties[key]["target_name"], input_variable.dtype, tuple(output_dimension_list)) output_variable = output_data_set.variables[ variable_properties[key]["target_name"]] output_variable.setncatts(variable_properties[key]["nc_atts"]) output_variable[0, ...] = input_variable[:] output_variable[:] = np.ma.array(output_variable[:], mask=np.isnan(output_variable[:])) def make_cf_compliant(month, input_file_name, output_file_name): """Copy data to a CF compliant file.""" # status report print("month {:02d}, input file {}, output file {}".format( month, input_file_name, output_file_name)) # open files input_data_set = nc.Dataset(input_file_name, "r") output_data_set = nc.Dataset(output_file_name, "w", clobber=True, format="NETCDF3_CLASSIC") # copy (generalized) spatial dimensions and associated variables, rename to # CF compliant, add attributes _copy_spatial_dimensions(input_data_set, output_data_set) # create time dimension and time_bnds _create_time_dimension(month, output_data_set) # copy data variables, adding time dimension and attributes, and set valid # missing value _copy_variables(input_data_set, output_data_set) # close data sets input_data_set.close() output_data_set.close() if __name__ == "__main__": """If called as standalone, get input args (file name and month).""" # check args if len(sys.argv) != 4: # (inlcuding command itself) raise Exception("Need exactly three arguments: month, input_file_name," " output_file_name.") # read args month = int(sys.argv[1]) input_file_name = sys.argv[2] output_file_name = sys.argv[3] # work on file make_cf_compliant(month, input_file_name, output_file_name) input_data/MIMOC_cf/util/make_CF_compliant.sh 0 → 100644 +12 −0 Original line 
#!/bin/bash
# Convert every monthly MIMOC file found in _non_cf_data/data/ into a CF
# compliant copy in data/ by calling util/make_CF_compliant.py once per file.

# Report which version of the original data is being converted.
(cd _non_cf_data/ && echo Using MIMOC "$(git describe)")

for month in {01..12}; do
    echo "==== month" "${month}" "===="
    for f in _non_cf_data/data/MIMOC*month"${month}".nc; do
        # An unmatched glob is left as the literal pattern; skip it instead
        # of handing a non-existent file name to the converter.
        if [ ! -e "$f" ]; then
            echo "no input file matching $f" >&2
            continue
        fi
        infile=$f
        outfile=data/$(basename "$f" .nc)_cf.nc
        util/make_CF_compliant.py "${month}" "${infile}" "${outfile}"
    done
done
# input_data/MIMOC_cf/meta.yaml
repo_name: MIMOC_cf
people: Willi Rath (<wrath@geomar.de>)
http_path_remote: https://git.geomar.de/data/MIMOC_cf
git_path_remote: git@git.geomar.de:data/MIMOC_cf.git
repo_description: |
  A CF compliant version of MIMOC
  (See <https://www.pmel.noaa.gov/mimoc/index.html> and
  <https://git.geomar.de/data/MIMOC/>.)
prefixes: data doc
pre_processing:
  # Add the original (non-CF) MIMOC repository as a submodule at the path
  # used by every later step (_non_cf_data), without fetching LFS content.
  - GIT_LFS_SKIP_SMUDGE=1 git submodule add git@git.geomar.de:data/MIMOC.git _non_cf_data || echo "non-CF submodule exitst"
  # Only run the git commands if the submodule directory could be entered,
  # so that they never act on the parent repository by accident.
  - 'cd _non_cf_data && { git pull; git lfs pull; git describe; }'
post_processing:
  - util/make_CF_compliant.sh
#!/usr/bin/env python3
# input_data/MIMOC_cf/util/make_CF_compliant.py
"""Make MIMOC data CF compliant.

Usage::

    make_CF_compliant.py MONTH INPUT_FILE_NAME OUTPUT_FILE_NAME

One input netCDF file is copied to a new NETCDF3_CLASSIC file in which
dimensions and variables are renamed, attributes are added, every data
variable gets a leading ``time`` record dimension (with ``time_bnds``), and
NaNs are replaced by the fill value.
"""

import sys

import netCDF4 as nc
import numpy as np

# Constants
# Written as both ``_FillValue`` and ``missing_value`` of every data variable.
FILL_VALUE = np.float32(-1.e+34)


def _spatial_dim(source_var, source_dim, target_name, units, axis,
                 positive=None):
    """Return the properties record of one (generalized) spatial dimension.

    ``target_name`` is used as dimension name, variable name,
    ``standard_name`` and ``long_name`` of the output coordinate.
    """
    nc_atts = {
        "standard_name": target_name,
        "long_name": target_name,
        "units": units,
        "axis": axis,
    }
    if positive is not None:
        nc_atts["positive"] = positive
    return {
        "source_var": source_var,
        "source_dim": source_dim,
        "target_name": target_name,
        "nc_atts": nc_atts,
    }


# dict of possible spatial dim names and their target properties
# NOTE(review): "pressure" and "sigma0" do not look like entries of the CF
# standard name table (candidates: sea_water_pressure,
# sea_water_sigma_theta) -- confirm before changing the written metadata.
spatial_dims = {
    record["source_dim"]: record
    for record in (
        _spatial_dim("LONGITUDE", "LONG", "longitude", "degrees_east", "X"),
        _spatial_dim("LATITUDE", "LAT", "latitude", "degrees_north", "Y"),
        _spatial_dim("PRESSURE", "PRES", "pressure", "dbar", "Z", "down"),
        _spatial_dim("SIGMA_0", "SIG", "sigma0", "kg/m3", "Z", "down"),
    )
}


def _data_variable(source_name, target_name, long_name, units=None):
    """Return the properties record of one data variable.

    The attribute order (long_name, units, _FillValue, missing_value) is the
    order in which the attributes are written to the output file.
    """
    nc_atts = {"long_name": long_name}
    if units is not None:
        nc_atts["units"] = units
    nc_atts["_FillValue"] = FILL_VALUE
    nc_atts["missing_value"] = FILL_VALUE
    return {
        "source_name": source_name,
        "target_name": target_name,
        "nc_atts": nc_atts,
    }


# dict of variables and their target properties
variable_properties = {
    record["source_name"]: record
    for record in (
        _data_variable("PRESSURE", "pressure",
                       "Pressure of sigma0-surface", "dbar"),
        _data_variable("SUMMED_WEIGHT", "summed_weight",
                       "Sum of raw data weights used for grid point"),
        _data_variable("YEAR_OF_DATA", "year_of_data",
                       "Mapped year of grid point"),
        _data_variable("POTENTIAL_TEMPERATURE", "potential_temperature",
                       "Potential Temperature (IPTS-90)", "degC"),
        _data_variable("CONSERVATIVE_TEMPERATURE",
                       "conservative_temperature",
                       "Conservative Temperature (TEOS-10)", "degC"),
        _data_variable("POTENTIAL_TEMPERATURE_MIXED_LAYER",
                       "potential_temperature_mixed_layer",
                       "Mixed-Layer Potential Temperature (IPTS-90)", "degC"),
        _data_variable("CONSERVATIVE_TEMPERATURE_MIXED_LAYER",
                       "conservative_temperature_mixed_layer",
                       "Mixed-Layer Conservative Temperature (TEOS-10)",
                       "degC"),
        _data_variable("ABSOLUTE_SALINITY", "absolute_salinity",
                       "Absolute Salinity (TEOS-10)", "g/kg"),
        _data_variable("SALINITY", "practical_salinity",
                       "Practical Salinity (PSS-78)", "psu"),
        _data_variable("ABSOLUTE_SALINITY_MIXED_LAYER",
                       "absolute_salinity_mixed_layer",
                       "Mixed-Layer Absolute Salinity (TEOS-10)", "g/kg"),
        _data_variable("SALINITY_MIXED_LAYER",
                       "practical_salinity_mixed_layer",
                       "Mixed-Layer Practical Salinity (PSS-78)", "psu"),
        _data_variable("DEPTH_MIXED_LAYER", "depth_mixed_layer",
                       "Mixed-Layer Depth", "m"),
    )
}


def _copy_spatial_dimensions(input_data_set, output_data_set):
    """Copy every input dimension and its coordinate variable.

    Each dimension of ``input_data_set`` is looked up in ``spatial_dims``
    and re-created in ``output_data_set`` under its target name with the
    same size, together with a coordinate variable that carries the
    configured attributes and the values of the source variable.

    Raises:
        KeyError: if a dimension is not listed in ``spatial_dims`` or its
            source variable is missing from the input.
    """
    # cycle through input dims and create / copy
    for key, input_dimension in input_data_set.dimensions.items():
        print("... copying dimension {}".format(key))

        props = spatial_dims[key]
        target_name = props["target_name"]
        input_variable = input_data_set.variables[props["source_var"]]

        output_data_set.createDimension(target_name, input_dimension.size)
        output_variable = output_data_set.createVariable(
            target_name, input_variable.dtype, (target_name,))
        output_variable.setncatts(props["nc_atts"])
        output_variable[:] = input_variable[:]


def _create_time_dimension(month, output_data_set):
    """Create ``time`` (one record) and ``time_bnds`` for ``month``.

    Time is counted in days of a 365-day year; the record is placed at the
    centre of the month and bounded by the first and last day of the month.

    Args:
        month: month number, 1 (January) to 12 (December).
        output_data_set: writable data set receiving the new dimensions.

    Raises:
        ValueError: if ``month`` is outside 1..12.  Without this check,
            month 0 would silently select December via index -1.
    """
    if not 1 <= month <= 12:
        raise ValueError("month must be in 1..12, got {}".format(month))

    # sequence of days per month, centered points in time, and bounds
    months = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],
                      np.double)
    month_ends = np.cumsum(months)
    month_starts = month_ends - months
    time = month_ends - months / 2.0

    # index associated with month
    imonth = month - 1

    # create time dim (size None: unlimited) and add attributes
    output_data_set.createDimension("time", None)
    time_variable = output_data_set.createVariable("time", "d", ("time",))
    time_variable.standard_name = "time"
    time_variable.long_name = "time"
    time_variable.calendar = "NOLEAP"
    time_variable.units = "days since 0000-01-01 00:00:00"
    time_variable.bounds = "time_bnds"

    # create bounds dim
    output_data_set.createDimension("tbnds", 2)
    bounds_variable = output_data_set.createVariable(
        "time_bnds", "d", ("time", "tbnds"))

    # write the single record once all definitions are in place
    time_variable[0] = time[imonth]
    bounds_variable[0, :] = [month_starts[imonth], month_ends[imonth]]


def _copy_variables(input_data_set, output_data_set):
    """Copy all known data variables, prepending the ``time`` dimension.

    Variables that are not listed in ``variable_properties`` are skipped,
    and so is a one-dimensional ``PRESSURE`` (it is the coordinate that
    ``_copy_spatial_dimensions`` already copied).  NaNs are masked before
    writing, so they end up as the fill value in the output.
    """
    for key, input_variable in input_data_set.variables.items():
        is_pressure_axis = (key == "PRESSURE"
                            and len(input_variable.shape) == 1)
        if key not in variable_properties or is_pressure_axis:
            print("... skipping variable {}".format(key))
            continue

        print("... copying variable {}".format(key))
        props = variable_properties[key]

        output_dimensions = ("time",) + tuple(
            spatial_dims[dim]["target_name"]
            for dim in input_variable.dimensions)
        output_variable = output_data_set.createVariable(
            props["target_name"], input_variable.dtype, output_dimensions)
        output_variable.setncatts(props["nc_atts"])

        # Mask NaNs up front (keeping any mask the input already has), so
        # the data is written once instead of written, read back and
        # rewritten.
        values = input_variable[:]
        nan_mask = np.isnan(np.ma.getdata(values))
        output_variable[0, ...] = np.ma.array(values, mask=nan_mask)


def make_cf_compliant(month, input_file_name, output_file_name):
    """Copy data to a CF compliant file.

    Args:
        month: month number (1..12) the input file belongs to.
        input_file_name: path of the netCDF file to read.
        output_file_name: path of the NETCDF3_CLASSIC file to create; an
            existing file is overwritten.
    """
    # status report
    print("month {:02d}, input file {}, output file {}".format(
        month, input_file_name, output_file_name))

    # open files; the context managers close both data sets even if one of
    # the steps below raises
    with nc.Dataset(input_file_name, "r") as input_data_set:
        with nc.Dataset(output_file_name, "w", clobber=True,
                        format="NETCDF3_CLASSIC") as output_data_set:

            # copy (generalized) spatial dimensions and associated
            # variables, rename to CF compliant, add attributes
            _copy_spatial_dimensions(input_data_set, output_data_set)

            # create time dimension and time_bnds
            _create_time_dimension(month, output_data_set)

            # copy data variables, adding time dimension and attributes,
            # and set valid missing value
            _copy_variables(input_data_set, output_data_set)


if __name__ == "__main__":
    # If called as standalone, get input args (month and file names).

    # check args
    if len(sys.argv) != 4:  # (including command itself)
        raise Exception("Need exactly three arguments: month, input_file_name,"
                        " output_file_name.")

    # read args
    month = int(sys.argv[1])
    input_file_name = sys.argv[2]
    output_file_name = sys.argv[3]

    # work on file
    make_cf_compliant(month, input_file_name, output_file_name)
#!/bin/bash
# input_data/MIMOC_cf/util/make_CF_compliant.sh
# Convert every monthly MIMOC file found in _non_cf_data/data/ into a CF
# compliant copy in data/ by calling util/make_CF_compliant.py once per file.

# Report which version of the original data is being converted.
(cd _non_cf_data/ && echo Using MIMOC "$(git describe)")

for month in {01..12}; do
    echo "==== month" "${month}" "===="
    for f in _non_cf_data/data/MIMOC*month"${month}".nc; do
        # An unmatched glob is left as the literal pattern; skip it instead
        # of handing a non-existent file name to the converter.
        if [ ! -e "$f" ]; then
            echo "no input file matching $f" >&2
            continue
        fi
        infile=$f
        outfile=data/$(basename "$f" .nc)_cf.nc
        util/make_CF_compliant.py "${month}" "${infile}" "${outfile}"
    done
done