Loading README.md 0 → 100644 +1 −0 Original line number Diff line number Diff line # esm-scripts calculate_trends.py 0 → 100644 +401 −0 Original line number Diff line number Diff line # # # Calculate spatial trends from (t,y,x) data # # import time, sys import argparse import numpy as np from scipy import stats from netCDF4 import Dataset, num2date, date2num import multiprocessing as mp def calc_spatial_trend_masked(data_list,threshold=0.3): """ Calculates spatial trends from a masked array of shape (time,y,x) """ vt = data_list[0] data = data_list[1] slope = np.ma.zeros((data.shape[1],data.shape[2])) sign = np.ma.zeros(slope.shape) corr = np.ma.zeros(slope.shape) n_threshold = data.shape[0] * threshold for i in xrange(0,data.shape[2]): for j in xrange(0,data.shape[1]): ## Masked vector tmp = data[:,j,i] n = tmp.compressed().shape[0] ## Make vx and vy from valid data vx = vt[~tmp.mask] vy = tmp[~tmp.mask] if (n >= n_threshold): k, m, r, p, sig = stats.linregress(vx,vy) slope[j,i] = k sign[j,i] = p corr[j,i] = r else: slope[j,i] = np.ma.masked sign[j,i] = np.ma.masked corr[j,i] = np.ma.masked return slope,sign,corr def calc_spatial_trend_masked_parallel(data,vt=np.array([]),ncpu=-1): """ Calculates spatial trends from a masked array of shape (time,y,x) """ t0 = time.time() if ncpu == -1: ncpu = mp.cpu_count() ## Spawn threads p = mp.Pool(ncpu) nump = ncpu #p._processes slope = np.ma.zeros((data.shape[1],data.shape[2])) sign = np.ma.zeros(slope.shape) corr = np.ma.zeros(slope.shape) if (vt.shape[0] < 1): vt = np.arange(0,data.shape[0]) ## ## Split array into ncpu smaller arrays and put ## them in a list ## nx = data.shape[2] ## number of columns in data ## Find number of tasks in Pool ntask = ncpu #p._processes ## Split array into smaller arrays of size (nt,ny,10) ## Each task will make calculations on one array and then go ## on the the next ## ## More sub-arrays gives better load-balance but also more overhead. ## For 362 i points, 30-40 sub-arrays seems good. s = np.array( np.ceil( np.arange(0,nx,10) ),dtype='int32' ) ##start i for sub arrays e = np.array( np.append(s[1:],nx) ,dtype='int32') ## end i for sub arrays ## List with all sub-arrays l = [] for j in xrange(0,s.shape[0]): l.append([vt,data[:,:,s[j]:e[j]]]) ## Let all workers in Pool do calculations asynchronously using routine ## calc_spatial_trend_masked ## ## Results are returned to a list results = p.map_async(calc_spatial_trend_masked,l) result = results.get() ## Put results from sub-arrays into full-size arrays for j in xrange(0,s.shape[0]): k,m,r = result[j] slope[:,s[j]:e[j]] = k sign[:,s[j]:e[j]] = m corr[:,s[j]:e[j]] = r p.terminate() p.join() t1 = time.time() print(' calc_spatial_trend_masked_parallel in ',t1-t0,' sec on ',nump,' threads ') return slope,sign,corr if 1: # Parse input arguments line1 = ' --------------------------------------------- ' ## Parse input arguments parser = argparse.ArgumentParser(description='Calculate linear trends of geospatial data') parser.add_argument('--infile', dest='infile', action='store', default='data.nc',\ help='Name of file (must be netCDF)') parser.add_argument('--outfile', dest='outfile', action='store', default='trends.nc',\ help='Name of output file (will be netCDF)') parser.add_argument('--names', dest='names', action='store', default='all',\ help='Variable names (comma separated)') parser.add_argument('--method', dest='method', action='store', default='linregress',\ help='Method used (linregress)') parser.add_argument('--time_dmn', dest='time_dmn', action='store', default='time',\ help='Name of time dimension') parser.add_argument('--time_var', dest='time_var', action='store', default='time',\ help='Name of time variable') parser.add_argument('--nthreads', dest='ncpu', action='store', default=1) args = parser.parse_args() # # Open input file # nc1 = Dataset(args.infile,'r') # # Find time dimension # time_names = ['t','time_counter'] # some possible names of time dimension if args.time_dmn in nc1.dimensions.keys(): print(' Found time dimension %s ' % (args.time_dmn,)) else: # search dimension names for common names of time for time_name in time_names: if time_name in nc1.dimensions.keys(): args.time_dmn = time_name print(' Found time dimension %s ' % (time_name,) ) if args.time_dmn not in nc1.dimensions.keys(): print(' Could not find time dimension! ') print(' Please give name of time dimension with --time_dmn= argument' ) sys.exit() # # Now we find the variables to calculate trends for # varlist = [] if args.names == 'all': # Find all 3D variables with time as left-most dimension allvars = nc1.variables.keys() for var in allvars: if nc1.variables[var].ndim == 3 and nc1.variables[var].dimensions[0] == args.time_dmn: varlist.append(var) else: # Or use the comma-separated list of variable names varlist = args.names.split(',') # Find all time-invariant variables, and assume these are coordinate variables coordvars = [] for var in nc1.variables.keys(): if (args.time_dmn not in nc1.variables[var].dimensions and nc1.variables[var].ndim < 3) \ or (args.time_dmn in nc1.variables[var].dimensions and nc1.variables[var].ndim == 1) : coordvars.append(var) # Find time variable # Typically, it is called time, but in NEMO it is often named time_counter. # The name of time variable can also be given as an argument time_coord = None if args.time_var in nc1.variables.keys(): time_coord = nc1.variables[args.time_var] else: time_names = ['time','time_counter'] for time_name in time_names: if time_name in nc1.variables.keys(): time_coord = nc1.variables[time_name] args.time_var = time_name if time_coord == None: print(' Did not find a time coordinate ') print(' I will assume data is equally spaced in time ') else: print(' Found time coordinate: %s ' % (args.time_var,)) print(' From ',num2date(time_coord[0:1],units=time_coord.units,calendar=time_coord.calendar)) print(' To ',num2date(time_coord[-1:],units=time_coord.units,calendar=time_coord.calendar)) print(' Found coordinate variables: ') for var in coordvars: print(' %s ' % (var,)) print(' Calculate trends for variables: ') for var in varlist: print(' %s ' % (var,)) # Get settings from input file ndims = len(nc1.dimensions) dim_names = nc1.dimensions.keys() nattrs = len(nc1.ncattrs()) att_names = nc1.ncattrs() nvars = len(nc1.variables.keys())#len(varlist) var_names = nc1.variables.keys()#varlist # Create the output file print(' Create dimensions in output file... ') nc2 = Dataset(args.outfile,'w') dim_ids = [] outdim_ids = [] outdim_names = [] lverbose = False for i in range(0,ndims): ## Loop over dimensions in python order ii = ndims - i - 1 ## Dimensions in Fortran order #dimname = dim_names[ii] #dim_ids.append(nc1.dimensions[dim_names[ii]]) dimname = dim_names[i] dim_ids.append(nc1.dimensions[dim_names[i]]) outdim_names.append(dimname) # Make only one time step in output file if dimname == args.time_dmn: outdim_ids.append( nc2.createDimension(dimname,1) ) else: if dim_ids[i].isunlimited(): outdim_ids.append( nc2.createDimension(dimname,None) ) else: dimlen = len(dim_ids[i]) outdim_ids.append( nc2.createDimension(dimname,dimlen) ) if (lverbose): print(' Created dimension ',outdim_ids[i],outdim_names[i]) ## ## Copy file attributes ## print(' Copy file attributes... ') for i in range(0,nattrs): att = getattr(nc1,att_names[i]) if (lverbose): print('Copying attribute ',att) nc2.setncattr(att_names[i],att) if att_names[i] == 'history': if lverbose: print(' Reset history attribute') nc2.setncattr('history',' ') #nc2.setncattr('trends',outfile) ## ## Copy variable definitions and attributes ## print(' Copy variable definitions and attributes ') kvar_ids = [] pvar_ids = [] rvar_ids = [] for i in range(0,nvars): varname = var_names[i] kvarname = var_names[i] + '_k' pvarname = var_names[i] + '_p' rvarname = var_names[i] + '_r' var_id = nc1.variables[varname] prec = var_id.dtype dims = var_id.dimensions if varname in varlist: ## Create variable kvar_ids.append( nc2.createVariable(kvarname,'float32',dims) ) pvar_ids.append( nc2.createVariable(pvarname,'float32',dims) ) rvar_ids.append( nc2.createVariable(rvarname,'float32',dims) ) ## Find and copy attributes n_varatt = len(var_id.ncattrs()) var_att_names = var_id.ncattrs() for j in range(0,n_varatt): att = getattr(var_id,var_att_names[j]) kvar_ids[i].setncattr(var_att_names[j],att) if (lverbose): print(' Created variables: ') print(kvar_ids[i]) print(pvar_ids[i]) print(rvar_ids[i]) elif varname in coordvars: ## Create variable ovar_id = nc2.createVariable(varname,prec,dims) ## Find and copy attributes n_varatt = len(var_id.ncattrs()) var_att_names = var_id.ncattrs() for j in range(0,n_varatt): att = getattr(var_id,var_att_names[j]) ovar_id.setncattr(var_att_names[j],att) if (args.time_dmn in dims) and len(dims) == 1: ovar_id[0] = var_id[0] elif (args.time_dmn in dims) and len(dims) > 1: ovar_id[0,:] = var_id[0,:] else: ovar_id[:] = var_id[:] if lverbose: print(' Created variable ',ovar_id,varname) if varname not in varlist: kvar_ids.append(None) pvar_ids.append(None) rvar_ids.append(None) ## ## Read in data from domain file and put in output ## vmin = np.array([]) vmax = np.array([]) if 1: #for var in varlist: for ivar in range(0,nvars): var = var_names[ivar] if var in varlist: t0 = time.time() data = nc1.variables[var][:,:,:] t1 = time.time() print(' Read %s in %4f seconds ' % (var,t1-t0)) if time_coord == None: print(' Assume a time coordinate ') vt = np.arange(0,data.shape[0]) else: print(' Use time coord from file: %s ' % (args.time_dmn,)) vt = time_coord[:] data = np.ma.array(data) k,p,r = calc_spatial_trend_masked_parallel(data,vt=vt,ncpu=args.ncpu) #var_ids.append( nc1.variables[var] ) #nvardims = len(var_ids[ivar].dimensions) ## Calculate variable min/max vmin = np.ma.min(k) vmax = np.ma.max(k) kvar_ids[ivar].valid_min = vmin kvar_ids[ivar].valid_max = vmax pvar_ids[ivar].valid_min = 0. pvar_ids[ivar].valid_max = 1. rvar_ids[ivar].valid_min = -1. rvar_ids[ivar].valid_max = 1. print('min max') print(kvar_ids[ivar].valid_min,kvar_ids[ivar].valid_max) print(pvar_ids[ivar].valid_min,pvar_ids[ivar].valid_max) print(rvar_ids[ivar].valid_min,rvar_ids[ivar].valid_max) kvar_ids[ivar][0,:,:] = k[:] pvar_ids[ivar][0,:,:] = p[:] rvar_ids[ivar][0,:,:] = r[:] nc2.close() diag_openifs.sh 0 → 100644 +92 −0 Original line number Diff line number Diff line #!/bin/bash # # Diagnostics of OpenIFS run # # Load modules module load cdo/1.9.2 module load nco obs_file=$1 obs_name=$2 model_file=$3 model_name=$4 model_off=$5 model_frac=$6 date1=$7 date2=$8 echo "dates " $date1 $date2 tmp_dir="tmp" mkdir -p ${tmp_dir} names=(${obs_name} ${model_name}) files=(${obs_file} ${model_file}) interp_files=() newname=${model_name} cdo="cdo" for (( ii=0 ; ii<=1 ; ii++ )) do datafile=${files[$ii]} varname=${names[$ii]} echo " Make climatology and regrid ${varname} in ${datafile}" interp_method="remapbil" target_grid="r180x90" # Get filename without extension filename=$(basename -- "$datafile") extension="${filename##*.}" filename="${filename%.*}" # Filenames diff_file=${tmp_dir}/${filename}_${varname}_diff.nc clim_file=${tmp_dir}/${filename}_${varname}_clim.nc interp_file=${tmp_dir}/${filename}_${varname}_clim_${target_grid}.nc # Make climatology MAKE_CLIM=1 if [ ${MAKE_CLIM} -gt 0 ] then echo " Make climatology to ${clim_file} " ${cdo} yseasmean -seldate,${date1},${date2} -selname,${varname} ${datafile} ${clim_file} else echo " Not making climatology " ${cdo} seldate,${date1},${date2} -selname,${varname} ${datafile} ${clim_file} fi # Interpolate data to coarse grid echo " Interpolate data to ${interp_file}" echo "${cdo} -sellonlatbox,-180,180,-90,90 -${interp_method},${target_grid} ${clim_file} ${interp_file}" ${cdo} -sellonlatbox,-180,180,-90,90 -${interp_method},${target_grid} -chname,${obs_name},${newname} ${clim_file} ${interp_file} if [ $ii -gt 0 ] then # For model data, convert units conv_file=${interp_file}_conv ${cdo} -O addc,${model_off} ${interp_file} ${conv_file} ${cdo} -O mulc,${model_frac} ${conv_file} ${interp_file} rm -rf ${conv_file} fi # Add final file to list of files to analyse interp_files+=(${interp_file}) done echo ${interp_files[*]} COMPARE=1 if [ $COMPARE -gt 0 ] then # Compare files echo " Compare ${interp_files[1]} and ${interp_files[0]} " ${cdo} sub ${interp_files[1]} ${interp_files[0]} ${diff_file} fi diagnose_and_plot_openifs.sh 0 → 100644 +33 −0 Original line number Diff line number Diff line #!/bin/bash module load anaconda2 PRECIP=0 UVEL=1 if [ $PRECIP -gt 0 ] then DATE1=1997-01-01T00:00:00 DATE2=2010-01-01T00:00:00 PRECIP_OBS=/gfs1/work/shkjocke/data/OBS/gpcp_v01r03_monthly_1996-2017.nc4 PRECIP_OBS_NAME=precip PRECIP_MODEL=/gfs1/work/shkjocke/data/OIFS/T159L91/g001/NETCDF/g001_surface_diff_1m_1982-2009.nc PRECIP_MODEL_NAME=TP ./diag_openifs.sh ${PRECIP_OBS} ${PRECIP_OBS_NAME} ${PRECIP_MODEL} ${PRECIP_MODEL_NAME} 0 4000 ${DATE1} ${DATE2} python plot_diags.py --file=tmp/g001_surface_diff_1m_1982-2009_TP_diff.nc --vars=TP --diff python plot_diags.py --file=tmp/g001_surface_diff_1m_1982-2009_TP_clim_r180x90.nc --vars=TP --file2=tmp/gpcp_v01r03_monthly_1996-2017_precip_clim_r180x90.nc fi if [ $UVEL -gt 0 ] then DATE1=1982-01-01T00:00:00 DATE2=2010-01-01T00:00:00 UVEL_OBS=/gfs1/work/shkjocke/data/OBS/ERAinterim/uas.reana.eraint.1979-2017.r144x73.nc UVEL_OBS_NAME=uas UVEL_MODEL=/gfs1/work/shkjocke/data/OIFS/T159L91/g001/NETCDF/g001_surface_1m_1982-2009.nc UVEL_MODEL_NAME=U10M ./diag_openifs.sh ${UVEL_OBS} ${UVEL_OBS_NAME} ${UVEL_MODEL} ${UVEL_MODEL_NAME} 0 1 ${DATE1} ${DATE2} python plot_diags.py --file=tmp/g001_surface_1m_1982-2009_U10M_diff.nc --vars=${UVEL_MODEL_NAME} --diff fi oifs/ifs2nc_10.sh 0 → 100644 +146 −0 Original line number Diff line number Diff line #!/bin/bash # # ifs2nc.sh # # Purpose # ------- # Convert OpenIFS output to netCDF files # # Methods # ------- # * Use CDO to split the ICMSH and ICMGG files # to one file vertical coordinate, e.g. hybrid, surface etc. # As surface variables are always stored as GRIB1 and hybrid variables # are always stored as GRIB2, this will not cause problems for CDO. # It may be necessary to split according to GRIB edition sometime later # * Use CDO to convert from reduced Gaussian or spectral to regular Gaussian grid # * Use CDO to convert the grib file to netCDF4/HDF5. # Level of compression set by CDO_ZIP variable. # # History # ------- # 2018-07-27: J. Kjellsson. First version # # # Here we need CDO and grib module load cdo GRIB_API_DIR=/sw/rhel6-x64/grib_api/grib_api-1.15.0-intel14/ grib_copy=${GRIB_API_DIR}/bin/grib_copy OIFS_OUTPUT_DIR=$5 OIFS_TMP_DIR=./TMP-IFS2NC/ OIFS_PROCESSED_DIR=$4 OIFS_EXPID=$6 OIFS_FIRSTSTEP=$1 OIFS_LASTSTEP=$2 OIFS_STEP=$3 mkdir -vp $OIFS_TMP_DIR mkdir -vp $OIFS_PROCESSED_DIR # Level of zipping (1-9). 0 means no zipping. # This can reduce the data size by up to 90% # It adds a little bit to the processing time. CDO_ZIP=1 echo " Process data from ${OIFS_FIRSTSTEP} to ${OIFS_LASTSTEP} " for (( step=${OIFS_FIRSTSTEP} ; step<=${OIFS_LASTSTEP} ; step=step+${OIFS_STEP} )) do timestamp=$( printf "%010d" $step ) # We split the data into one file per vertical grid # For writing in netCDF files, I think it makes no sense # to put data on different grids into the same file. # # Each netCDF file will have either surface, hybrid or isobaricInhPa as vert. coord # Other types could be depthBelowLandLayer for typeOfLevel in surface isobaricInhPa hybrid do FILES_ARRAY=() FILES_STRING=" " for GRID in SH GG do ORIGINAL_FILE=ICM${GRID}${OIFS_EXPID}+${timestamp} echo " Get data to ${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}.grb " $grib_copy -w typeOfLevel=$typeOfLevel ${OIFS_OUTPUT_DIR}/${ORIGINAL_FILE} ${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}.grb LEVEL_FILE=${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}.grb REGULAR_FILE=${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}_R.grb NETCDF_FILE=${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}_R.nc if [ -e ${LEVEL_FILE} ]; then if [ $GRID == "GG" ]; then echo " Convert ${LEVEL_FILE} to ${REGULAR_FILE} (reduced Gaussian to regular Gaussian)" cdo -O setgridtype,regular ${LEVEL_FILE} ${REGULAR_FILE} elif [ $GRID == "SH" ]; then echo " Convert ${LEVEL_FILE} to ${REGULAR_FILE} (spectral harmonics to regular Gaussian)" cdo -O sp2gpl ${LEVEL_FILE} ${REGULAR_FILE} fi rm -f ${LEVEL_FILE} # Using grib_to_netcdf to convert data has the advantage of retaining variable names # but does not get the correct longitudes and latitudes. It also does not put a,b coeffs # into files with hybrid coordinates. #OPTS="-u time -k 3 -D NC_FLOAT" #grib_to_netcdf ${OPTS} ${REGULAR_FILE} -o ${NETCDF_FILE} # CDO OPTS="-t ecmwf -f nc4c -z zip_1 copy -sellonlatbox,-180,180,-90,90" cdo ${OPTS} ${REGULAR_FILE} ${NETCDF_FILE} rm -f ${REGULAR_FILE} #if [ $CDO_ZIP -gt 0 ]; then # cdo -O -f nc4c -z zip_1 copy ${NETCDF_FILE} ${NETCDF_FILE}4 # rm ${NETCDF_FILE}; mv ${NETCDF_FILE}4 ${NETCDF_FILE} #fi dtime=$(cdo -s showtimestamp ${NETCDF_FILE}) dtime2=$(echo $dtime | sed -r 's/T/ /g') DTIME=$(date -d "$dtime2" "+%F_%H%M%S") echo " Datetime for file ${NETCDF_FILE} is ${DTIME} " FILES_ARRAY+=(${NETCDF_FILE}) FILES_STRING=${FILES_STRING}" "${NETCDF_FILE} else echo " No file named ${LEVEL_FILE}. No such data in your output? " fi done GLUED_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_${typeOfLevel}_${DTIME}.nc echo " Glue files $FILES_STRING to one file $GLUED_FILE " cdo -O merge $FILES_STRING $GLUED_FILE rm -f ${FILES_STRING} if [ "${typeOfLevel}" == "hybrid" ]; then ZH_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_${typeOfLevel}_zh_${DTIME}.nc NEW_GLUED_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_${typeOfLevel}_with_zh_${DTIME}.nc echo " Calculate geopotential height from hybrid data " ${NEW_GLUED_FILE} cdo -O geopotheight -chname,lnsp,lsp ${GLUED_FILE} ${ZH_FILE} cdo -O merge ${GLUED_FILE} ${ZH_FILE} ${NEW_GLUED_FILE} rm ${GLUED_FILE} ${ZH_FILE} mv ${NEW_GLUED_FILE} ${GLUED_FILE} PLEV_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_plevel_${DTIME}.nc STD_LEVELS="100000,92500,85000,77500,70000,60000,50000,40000,30000,25000,20000,15000," STD_LEVELS=${STD_LEVELS}"10000,7000,5000,3000,2000,1000,700,500,300,200,100,50,20,10,5,2,1 " echo " Interpolate to pressure levels ${PLEV_FILE} " cdo -O ml2pl,${STD_LEVELS} ${GLUED_FILE} ${PLEV_FILE} fi done done Loading
calculate_trends.py 0 → 100644 +401 −0 Original line number Diff line number Diff line # # # Calculate spatial trends from (t,y,x) data # # import time, sys import argparse import numpy as np from scipy import stats from netCDF4 import Dataset, num2date, date2num import multiprocessing as mp def calc_spatial_trend_masked(data_list,threshold=0.3): """ Calculates spatial trends from a masked array of shape (time,y,x) """ vt = data_list[0] data = data_list[1] slope = np.ma.zeros((data.shape[1],data.shape[2])) sign = np.ma.zeros(slope.shape) corr = np.ma.zeros(slope.shape) n_threshold = data.shape[0] * threshold for i in xrange(0,data.shape[2]): for j in xrange(0,data.shape[1]): ## Masked vector tmp = data[:,j,i] n = tmp.compressed().shape[0] ## Make vx and vy from valid data vx = vt[~tmp.mask] vy = tmp[~tmp.mask] if (n >= n_threshold): k, m, r, p, sig = stats.linregress(vx,vy) slope[j,i] = k sign[j,i] = p corr[j,i] = r else: slope[j,i] = np.ma.masked sign[j,i] = np.ma.masked corr[j,i] = np.ma.masked return slope,sign,corr def calc_spatial_trend_masked_parallel(data,vt=np.array([]),ncpu=-1): """ Calculates spatial trends from a masked array of shape (time,y,x) """ t0 = time.time() if ncpu == -1: ncpu = mp.cpu_count() ## Spawn threads p = mp.Pool(ncpu) nump = ncpu #p._processes slope = np.ma.zeros((data.shape[1],data.shape[2])) sign = np.ma.zeros(slope.shape) corr = np.ma.zeros(slope.shape) if (vt.shape[0] < 1): vt = np.arange(0,data.shape[0]) ## ## Split array into ncpu smaller arrays and put ## them in a list ## nx = data.shape[2] ## number of columns in data ## Find number of tasks in Pool ntask = ncpu #p._processes ## Split array into smaller arrays of size (nt,ny,10) ## Each task will make calculations on one array and then go ## on the the next ## ## More sub-arrays gives better load-balance but also more overhead. ## For 362 i points, 30-40 sub-arrays seems good. s = np.array( np.ceil( np.arange(0,nx,10) ),dtype='int32' ) ##start i for sub arrays e = np.array( np.append(s[1:],nx) ,dtype='int32') ## end i for sub arrays ## List with all sub-arrays l = [] for j in xrange(0,s.shape[0]): l.append([vt,data[:,:,s[j]:e[j]]]) ## Let all workers in Pool do calculations asynchronously using routine ## calc_spatial_trend_masked ## ## Results are returned to a list results = p.map_async(calc_spatial_trend_masked,l) result = results.get() ## Put results from sub-arrays into full-size arrays for j in xrange(0,s.shape[0]): k,m,r = result[j] slope[:,s[j]:e[j]] = k sign[:,s[j]:e[j]] = m corr[:,s[j]:e[j]] = r p.terminate() p.join() t1 = time.time() print(' calc_spatial_trend_masked_parallel in ',t1-t0,' sec on ',nump,' threads ') return slope,sign,corr if 1: # Parse input arguments line1 = ' --------------------------------------------- ' ## Parse input arguments parser = argparse.ArgumentParser(description='Calculate linear trends of geospatial data') parser.add_argument('--infile', dest='infile', action='store', default='data.nc',\ help='Name of file (must be netCDF)') parser.add_argument('--outfile', dest='outfile', action='store', default='trends.nc',\ help='Name of output file (will be netCDF)') parser.add_argument('--names', dest='names', action='store', default='all',\ help='Variable names (comma separated)') parser.add_argument('--method', dest='method', action='store', default='linregress',\ help='Method used (linregress)') parser.add_argument('--time_dmn', dest='time_dmn', action='store', default='time',\ help='Name of time dimension') parser.add_argument('--time_var', dest='time_var', action='store', default='time',\ help='Name of time variable') parser.add_argument('--nthreads', dest='ncpu', action='store', default=1) args = parser.parse_args() # # Open input file # nc1 = Dataset(args.infile,'r') # # Find time dimension # time_names = ['t','time_counter'] # some possible names of time dimension if args.time_dmn in nc1.dimensions.keys(): print(' Found time dimension %s ' % (args.time_dmn,)) else: # search dimension names for common names of time for time_name in time_names: if time_name in nc1.dimensions.keys(): args.time_dmn = time_name print(' Found time dimension %s ' % (time_name,) ) if args.time_dmn not in nc1.dimensions.keys(): print(' Could not find time dimension! ') print(' Please give name of time dimension with --time_dmn= argument' ) sys.exit() # # Now we find the variables to calculate trends for # varlist = [] if args.names == 'all': # Find all 3D variables with time as left-most dimension allvars = nc1.variables.keys() for var in allvars: if nc1.variables[var].ndim == 3 and nc1.variables[var].dimensions[0] == args.time_dmn: varlist.append(var) else: # Or use the comma-separated list of variable names varlist = args.names.split(',') # Find all time-invariant variables, and assume these are coordinate variables coordvars = [] for var in nc1.variables.keys(): if (args.time_dmn not in nc1.variables[var].dimensions and nc1.variables[var].ndim < 3) \ or (args.time_dmn in nc1.variables[var].dimensions and nc1.variables[var].ndim == 1) : coordvars.append(var) # Find time variable # Typically, it is called time, but in NEMO it is often named time_counter. # The name of time variable can also be given as an argument time_coord = None if args.time_var in nc1.variables.keys(): time_coord = nc1.variables[args.time_var] else: time_names = ['time','time_counter'] for time_name in time_names: if time_name in nc1.variables.keys(): time_coord = nc1.variables[time_name] args.time_var = time_name if time_coord == None: print(' Did not find a time coordinate ') print(' I will assume data is equally spaced in time ') else: print(' Found time coordinate: %s ' % (args.time_var,)) print(' From ',num2date(time_coord[0:1],units=time_coord.units,calendar=time_coord.calendar)) print(' To ',num2date(time_coord[-1:],units=time_coord.units,calendar=time_coord.calendar)) print(' Found coordinate variables: ') for var in coordvars: print(' %s ' % (var,)) print(' Calculate trends for variables: ') for var in varlist: print(' %s ' % (var,)) # Get settings from input file ndims = len(nc1.dimensions) dim_names = nc1.dimensions.keys() nattrs = len(nc1.ncattrs()) att_names = nc1.ncattrs() nvars = len(nc1.variables.keys())#len(varlist) var_names = nc1.variables.keys()#varlist # Create the output file print(' Create dimensions in output file... ') nc2 = Dataset(args.outfile,'w') dim_ids = [] outdim_ids = [] outdim_names = [] lverbose = False for i in range(0,ndims): ## Loop over dimensions in python order ii = ndims - i - 1 ## Dimensions in Fortran order #dimname = dim_names[ii] #dim_ids.append(nc1.dimensions[dim_names[ii]]) dimname = dim_names[i] dim_ids.append(nc1.dimensions[dim_names[i]]) outdim_names.append(dimname) # Make only one time step in output file if dimname == args.time_dmn: outdim_ids.append( nc2.createDimension(dimname,1) ) else: if dim_ids[i].isunlimited(): outdim_ids.append( nc2.createDimension(dimname,None) ) else: dimlen = len(dim_ids[i]) outdim_ids.append( nc2.createDimension(dimname,dimlen) ) if (lverbose): print(' Created dimension ',outdim_ids[i],outdim_names[i]) ## ## Copy file attributes ## print(' Copy file attributes... ') for i in range(0,nattrs): att = getattr(nc1,att_names[i]) if (lverbose): print('Copying attribute ',att) nc2.setncattr(att_names[i],att) if att_names[i] == 'history': if lverbose: print(' Reset history attribute') nc2.setncattr('history',' ') #nc2.setncattr('trends',outfile) ## ## Copy variable definitions and attributes ## print(' Copy variable definitions and attributes ') kvar_ids = [] pvar_ids = [] rvar_ids = [] for i in range(0,nvars): varname = var_names[i] kvarname = var_names[i] + '_k' pvarname = var_names[i] + '_p' rvarname = var_names[i] + '_r' var_id = nc1.variables[varname] prec = var_id.dtype dims = var_id.dimensions if varname in varlist: ## Create variable kvar_ids.append( nc2.createVariable(kvarname,'float32',dims) ) pvar_ids.append( nc2.createVariable(pvarname,'float32',dims) ) rvar_ids.append( nc2.createVariable(rvarname,'float32',dims) ) ## Find and copy attributes n_varatt = len(var_id.ncattrs()) var_att_names = var_id.ncattrs() for j in range(0,n_varatt): att = getattr(var_id,var_att_names[j]) kvar_ids[i].setncattr(var_att_names[j],att) if (lverbose): print(' Created variables: ') print(kvar_ids[i]) print(pvar_ids[i]) print(rvar_ids[i]) elif varname in coordvars: ## Create variable ovar_id = nc2.createVariable(varname,prec,dims) ## Find and copy attributes n_varatt = len(var_id.ncattrs()) var_att_names = var_id.ncattrs() for j in range(0,n_varatt): att = getattr(var_id,var_att_names[j]) ovar_id.setncattr(var_att_names[j],att) if (args.time_dmn in dims) and len(dims) == 1: ovar_id[0] = var_id[0] elif (args.time_dmn in dims) and len(dims) > 1: ovar_id[0,:] = var_id[0,:] else: ovar_id[:] = var_id[:] if lverbose: print(' Created variable ',ovar_id,varname) if varname not in varlist: kvar_ids.append(None) pvar_ids.append(None) rvar_ids.append(None) ## ## Read in data from domain file and put in output ## vmin = np.array([]) vmax = np.array([]) if 1: #for var in varlist: for ivar in range(0,nvars): var = var_names[ivar] if var in varlist: t0 = time.time() data = nc1.variables[var][:,:,:] t1 = time.time() print(' Read %s in %4f seconds ' % (var,t1-t0)) if time_coord == None: print(' Assume a time coordinate ') vt = np.arange(0,data.shape[0]) else: print(' Use time coord from file: %s ' % (args.time_dmn,)) vt = time_coord[:] data = np.ma.array(data) k,p,r = calc_spatial_trend_masked_parallel(data,vt=vt,ncpu=args.ncpu) #var_ids.append( nc1.variables[var] ) #nvardims = len(var_ids[ivar].dimensions) ## Calculate variable min/max vmin = np.ma.min(k) vmax = np.ma.max(k) kvar_ids[ivar].valid_min = vmin kvar_ids[ivar].valid_max = vmax pvar_ids[ivar].valid_min = 0. pvar_ids[ivar].valid_max = 1. rvar_ids[ivar].valid_min = -1. rvar_ids[ivar].valid_max = 1. print('min max') print(kvar_ids[ivar].valid_min,kvar_ids[ivar].valid_max) print(pvar_ids[ivar].valid_min,pvar_ids[ivar].valid_max) print(rvar_ids[ivar].valid_min,rvar_ids[ivar].valid_max) kvar_ids[ivar][0,:,:] = k[:] pvar_ids[ivar][0,:,:] = p[:] rvar_ids[ivar][0,:,:] = r[:] nc2.close()
diag_openifs.sh 0 → 100644 +92 −0 Original line number Diff line number Diff line #!/bin/bash # # Diagnostics of OpenIFS run # # Load modules module load cdo/1.9.2 module load nco obs_file=$1 obs_name=$2 model_file=$3 model_name=$4 model_off=$5 model_frac=$6 date1=$7 date2=$8 echo "dates " $date1 $date2 tmp_dir="tmp" mkdir -p ${tmp_dir} names=(${obs_name} ${model_name}) files=(${obs_file} ${model_file}) interp_files=() newname=${model_name} cdo="cdo" for (( ii=0 ; ii<=1 ; ii++ )) do datafile=${files[$ii]} varname=${names[$ii]} echo " Make climatology and regrid ${varname} in ${datafile}" interp_method="remapbil" target_grid="r180x90" # Get filename without extension filename=$(basename -- "$datafile") extension="${filename##*.}" filename="${filename%.*}" # Filenames diff_file=${tmp_dir}/${filename}_${varname}_diff.nc clim_file=${tmp_dir}/${filename}_${varname}_clim.nc interp_file=${tmp_dir}/${filename}_${varname}_clim_${target_grid}.nc # Make climatology MAKE_CLIM=1 if [ ${MAKE_CLIM} -gt 0 ] then echo " Make climatology to ${clim_file} " ${cdo} yseasmean -seldate,${date1},${date2} -selname,${varname} ${datafile} ${clim_file} else echo " Not making climatology " ${cdo} seldate,${date1},${date2} -selname,${varname} ${datafile} ${clim_file} fi # Interpolate data to coarse grid echo " Interpolate data to ${interp_file}" echo "${cdo} -sellonlatbox,-180,180,-90,90 -${interp_method},${target_grid} ${clim_file} ${interp_file}" ${cdo} -sellonlatbox,-180,180,-90,90 -${interp_method},${target_grid} -chname,${obs_name},${newname} ${clim_file} ${interp_file} if [ $ii -gt 0 ] then # For model data, convert units conv_file=${interp_file}_conv ${cdo} -O addc,${model_off} ${interp_file} ${conv_file} ${cdo} -O mulc,${model_frac} ${conv_file} ${interp_file} rm -rf ${conv_file} fi # Add final file to list of files to analyse interp_files+=(${interp_file}) done echo ${interp_files[*]} COMPARE=1 if [ $COMPARE -gt 0 ] then # Compare files echo " Compare ${interp_files[1]} and ${interp_files[0]} " ${cdo} sub ${interp_files[1]} ${interp_files[0]} ${diff_file} fi
diagnose_and_plot_openifs.sh 0 → 100644 +33 −0 Original line number Diff line number Diff line #!/bin/bash module load anaconda2 PRECIP=0 UVEL=1 if [ $PRECIP -gt 0 ] then DATE1=1997-01-01T00:00:00 DATE2=2010-01-01T00:00:00 PRECIP_OBS=/gfs1/work/shkjocke/data/OBS/gpcp_v01r03_monthly_1996-2017.nc4 PRECIP_OBS_NAME=precip PRECIP_MODEL=/gfs1/work/shkjocke/data/OIFS/T159L91/g001/NETCDF/g001_surface_diff_1m_1982-2009.nc PRECIP_MODEL_NAME=TP ./diag_openifs.sh ${PRECIP_OBS} ${PRECIP_OBS_NAME} ${PRECIP_MODEL} ${PRECIP_MODEL_NAME} 0 4000 ${DATE1} ${DATE2} python plot_diags.py --file=tmp/g001_surface_diff_1m_1982-2009_TP_diff.nc --vars=TP --diff python plot_diags.py --file=tmp/g001_surface_diff_1m_1982-2009_TP_clim_r180x90.nc --vars=TP --file2=tmp/gpcp_v01r03_monthly_1996-2017_precip_clim_r180x90.nc fi if [ $UVEL -gt 0 ] then DATE1=1982-01-01T00:00:00 DATE2=2010-01-01T00:00:00 UVEL_OBS=/gfs1/work/shkjocke/data/OBS/ERAinterim/uas.reana.eraint.1979-2017.r144x73.nc UVEL_OBS_NAME=uas UVEL_MODEL=/gfs1/work/shkjocke/data/OIFS/T159L91/g001/NETCDF/g001_surface_1m_1982-2009.nc UVEL_MODEL_NAME=U10M ./diag_openifs.sh ${UVEL_OBS} ${UVEL_OBS_NAME} ${UVEL_MODEL} ${UVEL_MODEL_NAME} 0 1 ${DATE1} ${DATE2} python plot_diags.py --file=tmp/g001_surface_1m_1982-2009_U10M_diff.nc --vars=${UVEL_MODEL_NAME} --diff fi
oifs/ifs2nc_10.sh 0 → 100644 +146 −0 Original line number Diff line number Diff line #!/bin/bash # # ifs2nc.sh # # Purpose # ------- # Convert OpenIFS output to netCDF files # # Methods # ------- # * Use CDO to split the ICMSH and ICMGG files # to one file vertical coordinate, e.g. hybrid, surface etc. # As surface variables are always stored as GRIB1 and hybrid variables # are always stored as GRIB2, this will not cause problems for CDO. # It may be necessary to split according to GRIB edition sometime later # * Use CDO to convert from reduced Gaussian or spectral to regular Gaussian grid # * Use CDO to convert the grib file to netCDF4/HDF5. # Level of compression set by CDO_ZIP variable. # # History # ------- # 2018-07-27: J. Kjellsson. First version # # # Here we need CDO and grib module load cdo GRIB_API_DIR=/sw/rhel6-x64/grib_api/grib_api-1.15.0-intel14/ grib_copy=${GRIB_API_DIR}/bin/grib_copy OIFS_OUTPUT_DIR=$5 OIFS_TMP_DIR=./TMP-IFS2NC/ OIFS_PROCESSED_DIR=$4 OIFS_EXPID=$6 OIFS_FIRSTSTEP=$1 OIFS_LASTSTEP=$2 OIFS_STEP=$3 mkdir -vp $OIFS_TMP_DIR mkdir -vp $OIFS_PROCESSED_DIR # Level of zipping (1-9). 0 means no zipping. # This can reduce the data size by up to 90% # It adds a little bit to the processing time. CDO_ZIP=1 echo " Process data from ${OIFS_FIRSTSTEP} to ${OIFS_LASTSTEP} " for (( step=${OIFS_FIRSTSTEP} ; step<=${OIFS_LASTSTEP} ; step=step+${OIFS_STEP} )) do timestamp=$( printf "%010d" $step ) # We split the data into one file per vertical grid # For writing in netCDF files, I think it makes no sense # to put data on different grids into the same file. # # Each netCDF file will have either surface, hybrid or isobaricInhPa as vert. coord # Other types could be depthBelowLandLayer for typeOfLevel in surface isobaricInhPa hybrid do FILES_ARRAY=() FILES_STRING=" " for GRID in SH GG do ORIGINAL_FILE=ICM${GRID}${OIFS_EXPID}+${timestamp} echo " Get data to ${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}.grb " $grib_copy -w typeOfLevel=$typeOfLevel ${OIFS_OUTPUT_DIR}/${ORIGINAL_FILE} ${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}.grb LEVEL_FILE=${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}.grb REGULAR_FILE=${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}_R.grb NETCDF_FILE=${OIFS_TMP_DIR}/${ORIGINAL_FILE}_${typeOfLevel}_R.nc if [ -e ${LEVEL_FILE} ]; then if [ $GRID == "GG" ]; then echo " Convert ${LEVEL_FILE} to ${REGULAR_FILE} (reduced Gaussian to regular Gaussian)" cdo -O setgridtype,regular ${LEVEL_FILE} ${REGULAR_FILE} elif [ $GRID == "SH" ]; then echo " Convert ${LEVEL_FILE} to ${REGULAR_FILE} (spectral harmonics to regular Gaussian)" cdo -O sp2gpl ${LEVEL_FILE} ${REGULAR_FILE} fi rm -f ${LEVEL_FILE} # Using grib_to_netcdf to convert data has the advantage of retaining variable names # but does not get the correct longitudes and latitudes. It also does not put a,b coeffs # into files with hybrid coordinates. #OPTS="-u time -k 3 -D NC_FLOAT" #grib_to_netcdf ${OPTS} ${REGULAR_FILE} -o ${NETCDF_FILE} # CDO OPTS="-t ecmwf -f nc4c -z zip_1 copy -sellonlatbox,-180,180,-90,90" cdo ${OPTS} ${REGULAR_FILE} ${NETCDF_FILE} rm -f ${REGULAR_FILE} #if [ $CDO_ZIP -gt 0 ]; then # cdo -O -f nc4c -z zip_1 copy ${NETCDF_FILE} ${NETCDF_FILE}4 # rm ${NETCDF_FILE}; mv ${NETCDF_FILE}4 ${NETCDF_FILE} #fi dtime=$(cdo -s showtimestamp ${NETCDF_FILE}) dtime2=$(echo $dtime | sed -r 's/T/ /g') DTIME=$(date -d "$dtime2" "+%F_%H%M%S") echo " Datetime for file ${NETCDF_FILE} is ${DTIME} " FILES_ARRAY+=(${NETCDF_FILE}) FILES_STRING=${FILES_STRING}" "${NETCDF_FILE} else echo " No file named ${LEVEL_FILE}. No such data in your output? " fi done GLUED_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_${typeOfLevel}_${DTIME}.nc echo " Glue files $FILES_STRING to one file $GLUED_FILE " cdo -O merge $FILES_STRING $GLUED_FILE rm -f ${FILES_STRING} if [ "${typeOfLevel}" == "hybrid" ]; then ZH_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_${typeOfLevel}_zh_${DTIME}.nc NEW_GLUED_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_${typeOfLevel}_with_zh_${DTIME}.nc echo " Calculate geopotential height from hybrid data " ${NEW_GLUED_FILE} cdo -O geopotheight -chname,lnsp,lsp ${GLUED_FILE} ${ZH_FILE} cdo -O merge ${GLUED_FILE} ${ZH_FILE} ${NEW_GLUED_FILE} rm ${GLUED_FILE} ${ZH_FILE} mv ${NEW_GLUED_FILE} ${GLUED_FILE} PLEV_FILE=${OIFS_PROCESSED_DIR}/${OIFS_EXPID}_plevel_${DTIME}.nc STD_LEVELS="100000,92500,85000,77500,70000,60000,50000,40000,30000,25000,20000,15000," STD_LEVELS=${STD_LEVELS}"10000,7000,5000,3000,2000,1000,700,500,300,200,100,50,20,10,5,2,1 " echo " Interpolate to pressure levels ${PLEV_FILE} " cdo -O ml2pl,${STD_LEVELS} ${GLUED_FILE} ${PLEV_FILE} fi done done