Commit d055a51d authored by Jean Matthieu Haussaire

Refactor the code so it works for both the Empa and NILU setups. Create a new background plugin for the Rödenbeck scheme; the actual work is done in the obsvect.
parent 6a4c1da5
from __future__ import absolute_import
import os
from types import MethodType
from pycif.utils import path
from pycif.utils.check import verbose
from .read_conc import read_conc
from .calc_init import calc_init
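# Declare plugin dependencies: this background needs a FLEXPART 'std' domain,
# resolved through pyCIF's plugin requirements mechanism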
requirements = {'domain': {'name': 'FLEXPART', 'version': 'std', 'empty': False}}
import os
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta
from netCDF4 import Dataset
import pycif.utils.check as check
from pycif.utils.netcdf import readnc
from pycif.utils.dates import j2d
def calc_init(self, model, dir_initconc, file_initconc, **kwargs):
    """Calculate the background concentration contribution to observations.

    Args:
        self: the background plugin
        model: the model plugin
        dir_initconc: directory holding the baseline files
        file_initconc: baseline file name pattern (strftime format)
    """
    subsimu_dates = model.subsimu_dates
    file_path = os.path.join(dir_initconc, file_initconc)
    file_baseline = subsimu_dates[0].strftime(file_path)
    # e.g. '/store/empa/em05/projects/VERIFY/TM5-4DVAR/BASELINES_ECMWF/data/TM5-to-FLE_v1_CH4_%Y_stations.nc'
    # NB: the block below still assumes an enclosing loop over observations
    # providing row, station, obs_i, obs_bkg and obs_bkgerr, as in obsoper;
    # see the sketch after this function
    with Dataset(file_baseline) as bkg:
        # Find the station index in the baseline file ('identification' is a
        # character array, so decode and join it into plain strings first)
        ids = [''.join(c.decode().lower() for c in name)
               for name in bkg['identification'][:]]
        station_index = np.argmax([station in name for name in ids])
        # Find the time index in the baseline file
        ts = np.array([pd.Timestamp(t[0], t[1], t[2], t[3], t[4])
                       for t in bkg['time1'][:]])
        try:
            time_index = np.where(ts == row.Index)[0][0]
            obs_bkg[obs_i] = bkg['CH4_c_trans'][time_index, station_index]
        except IndexError:
            print('ERROR: time not found in baseline file:', row.Index)
        # TODO: define a proper background error
        obs_bkgerr[obs_i] = 0
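For context, a minimal sketch of the per-observation driver this body assumes; the helper name and the datastore layout (a pandas DataFrame indexed by observation time, with a 'station' column) are hypothetical, mirroring the obsvect code below:

import numpy as np
import pandas as pd
from netCDF4 import Dataset

def background_for_datastore(datastore, file_baseline, varname='CH4_c_trans'):
    """Hypothetical driver: fill one background value per observation."""
    obs_bkg = np.full(len(datastore), np.nan)
    with Dataset(file_baseline) as bkg:
        ids = [''.join(c.decode().lower() for c in name)
               for name in bkg['identification'][:]]
        ts = np.array([pd.Timestamp(*t[:5]) for t in bkg['time1'][:]])
        for obs_i, row in enumerate(datastore.itertuples()):
            station_index = np.argmax([row.station in name for name in ids])
            match = np.where(ts == row.Index)[0]
            if match.size:
                obs_bkg[obs_i] = bkg[varname][match[0], station_index]
    return obs_bkg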
import os
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta
import pycif.utils.check as check
from pycif.utils.netcdf import readnc
from pycif.utils.dates import j2d
def read_conc(self, model, dir_initconc, file_initconc, **kwargs):
    # Nothing to read here: for the Rödenbeck scheme the background is taken
    # care of in the obsvect (see init_background_rodenbeck)
    pass
@@ -9,3 +9,4 @@ from . import flexpart
from pycif.utils.classes.background import Background
Background.register_plugin('FLEXPART', 'nc', flexpart)
Background.register_plugin('TM5-4DVAR', 'nc', flexpart)
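Both names point at the same flexpart module, so the Empa (TM5-4DVAR baselines) and NILU setups share one code path. A toy sketch of the registry pattern, not pyCIF's actual implementation:

# Toy registry keyed by (name, version); two names can map to one module
registry = {}

def register_plugin(name, version, module):
    registry[(name, version)] = module

class flexpart_module:  # stand-in for the flexpart plugin module
    pass

register_plugin('FLEXPART', 'nc', flexpart_module)
register_plugin('TM5-4DVAR', 'nc', flexpart_module)
assert registry[('FLEXPART', 'nc')] is registry[('TM5-4DVAR', 'nc')]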
@@ -77,9 +77,9 @@ def read_flexpart_grid(subdir, file_name, fp_header, **kwargs):
grid = np.ndarray((fp_header.numx, fp_header.numy, fp_header.maxngrid))
path_file = os.path.join(subdir, file_name)
if '.nc' in file_name:
grid_fp, ngrid, gtime_dt = read_grid_nc(path_file, file_dates, fp_header)
else:
grid_fp, ngrid, gtime = mod_flexpart.read_grid(
path_file, file_dates, fp_header.numx, fp_header.numy, \
fp_header.maxngrid, fp_header.xshift, fp_header.ndgrid, fp_header.trajdays)
@@ -92,10 +92,6 @@ def read_flexpart_grid(subdir, file_name, fp_header, **kwargs):
if gtime[i] == 0.:
break
gtime_dt.append(j2d(gtime[i]))
# Convert from ppt to ppmv or ppbv
@@ -118,27 +114,24 @@ def read_flexpart_grid(subdir, file_name, fp_header, **kwargs):
return grid_fp, gtime_dt, ngrid
def read_grid_nc(path_file, file_dates, fp_header):
    with Dataset(path_file) as nc:
        # spec001 has dimensions nageclass, numpoint, time, level, lat, lon;
        # numpoint determines the station. Assume one numpoint/station per file
        shape = nc['spec001'].shape
        if shape[0] > 1 or shape[1] > 1:
            verbose('WARNING: There is more than one station in the FLEXPART file.')
        if shape[3] > 1:
            verbose('INFO: Selecting the bottom layer of the grid')
        grid = nc['spec001'][0, 0, :, 0, :, :]  # time, lat, lon
        # Swap axes to get lon, lat, time
        grid_fp = np.swapaxes(grid, 0, -1)
        # The time variable holds offsets in seconds relative to the
        # simulation end date (global attribute 'iedate')
        times = np.sort(nc['time'][:])
        enddate = datetime.strptime(nc.getncattr('iedate'), '%Y%m%d')
        gtime = [enddate + timedelta(seconds=int(t)) for t in times]
    grid_fp *= 1e12  # same ppt scaling as applied in mod_flexpart.read_grid
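A self-contained toy check of the axis swap and time reconstruction used in read_grid_nc; the shapes, offsets, and iedate are made up, and the sign convention (offsets counted back in seconds from iedate) is an assumption consistent with the code above:

import numpy as np
from datetime import datetime, timedelta

grid = np.zeros((24, 90, 120))        # time, lat, lon
grid_fp = np.swapaxes(grid, 0, -1)    # -> lon, lat, time
assert grid_fp.shape == (120, 90, 24)

# Assumed convention: negative offsets in seconds counted back from 'iedate'
enddate = datetime.strptime('20120111', '%Y%m%d')
offsets = np.sort(np.array([-10800, -7200, -3600]))
gtime = [enddate + timedelta(seconds=int(t)) for t in offsets]
assert gtime[-1] == datetime(2012, 1, 10, 23, 0)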
@@ -3,9 +3,6 @@ import datetime
import numpy as np
import os
from pycif.utils import path
from pycif.utils.check import verbose
from pycif.utils.datastores.dump import dump_datastore, read_datastore
@@ -76,41 +73,41 @@ def obsoper(self, inputs, mode,
if mode == 'fwd':
controlvect = inputs
obsvect.datastore.loc[:, 'sim'] = np.nan
# Various initializations
# Get flexpart headers
subdir_fmt = getattr(model, "subdir", "%Y%m")
subdir = subsimu_dates[0].strftime(subdir_fmt)
if model.plugin.nested:
fp_header_glob = model.utils.flexpart_header.Flexpartheader()
try:
fp_header_glob.read_header(
os.path.join(model.run_dir_glob, obsvect.datastore.head(1)['station'][0].decode().
upper(),subdir, 'header'))
except AttributeError as e:
fp_header_glob.read_header(
os.path.join(model.run_dir_glob, obsvect.datastore.head(1)['station'][0].
upper(),subdir, 'header'))
header_name = "header_nest"
else:
header_name = "header"
try:
fp_header_nest = model.utils.flexpart_header.Flexpartheader()
fp_header_nest.read_header(
os.path.join(model.run_dir_nest, obsvect.datastore.head(1)['station'][0].
decode().upper(), subdir, header_name))
except AttributeError as e:
fp_header_nest.read_header(
os.path.join(model.run_dir_nest, obsvect.datastore.head(1)['station'][0].
upper(), subdir, header_name))
# Trajectory life time
trajdays = fp_header_nest.trajdays
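# trajdays (footprint length in days) is reused below to select the
# initial-condition (cini) period matching each observation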
@@ -161,53 +158,86 @@ def obsoper(self, inputs, mode,
print("di, df", di, df)
for obs_i, row in enumerate(obsvect.datastore.itertuples()):
# Subdirectory for FLEXPART footprints
subdir = row.Index.strftime(subdir_fmt)
# For debugging
obs_check[obs_i] = obs_i
station = row.station
try:
runsubdir_nest = os.path.join(
model.run_dir_nest, station.decode().upper(), subdir)
if model.plugin.nested:
runsubdir_glob = os.path.join(
model.run_dir_glob, station.decode().upper(), subdir)
except AttributeError as e:
runsubdir_nest = os.path.join(
model.run_dir_nest, station.upper(), subdir)
if model.plugin.nested:
runsubdir_glob = os.path.join(
model.run_dir_glob, station.upper(), subdir)
file_fmt = '%Y%m%d%H%M%S'
model_fmt = getattr(model, "format", 'bin')
if model_fmt == 'bin':
file_fmt += '_001'
elif model_fmt == 'nc':
file_fmt += '.nc'
if model.plugin.nested:
file_fmt_glob = 'grid_time_' + file_fmt
file_fmt = 'grid_time_nest_' + file_fmt
else:
file_fmt = 'grid_time_' + file_fmt
# Read nested grids
file_name = row.Index.strftime(file_fmt)
# If file is not found, continue
if not os.path.isfile(os.path.join(runsubdir_nest, file_name)):
verbose("HJM WARNING: file not found: " + os.path.join(runsubdir_nest, file_name))
verbose("WARNING: file not found: " + os.path.join(runsubdir_nest, file_name))
continue
grid_nest, gtime, ngrid = model.utils.read.read_flexpart_grid(
runsubdir_nest, file_name, fp_header_nest, numscale=model.numscale)
grid_nest *= model.coeff*model.mmair/model.molarmass
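# Convert the footprint sensitivity to mixing-ratio units: mmair/molarmass is
# the molar-mass ratio of air to the traced species, and coeff a unit scaling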
if model.plugin.nested:
# Read global footprints
# TODO read correction factor dry air
file_name = row.Index.strftime(file_fmt_glob)
if not os.path.isfile(os.path.join(runsubdir_glob, file_name)):
verbose("Warning: file not found, ", os.path.join(runsubdir_glob, file_name))
continue
grid_glob, gtime_glob, ngrid_glob = model.utils.read.read_flexpart_grid(
runsubdir_glob, file_name, fp_header_glob, numscale=model.numscale)
grid_glob *= model.coeff*model.mmair/model.molarmass
# Background contribution from fluxes outside domain
hbkg = np.sum(grid_glob[:, :, 0:ngrid_glob], axis=2)
hbkg[ix1:ix2, iy1:iy2] = 0.0
# Index to state vector
ind = np.argmin(np.abs(tracer.dates[0:-1] - gtime_glob[0]))
# TODO: calculate on first iteration ,then store?
obs_bkg[obs_i] = np.sum(hbkg[:,:].T*controlvect.flx_bkg[ind,:,:])
obs_bkgerr[obs_i] = np.sum(hbkg[:,:].T*np.abs(controlvect.flx_bkg[ind,:,:]))*tracer.err
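# Note: hbkg above is the total footprint sensitivity with the nested window
# [ix1:ix2, iy1:iy2] zeroed out, so the dot products with flx_bkg measure only
# what fixed fluxes outside the inversion domain contribute to this observation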
else:
obs_bkg[:] = 0
obs_bkgerr[:] = 0
# Transport for boxes in regions
hbox = np.zeros((ngrid,nbox), np.float64)
reg = np.unique(tracer.regions)
for r in reg:
@@ -215,16 +245,15 @@ def obsoper(self, inputs, mode,
wh = np.where(tracer.regions == r)
hbox[:,r-1] = np.sum( grid_nest[wh[1],wh[0],:ngrid], axis=0)
hbox = hbox.flatten()
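# Example: with region labels 1..nbox on the (lat, lon) grid, column r-1 of
# hbox holds, for each footprint time step, the summed sensitivity of all
# cells labelled r; flattening gives the time-major blocks indexed as
# hbox[i*nbox:(i+1)*nbox] further down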
# TODO: account for possible difference nested grid/inversion domain
hnest = grid_nest
istate = np.zeros(ngrid, dtype=int) - 1
# Calculate indices to state vector
for i, j in enumerate(gtime):
#if j > df:
# Avoid including the midnight files at period end
if j >= df:
@@ -241,7 +270,6 @@ def obsoper(self, inputs, mode,
# ntstep: number of inversion intervals covered by the footprints
ntstep = int(np.max(istate[istate > -1]) - np.min(istate[istate > -1]) + 1)
hx = np.zeros(ntstep*nbox)
px = np.zeros(ntstep*nbox)
@@ -254,12 +282,41 @@ def obsoper(self, inputs, mode,
px[(ns-1)*nbox:ns*nbox] = controlvect.x[istate[i]*nbox:(istate[i]+1)*nbox]
hx[(ns-1)*nbox:ns*nbox] += hbox[i*nbox:(i+1)*nbox]
obs_model[obs_i] = np.dot(hx, px)
# Change in mixing ratio from best guess estimate
# TODO: This can possibly be calculated at first iteration only
obs_ghg[obs_i] = 0.
for i, itim in enumerate(gtime):
ind = np.argmin(np.abs(tracer.dates[0:-1] - itim))
obs_ghg[obs_i] += np.sum(hnest.T[i,:,:] * controlvect.flxall[ind,:,:])
if hasattr(obsvect, 'background'):
cini = np.zeros(obsvect.background.ncini)
for i, name in enumerate(obsvect.background.cini_names):
cini[i] = getattr(row, name)
if obsvect.background.optimize_cini is False:
obs_cinipos[obs_i] = np.sum(cini[:])
else:
# number of initial mixing ratio variables to optimize
ncini = obsvect.background.ncini
# Time step of the initial-concentration optimization: pick the first cini
# period whose window still covers the footprint start (obs time - trajdays)
mask = (row.Index - datetime.timedelta(days=trajdays)
- controlvect.background.cinitime[:]
- datetime.timedelta(days=controlvect.background.cini_res)) < datetime.timedelta(0)
ni = int(np.argmax(mask))
# Force the last time step if in the last cini period (mask is all False)
if all(~mask):
ni = controlvect.background.ntcini - 1
obs_cinipos[obs_i] = np.dot(cini[:],
controlvect.x[npvar+ni*ncini:npvar+(ni+1)*ncini])
else:
obs_cinipos[:]=0
if getattr(tracer, 'offsets', False):
@@ -267,18 +324,13 @@ def obsoper(self, inputs, mode,
obs_sim[obs_i] = obs_model[obs_i] + obs_ghg[obs_i] + obs_bkg[obs_i] + obs_cinipos[obs_i]
else:
# Optimize fluxes
obs_sim[obs_i] = obs_model[obs_i] + obs_bkg[obs_i] + obs_cinipos[obs_i]
# calculate gradient in observation space
# Jo'(p) = H^TR^-1(H(p) - y)
# calculate as: Jo'(p) = sum( H_i^T*ydel_i*R_i )
# Contribution to observation gradient from obs_i
departure = obs_sim[obs_i] - row.obs
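# For one observation this is the footprint row H_i scaled by the departure
# and the observation error weight; summed over observations it yields Jo'(p)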
# if background missing then observation should have no influence
@@ -5,6 +5,8 @@ import os
import shutil
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
from netCDF4 import Dataset
from pycif.utils.datastores.dump import dump_datastore, read_datastore
from pycif.plugins.obsvects.standard.utils.tstep import tstep
from pycif.plugins.obsvects.standard.utils.crop_monitor import crop_monitor
@@ -14,7 +16,70 @@ from pycif.utils import path
from pycif.utils.check.errclass import PluginError
def init_background(obsvect, **kwargs):
    if obsvect.background.plugin.name == "FLEXPART":
        return init_background_flexinv(obsvect, **kwargs)
    elif obsvect.background.plugin.name == "TM5-4DVAR":
        return init_background_rodenbeck(obsvect, **kwargs)

def init_background_rodenbeck(obsvect, **kwargs):
    model = obsvect.model
    background = obsvect.background
    varname = background.varname_conc
    datei = obsvect.datei
    # NB: do not call this 'path'; that would shadow pycif.utils.path
    file_pattern = os.path.join(background.dir_initconc, background.file_initconc)
    file_baseline = datei.strftime(file_pattern)
    # TODO: manage multiple background files at once
    if obsvect.datef.strftime(file_pattern) != file_baseline:
        raise ValueError('The simulation period spans multiple background files')
    obs_cini = np.full(len(obsvect.datastore), np.nan)
    # Loop over all observations and fetch the background contribution
    with Dataset(file_baseline) as bkg:
        # Decode the station identifiers (stored as character arrays)
        ids = np.array([''.join(c.decode().lower() for c in name)
                        for name in bkg['identification'][:]])
        # Decode the baseline time axis once, outside the observation loop
        ts = np.array([pd.Timestamp(t[0], t[1], t[2], t[3], t[4])
                       for t in bkg['time1'][:]])
        for obs_i, row in enumerate(obsvect.datastore.itertuples()):
            station = row.station
            if isinstance(station, bytes):
                station = station.decode()
            station_indices = np.where([station.lower() in name for name in ids])
            if len(station_indices[0]) == 0:
                print('ERROR: Station %s not found in the background file %s'
                      % (station, file_baseline))
                continue
            elif len(station_indices[0]) > 1:
                print('WARNING: More than one station %s found in the '
                      'background file %s' % (station, file_baseline))
                print('\t', *ids[station_indices[0]])
                print('Picking the first one:', ids[station_indices[0][0]])
            station_index = station_indices[0][0]
            # Find the time index in the baseline file
            try:
                time_index = np.where(ts == row.Index)[0][0]
                obs_cini[obs_i] = bkg[varname][time_index, station_index]
            except IndexError:
                print('WARNING: time not found in baseline file:', row.Index)
    obsvect.background.cini_names = ['obs_cini_0']
    obsvect.background.ncini = 1
    obsvect.datastore['obs_cini_0'] = obs_cini
    if background.optimize_cini:
        raise ValueError('The Rödenbeck background cannot be optimized')
    return obsvect
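Downstream, obsoper picks the stored column up again through getattr(row, name) (see the obs_cinipos block above); a minimal illustration of that flow with a toy datastore:

import numpy as np
import pandas as pd

# Toy datastore: two observations with a precomputed background column
datastore = pd.DataFrame(
    {'station': ['cmn', 'jfj'], 'obs_cini_0': [1890.5, 1875.2]},
    index=pd.to_datetime(['2012-01-05 12:00', '2012-01-05 13:00']))

cini_names = ['obs_cini_0']
for row in datastore.itertuples():
    # optimize_cini is False for the Rödenbeck scheme, so the background is
    # simply the sum of the stored cini columns
    obs_cinipos = np.sum([getattr(row, name) for name in cini_names])
    print(row.Index, obs_cinipos)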
def init_background_flexinv(obsvect, **kwargs):
    """Initializes the background

    Reads and regrids concentration fields
    Reads sensitivity to initial conditions fields