Commit 122d0b9d authored by Espen Sollum

Optimization of the background

parent 1efc916d
......@@ -62,6 +62,37 @@ model :
molarmass : 16.
numscale : 1.E12
####################################################################
####################################################################
# Measurements to account for
# Main keys are:
# - infos: information on the tracers to include in the simulation
# - fic_monitor: standard txt file including all observations
measurements :
plugin:
name: standard
version: std
# File where to save data if it does not exist; read from there if it exists
# file_monit : /home/eso/repos/CIF/flextest/monitor_allobs.nc
# file_monitor : /home/eso/repos/CIF/flextest/monitor_allobs.nc
dump_type : nc
species :
CH4 :
dailyPM :
provider : WDCGG
format : std
dir_obs : /home/eso/FLEX_INV/CH4/TEST_INPUT/OBS/GHG/*.cn.*
# rescale : false
rescale : true
na_values : -999999.99
default_unit : ppb
default_duration: 1
dump : True
# Minimum measurement error
measerr : 5.0
#####################################################################
#####################################################################
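For illustration, a minimal sketch (not the plugin's actual code) of how a minimum measurement error such as measerr above acts as a floor on the reported observation error, assuming a pandas datastore with an 'obserror' column as used in obsoper:

import numpy as np
import pandas as pd

measerr = 5.0       # ppb, minimum measurement error from the key above
datastore = pd.DataFrame({"obs": [1850.2, 1903.7], "obserror": [2.1, 7.4]})
# Raise any reported error below measerr up to measerr
datastore["obserror"] = np.maximum(datastore["obserror"], measerr)
print(datastore)    # the 2.1 ppb error is raised to 5.0 ppb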
......@@ -76,7 +107,7 @@ model :
# - varname_initconc: name of the data variable in the netCDF file
# - optimize_cini: optimize initial concentration or not
# - cini_lat: northern edges of latitude bands for cini optimization
# - cini_err: Initial concentration error (unit same as observations)
# - cini_err: Initial concentration error (unit: same as observations, or a fraction? to be confirmed)
obsvect:
plugin:
name: standard
......@@ -99,44 +130,14 @@ obsvect:
file_initconc: ch4_noaa_%Y%m.nc
varname_conc: CH4
optimize_cini: True
# Time resolution for initial mixing ratio scalars (days)
cini_res : 28.
cini_lat: [-30., 0., 30., 90.]
cini_err: 5.
cini_err: .01
#####################################################################
#####################################################################
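To make the sizing concrete: the number of background scalars appended to the control vector is the number of latitude bands (cini_lat) times the number of cini_res periods in the simulation window, mirroring the init_background changes further down in this commit. A minimal sketch with a hypothetical simulation window:

from datetime import datetime

# Values from the configuration above
cini_res = 28.                      # days per initial-mixing-ratio period
cini_lat = [-30., 0., 30., 90.]     # northern edges of the latitude bands

# Hypothetical simulation window
datei = datetime(2012, 1, 1)
datef = datetime(2012, 3, 31)

ncini = len(cini_lat)                                             # zones (bands)
ntcini = max(1, int((datef - datei).days + 1) // int(cini_res))   # time periods
print(ncini, ntcini, ncini * ntcini)    # scalars appended to the control vector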
# Measurements to account for
# Main keys are:
# - infos: information on the tracers to include in the simulation
# - fic_monitor: standard txt file including all observations
measurements :
plugin:
name: standard
version: std
# File where to save data if it does not exist; read from there if it exists
# file_monit : /home/eso/repos/CIF/flextest/monitor_allobs.nc
# file_monitor : /home/eso/repos/CIF/flextest/monitor_allobs.nc
dump_type : nc
species :
CH4 :
dailyPM :
provider : WDCGG
format : std
dir_obs : /home/eso/FLEX_INV/CH4/TEST_INPUT/OBS/GHG/*.cn.*
# rescale : false
rescale : true
na_values : -999999.99
default_unit : ppb
default_duration: 1
dump : True
# Minimum measurement error
measerr : 5.0
####################################################################
####################################################################
# spaces to control/observation spaces
# - read_only: do not run the transport model, read output from existing run
......@@ -190,8 +191,9 @@ mode:
name: gausscost
version: flexpart
reload_from_previous: False
maxiter: 20
df1: 1
maxiter: 10
df1: 1.0
# df1: 0.01
nsim: 100
m: 1
save_out_netcdf: True
......@@ -203,6 +205,8 @@ mode:
# Please refer to the documentation to know what to include here
# For the standard FLEXPART, include the following:
# - ficlsm: land-sea mask (must be consistent with LMDZ grid)
# - regions_lsm : if true, use land/sea mask encoded in fileregions (sea: < 0, land: > 0)
# This overrides the lsm file, if one is also provided
# - correl: Use correlation or not in B
# - dircorrel: path to pre-computed correlations
# - sigma_land: spatial correlation length for prior errors over land (km)
......@@ -215,7 +219,8 @@ mode:
# if bands, define a list of latitudes as band limits (n+1 for n bands)
# - inc_ocean: for 'regions', optimize ocean boxes (default true)
# - periodflux: period of variation for increments within a month (days)
# - glob_err (optional) = uncertainty on global budget
# - flxerr_ll: lower limit on the flux error (unit: kg/m2/h)
# - glob_err (optional): uncertainty on the global budget (unit: Tg/y)
controlvect:
plugin:
name: standard
......@@ -233,14 +238,10 @@ controlvect:
hresol : regions
inc_ocean : true
fileregions : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/regions_ghg.nc
# regions_lsm : if true, use land/sea mask encoded in fileregions (sea: < 0, land: > 0)
# This overrides usage of lsm file, if this is also provided
regions_lsm : True
err : 0.5
# Lower limit flux error: unit (kg/m2/h)
flxerr_ll: 1.e-8
# Total error inversion domain: unit (Tg/y)
glob_err: 50.
# glob_err: 50.
numscale : 1.E12
xb_scale : 1.
period : 1D
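Taken together, the error keys above work roughly as follows (paraphrasing the init_xb and init_bprod changes below, not the exact pyCIF code): the prior flux uncertainty is err times the prior flux, floored by flxerr_ll, and, if glob_err is set, rescaled so the aggregated domain total matches glob_err. A minimal sketch with toy numbers; the aggregated toterr value is assumed here, and the exact glob_err scaling is still marked as TODO in this commit:

import numpy as np

err = 0.5              # relative prior error on the fluxes
flxerr_ll = 1.e-8      # lower limit on the flux error (kg/m2/h)
numscale = 1.E12
glob_err = 50.         # optional total-domain error (Tg/y)

flx = np.full((10, 10), 2.e-11) * numscale            # toy prior fluxes (scaled units)
std = err * np.abs(flx)
std = np.maximum(std, flxerr_ll * numscale / 3600.)   # apply the floor, converted to per-second

# If glob_err is given, rescale the standard deviations so that the
# aggregated error over the whole domain matches glob_err (illustration only)
toterr = 40.                   # hypothetical aggregated error in Tg/y
errscalar = glob_err / toterr
std = std * errscalar
print(std.mean())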
......
from builtins import range
from .init_xb import init_xb
from .background_xb import background_xb
from .init_bprod import init_bprod
from .control2native import control2native
from .native2control import native2control
......@@ -29,6 +30,7 @@ def ini_data(plugin, **kwargs):
"""
# Initializes reference directories if needed
init_dir('{}/controlvect/'.format(plugin.workdir))
......
......@@ -20,7 +20,7 @@ def init_bprod(cntrlv, options={}, **kwargs):
updated control vector:
Todo:
ESO: Include total domain error to sale/load routines
ESO: Include total domain error to save/load routines
"""
......@@ -101,7 +101,6 @@ def init_bprod(cntrlv, options={}, **kwargs):
if glob_err < 0:
raise Exception("glob_err must be >0")
# Get area of region boxes
# TODO: could compute elsewhere and store in tracer.domain
area_reg = np.squeeze(map2scale(
......@@ -115,8 +114,8 @@ def init_bprod(cntrlv, options={}, **kwargs):
errscalar = glob_err/toterr
#cntrlv.std = cntrlv.std*errscalar**2
verbose("Total error scaled by "+ str(errscalar))
# TODO: scale cross correlations as well
# probably apply scaling to hcorr?
# TODO: rework this based on most recent flexinvert version
......
......@@ -89,8 +89,6 @@ def init_xb(cntrlv, **kwargs):
# Set a lower limit on the flux error
tracer.flxerr_ll = getattr(tracer, 'flxerr_ll', 1.e-8)
min_err = tracer.flxerr_ll*float(tracer.numscale)/3600.
# std[std < tracer.flxerr_ll*float(tracer.numscale)/3600.] = \
# tracer.flxerr_ll*float(tracer.numscale)/3600.
std = np.maximum(std, min_err)
# Keep the un-aggregated fluxes for later use in obsoper
......@@ -104,7 +102,6 @@ def init_xb(cntrlv, **kwargs):
+ xb_value
cntrlv.flx_bkg = flx_bkg
# Most types are scalar factors
......
......@@ -68,6 +68,10 @@ def sqrtbprod(controlvect, chi, **kwargs):
# Filling corresponding part in the control vector
xout[x_pointer:x_pointer + x_dim] = chi_tmp
# Fill data for background scalars (if present)
if hasattr(controlvect, 'background'):
xout[-controlvect.background.ncini::] = chi[-controlvect.background.ncini::]
return xout * controlvect.std + controlvect.xb
......@@ -132,5 +136,12 @@ def sqrtbprod_ad(controlvect, dx, **kwargs):
# Filling Chi
chiout[chi_pointer:chi_pointer + chi_dim] = xstd
# Fill data for background scalars (if present)
if hasattr(controlvect, 'background'):
chiout[-controlvect.background.ncini::] = dx[-controlvect.background.ncini::] * \
controlvect.background.cini_err
return chiout
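With a diagonal B, the change of variable implemented by this pair is x = xb + std * chi, and the adjoint maps a gradient dx back to chi space; the background scalars simply occupy the last ncini entries, with their std assumed equal to cini_err. A toy sketch of that pair, ignoring the per-region correlation blocks of the real routine:

import numpy as np

nflux, ncini = 6, 4
cini_err = 0.01

xb = np.concatenate([np.full(nflux, 2.0), np.ones(ncini)])           # prior state
std = np.concatenate([np.full(nflux, 0.5), np.full(ncini, cini_err)])

def sqrtbprod(chi):
    # x = xb + B^(1/2) chi  (diagonal B)
    return chi * std + xb

def sqrtbprod_ad(dx):
    # adjoint: maps a gradient in x space back to chi space
    return dx * std

chi = np.zeros(nflux + ncini)
print(np.allclose(sqrtbprod(chi), xb))      # True: chi = 0 returns the prior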
......@@ -72,6 +72,7 @@ def congrad(self,
# Lanczos iteration starts here
ingood = 0
iiter = 0
while 1: # Lanczos_loop
# Evaluate the Hessian applied to the latest Lanczos vector
......
......@@ -28,6 +28,7 @@ def execute(self, **kwargs):
# Simulation window
datei = self.datei
datef = self.datef
# Minimizer
minimizer = self.minimizer
......
......@@ -27,5 +27,14 @@ def ini_data(plugin, **kwargs):
# Initializes the directory
path.init_dir('{}/obsoperator'.format(workdir))
# ESO: this is a quick fix to do optimization of the background until a better solution
# is found.
if hasattr(plugin.obsvect, 'background'):
if plugin.obsvect.background.optimize_cini:
if not plugin.obsvect.background.updated_controlvect:
plugin.controlvect.background = plugin.obsvect.background
plugin.controlvect = plugin.controlvect.background_xb(plugin.controlvect)
return plugin
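background_xb itself is not shown in this diff; as a rough mental model only, one may assume it appends the ntcini x ncini initial-mixing-ratio scalars to the control vector with prior value 1 and prior standard deviation cini_err, a guess consistent with how sqrtbprod and obsoper index the tail of controlvect.x, not the actual implementation. A hypothetical sketch:

import numpy as np

class ControlVect:
    """Hypothetical, minimal stand-in for pycif's control vector object."""
    pass

def background_xb(cntrlv, ntcini=3, ncini=4, cini_err=0.01):
    # Guess: append one scaling factor per (cini period, latitude band),
    # with prior value 1 and prior standard deviation cini_err
    nbg = ntcini * ncini
    cntrlv.xb = np.concatenate([cntrlv.xb, np.ones(nbg)])
    cntrlv.std = np.concatenate([cntrlv.std, np.full(nbg, cini_err)])
    cntrlv.dim = cntrlv.xb.size
    return cntrlv

cv = ControlVect()
cv.xb = np.full(6, 2.0)
cv.std = np.full(6, 0.5)
cv = background_xb(cv)
print(cv.dim)   # 6 flux elements + 12 background scalars = 18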
......@@ -90,7 +90,9 @@ def obsoper(self, inputs, mode,
os.path.join(model.run_dir_nest, obsvect.datastore.head(1)['station'][0].upper(),
subdir, 'header_nest'))
# Trajectory life time
trajdays = fp_header_glob.trajdays
# Get the domain definition
species = getattr(controlvect.components, 'fluxes').parameters.attributes[0]
tracer = getattr(getattr(controlvect.components, 'fluxes').parameters, species)
......@@ -102,6 +104,11 @@ def obsoper(self, inputs, mode,
npvar = tracer.ndates*nbox
ndvar = nbox
nvar = npvar
if hasattr(obsvect, 'background'):
if obsvect.background.optimize_cini:
nvar += obsvect.background.ntcini * obsvect.background.ncini
grad_o = np.zeros(nvar)
ix1 = model.domain.ix1
......@@ -110,12 +117,11 @@ def obsoper(self, inputs, mode,
iy2 = model.domain.iy2
else:
raise Exception("For FLEXPART, only hresol:regions are implemented in controlvect")
# Loop through model periods and read model output
self.missingperiod = False
# import pdb; pdb.set_trace()
for di, df in zip(subsimu_dates[:-1], subsimu_dates[1:]):
# Save to datastore for debugging purposes
......@@ -127,6 +133,9 @@ def obsoper(self, inputs, mode,
obs_bkgerr = np.empty(len(obsvect.datastore)); obs_bkgerr[:]=np.nan
obs_err = np.empty(len(obsvect.datastore)); obs_err[:]=np.nan
obs_cinipos = np.empty(len(obsvect.datastore)); obs_cinipos[:]=np.nan
# Loop over observation dates
print("di, df", di, df)
......@@ -199,7 +208,7 @@ def obsoper(self, inputs, mode,
istate = np.zeros(ngrid, dtype=int) - 1
# Calculate indices to state vector
for i,j in enumerate(gtime):
for i, j in enumerate(gtime):
#if j > df:
# Avoid including the midnight files at period end
if j >= df:
......@@ -238,24 +247,32 @@ def obsoper(self, inputs, mode,
obs_ghg[obs_i] += np.sum(hnest.T[i,:,:] * controlvect.flxall[ind,:,:])
# Get background contribution
if hasattr(obsvect, 'background'):
cini = np.zeros(obsvect.background.ncini)
for i, name in enumerate(obsvect.background.cini_names):
cini[i] = getattr(row, name)
# TODO: do the optimization
if obsvect.background.optimize_cini is False:
cini = 0.
for name in obsvect.cini_names:
cini += getattr(row, name)
else:
raise Exception(
"optimize_cini=True not yet implemented")
if obsvect.background.optimize_cini is False:
obs_cinipos[obs_i] = np.sum(cini[:])
else:
# number of initial mixing ratio variables to optimize
ncini = obsvect.background.ncini
# Time-step index (ni) of the initial concentration optimization
mask = row.Index - datetime.timedelta(days=trajdays) - controlvect.background.cinitime[:] < datetime.timedelta(0)
ni = int(np.argmax(mask))
obs_cinipos[obs_i] = np.dot(cini[:],
controlvect.x[npvar+ni*ncini:npvar+(ni+1)*ncini])
if getattr(tracer, 'offsets', False):
# Optimize offsets
obs_sim[obs_i] = obs_model[obs_i] + obs_ghg[obs_i] + obs_bkg[obs_i] + cini
obs_sim[obs_i] = obs_model[obs_i] + obs_ghg[obs_i] + obs_bkg[obs_i] + obs_cinipos[obs_i]
else:
# Optimize fluxes
obs_sim[obs_i] = obs_model[obs_i] + obs_bkg[obs_i] + cini
obs_sim[obs_i] = obs_model[obs_i] + obs_bkg[obs_i] + obs_cinipos[obs_i]
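In words: the background term of each observation is the dot product of its per-band sensitivities (obs_cini_*) with the block of optimized scalars for the cini period that starts after the back-trajectory origin (observation time minus trajdays). A stripped-down sketch with placeholder values, assuming period start times spaced cini_res days apart:

import datetime
import numpy as np

npvar = 20                          # length of the flux part of the control vector
ncini, ntcini = 4, 3
trajdays = 10.                      # back-trajectory length from the FLEXPART header (days)
cinitime = np.array([datetime.datetime(2012, 1, 1) + datetime.timedelta(days=28 * i)
                     for i in range(ntcini)])          # assumed period start times
x = np.concatenate([np.ones(npvar), np.ones(ncini * ntcini)])

obs_time = datetime.datetime(2012, 2, 20)
cini = np.array([0.0, 12.5, 1800.0, 3.2])   # per-band sensitivities for this observation

# First cini period starting after the back-trajectory origin (obs time minus trajdays)
mask = obs_time - datetime.timedelta(days=trajdays) - cinitime < datetime.timedelta(0)
ni = int(np.argmax(mask))
obs_cinipos = np.dot(cini, x[npvar + ni * ncini: npvar + (ni + 1) * ncini])
print(ni, obs_cinipos)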
# calculate gradient in observation space
# Jo'(p) = H^TR^-1(H(p) - y)
......@@ -265,8 +282,10 @@ def obsoper(self, inputs, mode,
departure = obs_sim[obs_i] - row.obs
# If the background is missing, the observation should have no influence
if cini == 0.:
departure = 0.
if hasattr(obsvect, 'background'):
if np.sum(cini[:]) == 0.:
departure = 0.
obs_cinipos[obs_i] = 0.
# Add background contribution to observation error
obs_err[obs_i] = np.sqrt(obs_bkgerr[obs_i]**2 + row.obserror**2)
......@@ -278,8 +297,12 @@ def obsoper(self, inputs, mode,
grad_o[istate_uni[n]*ndvar:(istate_uni[n]+1)*ndvar] += \
hx[n*ndvar:(n+1)*ndvar] * departure/obs_err[obs_i]**2
obsvect.dx = grad_o
# calculate contribution to gradient from initial mixing ratios
if hasattr(obsvect, 'background'):
if obsvect.background.optimize_cini:
grad_o[npvar+ni*ncini:npvar+(ni+1)*ncini] += cini[:] * departure/obs_err[obs_i]**2
obsvect.dx = grad_o
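The new initial-mixing-ratio block of the gradient mirrors the flux part of Jo'(p) = H^T R^-1 (H(p) - y), with the per-band sensitivities playing the role of the corresponding row of H; a short illustration with placeholder values:

import numpy as np

cini = np.array([0.0, 12.5, 1800.0, 3.2])    # per-band sensitivities (placeholder values)
departure, obs_err = 3.0, 5.0                # H(p) - y and total observation error (placeholders)
grad_block = cini * departure / obs_err**2   # added into grad_o[npvar+ni*ncini:npvar+(ni+1)*ncini]
print(grad_block)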
# Add the different components to datastore
obsvect.datastore['sim'] = obs_sim
......@@ -293,12 +316,9 @@ def obsoper(self, inputs, mode,
obsvect.datastore['obs_model'] = obs_model
obsvect.datastore['obs_sim'] = obs_sim
obsvect.datastore['obs_check'] = obs_check
if obsvect.background.optimize_cini is False:
obsvect.datastore['obs_cinipos'] = obsvect.datastore[obsvect.cini_names].sum(axis=1)
else:
raise Exception(
"optimize_cini=True not yet implemented")
if hasattr(obsvect, 'background'):
obsvect.datastore['obs_cinipos'] = obs_cinipos
# If any FLEXPART files were missing for an observation, remove corresponding row
obsvect.datastore.dropna(axis=0, how='any', subset=['obs_model'], inplace=True)
......@@ -362,13 +382,18 @@ def obsoper(self, inputs, mode,
path.init_dir(rundir)
dump_type = obsvect.dump_type
col2dump = ['obs_ghg', 'obs_bkg', 'obs_model', 'obs_sim', 'obs_check',
'obs_bkgerr', 'obs_err', 'obs_hx']
if hasattr(obsvect, 'background'):
col2dump.append('obs_cinipos')
if dump_debug:
sort_order = getattr(obsvect, 'sort_order', ['index', 'station'])
dump_datastore(obsvect.datastore,
file_monit='{}/monitor_{}_.{}'.format(rundir, run_id, dump_type),
mode='w', dump_type=dump_type,
col2dump=['obs_ghg', 'obs_bkg', 'obs_model', 'obs_sim', 'obs_check',
'obs_bkgerr', 'obs_err', 'obs_hx', 'obs_cinipos'],
col2dump=col2dump,
sort_order=sort_order)
# Returning obsvect to the simulator
......
......@@ -16,7 +16,6 @@ import numpy as np
# to initialize the observation vector
requirements = {'measurements': {'any': True, 'empty': True},
'model': {'any': True, 'empty': False}}
# 'background': {'any': True, 'empty': False}}
def ini_data(plugin, **kwargs):
......@@ -41,7 +40,9 @@ def ini_data(plugin, **kwargs):
# Initializing y0
measurements = plugin.measurements
plugin.dump_type = getattr(plugin, 'dump_type', 'nc')
init_y0(plugin, measurements, **kwargs)
sort_order = getattr(plugin, 'sort_order', ['index', 'station'])
init_y0(plugin, measurements, sort_order=sort_order, **kwargs)
# Initialize R if any observation
if plugin.datastore.size > 0:
......
......@@ -4,7 +4,7 @@ import sys
import os
import shutil
import numpy as np
from datetime import timedelta
from datetime import datetime, timedelta
from pycif.utils.datastores.dump import dump_datastore, read_datastore
from pycif.plugins.obsvects.standard.utils.tstep import tstep
from pycif.plugins.obsvects.standard.utils.crop_monitor import crop_monitor
......@@ -13,8 +13,6 @@ from pycif.utils.check import verbose
from pycif.utils import path
from pycif.utils.check.errclass import PluginError
import datetime as dt
def init_background(obsvect, **kwargs):
"""Initializes the background
......@@ -33,7 +31,9 @@ def init_background(obsvect, **kwargs):
background = obsvect.background
model = obsvect.model
datef = obsvect.datef
datei = obsvect.datei
# Read global concentration fields. Regrid to model grid and
# filter by start and end date of simulation
conc_ini = background.read_conc(model, background.dir_initconc,
......@@ -44,14 +44,29 @@ def init_background(obsvect, **kwargs):
n_y = model.domain.nlat_glob
n_z = len(model.domain.heights)
# Set flag to indicate that the dimension of the control vector needs to be
# updated for optimizing the background
background.updated_controlvect = False
# ncini: number of initial mixing ratio variables to optimize
background.ncini = len(background.cini_lat)
background.cini_names = ["obs_cini_{}".format(i) for i in range(background.ncini)]
# 'cini' has dimensions (number of observations) x (zones to optimize)
# Stored in the monitor as obs_cini_0, obs_cini_1, etc.
# Currently we strictly use latitudinal bands, sorted
# south to north in the cini array
cini = np.zeros((len(obsvect.datastore), background.ncini), order='C')
# Set dimension for optimizing initial mixing ratio scalars
background.ntcini = max(1, int((datef - datei).days + 1) //
int(background.cini_res))
obsvect.n_cini = len(background.cini_lat)
obsvect.cini_names = ["obs_cini_{}".format(i) for i in range(obsvect.n_cini)]
cini = np.zeros((len(obsvect.datastore), obsvect.n_cini), order='C')
# Read FLEXPART header to access trajdays
# Time stamp for initial mixing ratio scalars
background.cinitime = np.array([datei + timedelta(days=x) for x in range(background.ntcini)])
# Read FLEXPART header (to access trajdays)
fp_header = None
# Loop over all observations and calculate background contribution
......@@ -90,15 +105,17 @@ def init_background(obsvect, **kwargs):
cini[obs_i, 0] = np.dot(gridini.flatten(), conc.flatten())
else:
# TODO: too slow, rewrite
# Check if reordering z,y is ok
for iz in range(len(model.domain.heights)):
for iy in range(model.domain.nlat_glob):
for iy in range(model.domain.nlat_glob):
n = np.min(np.where(model.domain.lat_glob[iy] < background.cini_lat))
for ix in range(model.domain.nlon_glob):
cini[obs_i, n] += gridini[ix, iy, iz] * conc[ix, iy, iz]
cini[obs_i, n] += np.dot(gridini[:, iy, iz], conc[:, iy, iz])
# Add initial concentrations to datastore
for i, name in enumerate(obsvect.cini_names):
for i, name in enumerate(background.cini_names):
obsvect.datastore[name] = cini[:, i]
obsvect.background = background
return obsvect
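The inner loop above assigns each global-grid latitude to the first band whose northern edge lies above it, then accumulates the sensitivity-weighted concentration into that band; a compact re-statement on a toy grid (field values are placeholders):

import numpy as np

cini_lat = np.array([-30., 0., 30., 90.])         # northern band edges, sorted south to north
lat_glob = np.linspace(-88., 88., 45)             # toy global latitude centres
nlon, nz = 10, 3
gridini = np.ones((nlon, lat_glob.size, nz))      # toy sensitivity field (x, y, z)
conc = np.full((nlon, lat_glob.size, nz), 1800.)  # toy initial concentration field (ppb)

cini_obs = np.zeros(cini_lat.size)                # one entry per latitude band
for iz in range(nz):
    for iy in range(lat_glob.size):
        n = np.min(np.where(lat_glob[iy] < cini_lat))        # band index for this latitude
        cini_obs[n] += np.dot(gridini[:, iy, iz], conc[:, iy, iz])
print(cini_obs)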
......@@ -29,6 +29,10 @@ def init_y0(obsvect, measurements, **kwargs):
as well as model time steps
"""
# Set datastore sort order
sort_order = kwargs.get('sort_order', ['index', 'station'])
# Saves initialized observation vector to workdir/obsvect/monitor
# Try linking file_obsvect to workdir/obsvect if existing
......@@ -55,6 +59,10 @@ def init_y0(obsvect, measurements, **kwargs):
obsvect.datastore = \
read_datastore(file_monitor,
dump_type=obsvect.dump_type)
# Change sorting order if specified in config file
obsvect.datastore.sort_values(sort_order, inplace=True)
# Check that the monitor is consistent with the simulation to be run
# If True, returns directly the observation as is
allcorrect, ok_hcoord, ok_vcoord, do_tstep = obsvect.check_monitor()
......@@ -92,6 +100,9 @@ def init_y0(obsvect, measurements, **kwargs):
obsvect.datastore = crop_monitor(obsvect.datastore,
obsvect.datei,
obsvect.datef, **kwargs)
# Change sorting order if specified in config file
obsvect.datastore.sort_values(sort_order, inplace=True)
# Computes grid cells where observations fall
if not ok_hcoord:
......
......@@ -62,7 +62,8 @@ def simul(self, chi, grad=True, run_id=-1, **kwargs):
# non-diagonal matrices, eventually
departures = obsvect.datastore['sim'] - obsvect.datastore['obs']
departures[obsvect.datastore['obs_cinipos'] == 0.] = 0.
if hasattr(obsvect, 'background'):
departures[obsvect.datastore['obs_cinipos'] == 0.] = 0.
# j_o = 0.5 * (departures ** 2 / (obsvect.datastore['obserror']**2 + obsvect.datastore['obs_bkgerr']** 2) ).sum()
......@@ -83,6 +84,11 @@ def simul(self, chi, grad=True, run_id=-1, **kwargs):
# Saves cost function value
with open('{}/cost.txt'.format(workdir), 'a') as f:
f.write('{},{:.4E},{:.4E},{:.4E}\n'.format(run_id, j_b, j_o, zcost))
# Saves current state
with open('{}/px_{}.txt'.format(workdir, run_id), 'a') as f:
np.savetxt(f, controlvect.x, fmt='%.8e')
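For reference, the j_b, j_o and zcost values written to cost.txt follow the standard Gaussian cost function; a minimal sketch in the preconditioned (chi) space with diagonal R, taking j_b as 0.5 chi.chi (an assumption here), not the exact gausscost code:

import numpy as np

chi = np.array([0.1, -0.2, 0.05])               # preconditioned control variable
departures = np.array([1.5, -0.7])              # H(x) - y
obs_err = np.array([5.0, 5.0])                  # total observation error (obserror and obs_bkgerr combined)

j_b = 0.5 * np.dot(chi, chi)                    # background term (assumed form in chi space)
j_o = 0.5 * np.sum(departures**2 / obs_err**2)  # observation term, cf. the commented formula above
zcost = j_b + j_o
print(j_b, j_o, zcost)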
# Return only the cost function if grad = False
if not grad:
......
......@@ -59,7 +59,7 @@ def dump_datastore(datastore,
**kwargs (dictionary) : any additional options needed later
"""
# Default columns to dump anyway
if dump_default:
col2dump = ['station', 'network', 'parameter',
......@@ -90,8 +90,9 @@ def dump_datastore(datastore,
# Dumping
check.verbose("Dumping datastore to {}".format(file_monit))
df0 = pd.DataFrame({}, index=datastore.index)
for col in col2dump:
if col in datastore.columns:
df0[col] = datastore[col].values
......@@ -104,6 +105,7 @@ def dump_datastore(datastore,
elif dump_type == 'nc':
datastore.index.names = ['index']
ds = df0.to_xarray()
# Saving attributes to the monitor
......@@ -117,6 +119,7 @@ def dump_datastore(datastore,
for key in nc_attributes:
ds.attrs[key] = nc_attributes[key]
# Saving to NetCDF
ds.to_netcdf(file_monit)
......