Commit 1b300534 authored by Espen Sollum

Added global error scaling

parent c3745e41
@@ -86,7 +86,7 @@ def read_conc(self, model, dir_initconc, file_initconc, **kwargs):
     xmod_ver['height'] = np.array(model.domain.heights)
-    # Use middle of month to represent the time of concentration data TODO: check
+    # Use middle of month to represent the time of concentration data
     xmod_ver['time'] = datetime.strptime(file_conc, file_initconc) + timedelta(days=14)
     if conc_all is None:
@@ -94,6 +94,4 @@ def read_conc(self, model, dir_initconc, file_initconc, **kwargs):
     else:
         conc_all = xr.concat([conc_all, xmod_ver], 'time')
-    # import pdb; pdb.set_trace()
     return conc_all
@@ -7,6 +7,7 @@ from pycif.utils.geometry import dist_matrix
 from pycif.utils.netcdf import readnc
 from .utils.scalemaps import map2scale
 import pycif.utils.check as check
+from pycif.utils.check import verbose

 def build_hcorrelations(zlat, zlon, lsm,
@@ -14,7 +15,7 @@ def build_hcorrelations(zlat, zlon, lsm,
                         evalmin=0.5, regions=False,
                         hresoldim=None,
                         dump=False, dir_dump='', projection='gps',
-                        tracer=None, errscalar=None,
+                        tracer=None, cntrlv=None, glob_err=None,
                         **kwargs):
     """Build horizontal correlation matrix based on distance between grid
     cells.
@@ -104,10 +105,24 @@ def build_hcorrelations(zlat, zlon, lsm,
     corr = np.exp(old_div(-dx, sigma))
     corr[sigma <= 0] = 0
-    # If total error specified, scale correlation matrix
-    if errscalar is not None:
-        corr *= errscalar**2
+    # Scale covariance in accordance with global total error
+    # Prior error averaged over time steps
+    xerr = np.mean(np.reshape(cntrlv.std, (tracer.ndates,
+                              int(len(cntrlv.std)/tracer.ndates))), axis=0)
+    # cov = np.outer(cntrlv.std, cntrlv.std) * corr
+    cov = np.outer(xerr, xerr) * corr
+    covsum = cov*np.outer(cntrlv.area_reg, cntrlv.area_reg)
+    toterr = np.sqrt(covsum.sum())*3600.*24.*365./1.e9/float(tracer.numscale)
+    errscalar = glob_err/toterr
+    verbose("Total error scaled by "+ str(errscalar))
+    verbose("covsum "+ str(covsum.sum()))
+    # ESO TODO: try scaling std instead
+    # corr = corr*errscalar**2
+    cntrlv.std = cntrlv.std*errscalar**2

     # Component analysis
     evalues, evectors = np.linalg.eigh(corr)
@@ -147,7 +162,6 @@ def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
     ncell = evalues.size
-    # TODO change file name for spatial regions
     file_dump = '{}/horcor_{}x{}_cs{}_cl{}.bin'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
@@ -179,7 +193,6 @@ def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump, hresoldim=None):
     """
-    # TODO file name
     file_dump = '{}/horcor_{}x{}_cs{}_cl{}.bin'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
......
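The block added above computes the scaling in two steps: the prior error per region, averaged over control time steps (xerr), defines a spatial covariance cov = outer(xerr, xerr) * corr; weighting that covariance by the region areas and summing gives the squared domain total, which is converted to Tg/yr and compared with the requested glob_err. Below is a minimal, self-contained sketch of that arithmetic with made-up region areas, errors and correlations; it is not pyCIF code, and the final scaling by errscalar**2 simply mirrors what the commit applies to cntrlv.std.

import numpy as np

ndates, nregions = 4, 3                            # control-vector time steps and regions (hypothetical)
std = np.linspace(0.5, 2.0, ndates * nregions)     # hypothetical prior error per (date, region)
area_reg = np.array([1.0e12, 2.0e12, 0.5e12])      # hypothetical region areas [m2]
corr = np.array([[1.0, 0.5, 0.2],
                 [0.5, 1.0, 0.5],
                 [0.2, 0.5, 1.0]])                 # hypothetical horizontal correlations

# Prior error averaged over time steps, one value per region (xerr in the diff)
xerr = np.mean(std.reshape(ndates, nregions), axis=0)

# Spatial covariance and its area-weighted sum over the domain
cov = np.outer(xerr, xerr) * corr
covsum = cov * np.outer(area_reg, area_reg)

# Domain total error converted to Tg/yr (numscale is pyCIF's unit scaling factor)
numscale = 1.0
toterr = np.sqrt(covsum.sum()) * 3600. * 24. * 365. / 1.e9 / numscale

glob_err = 50.0                                    # requested total error [Tg/yr], hypothetical
errscalar = glob_err / toterr
std = std * errscalar**2                           # same scaling applied to cntrlv.std in the commit
print(toterr, errscalar)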
@@ -19,8 +19,6 @@ def init_bprod(cntrlv, options={}, **kwargs):
     Returns:
         updated control vector:
-    Todo:
-        ESO: Include total domain error to save/load routines
     """
@@ -92,9 +90,6 @@ def init_bprod(cntrlv, options={}, **kwargs):
             tracer.chi_hresoldim = sqrt_evalues.size

         elif hasattr(tracer, 'hcorrelations') and tracer.hresol == 'regions':
-            # TODO don't need a separate block for regions, instead
-            # pass tracer.hresol/regions keyword to build_hcorr
             # Scale by domain total error (given as Tg/y)
             if hasattr(tracer, 'glob_err'):
                 glob_err = float(tracer.glob_err)
@@ -102,23 +97,25 @@ def init_bprod(cntrlv, options={}, **kwargs):
                     raise Exception("glob_err must be >0")

                 # Get area of region boxes
-                # TODO: could compute elsewhere and store in tracer.domain
                 area_reg = np.squeeze(map2scale(
                     tracer.domain.areas[np.newaxis, np.newaxis, :, :],
                     tracer,
                     tracer.domain,
                     region_scale_area=False, region_max_val=False))
-                errsum = np.dot(cntrlv.std, np.tile(area_reg, tracer.ndates))/float(tracer.ndates)
-                toterr = errsum*3600.*24.*365./1.e9/float(tracer.numscale)
-                errscalar = glob_err/toterr
-                cntrlv.std = cntrlv.std*errscalar**2
-                verbose("Total error scaled by "+ str(errscalar))
-                verbose("errsum "+ str(errsum))
-                # TODO: rework this based on most recent flexinvert version,
-                # including using xerr (time avg error)
+                cntrlv.area_reg = area_reg

+                # New implementation done in build_hcorr.py
+                # Old implementation of glob_err, consistent with 2020 version of
+                # flexinvertplus:
+                # errsum = np.dot(cntrlv.std, np.tile(area_reg, tracer.ndates))/float(tracer.ndates)
+                # toterr = errsum*3600.*24.*365./1.e9/float(tracer.numscale)
+                # errscalar = glob_err/toterr
+                # cntrlv.std = cntrlv.std*errscalar**2
+                # verbose("Total error scaled by "+ str(errscalar))
+                # verbose("errsum "+ str(errsum))

             corr = tracer.hcorrelations
@@ -147,7 +144,7 @@ def init_bprod(cntrlv, options={}, **kwargs):
             # or separated land and sea, along a land-sea mask
             # Default is no separation
             lsm = getattr(corr, 'landsea', False)
-            # import pdb; pdb.set_trace()
             if lsm:
                 sigma_land = getattr(corr, 'sigma_land', -1)
                 sigma_sea = getattr(corr, 'sigma_sea', -1)
@@ -178,7 +175,7 @@ def init_bprod(cntrlv, options={}, **kwargs):
                     dump=dump_hcorr, dir_dump=dircorrel,
                     projection=projection, regions=True,
                     hresoldim=tracer.hresoldim, tracer=tracer,
-                    errscalar=errscalar, **kwargs)
+                    cntrlv=cntrlv, glob_err=glob_err, **kwargs)
                 # Storing computed correlations for use by other components
                 cntrlv.hcorrelations[(sigma_land, sigma_sea)] = \
@@ -241,7 +238,6 @@ def init_bprod(cntrlv, options={}, **kwargs):
             tracer.chi_pointer = cntrlv.chi_dim
             tracer.chi_dim = tracer.chi_hresoldim * tracer.chi_tresoldim
             cntrlv.chi_dim += tracer.chi_dim
-    #import pdb; pdb.set_trace()

     # Defining chi from the total dimension
     cntrlv.chi = np.zeros((cntrlv.chi_dim,))
......
@@ -152,6 +152,4 @@ def init_xb(cntrlv, **kwargs):
         cntrlv.xb = np.append(cntrlv.xb, xb)
         cntrlv.std = np.append(cntrlv.std, std)
-    #import pdb; pdb.set_trace()
     return cntrlv
@@ -7,9 +7,6 @@ def sqrtbprod(controlvect, chi, **kwargs):
     """
-    # ESO check dim
-    #import pdb; pdb.set_trace()
     # Initializes output vector
     xout = np.zeros(controlvect.dim)
@@ -73,8 +70,6 @@ def sqrtbprod(controlvect, chi, **kwargs):
     # Fill data for background scalars (if present)
     if hasattr(controlvect, 'background'):
-        # xout[-controlvect.background.ncini::] = chi[-controlvect.background.ncini::]
-        # ESO: is this correct for multiple cini periods?
         xout[-controlvect.cinidim::] = chi[-controlvect.cinidim::]

     return xout * controlvect.std + controlvect.xb
@@ -144,9 +139,7 @@ def sqrtbprod_ad(controlvect, dx, **kwargs):
     # Fill data for background scalars (if present)
     if hasattr(controlvect, 'background'):
-        chiout[-controlvect.background.ncini::] = dx[-controlvect.background.ncini::] * \
+        chiout[-controlvect.cinidim::] = dx[-controlvect.cinidim::] * \
             controlvect.background.cini_err

     return chiout
@@ -83,4 +83,3 @@ def create_domain(domain,
     domain.iy1 = iy1
     domain.iy2 = iy2
-    # import pdb; pdb.set_trace()
@@ -62,7 +62,7 @@ def read(self, name, tracdir, tracfic, dates,
             flx_reg = data[:, iy0:iy0+self.domain.nlat, ix0:ix0+self.domain.nlon]
-            # converting to regular np array and appending
+            # Converting to regular np array and appending
             np_flx = np.array(flx_reg)
             if first:
@@ -71,53 +71,12 @@ def read(self, name, tracdir, tracfic, dates,
             else:
                 trcr_flx = np.append(trcr_flx, np_flx, axis=0)
-                # trcr_flx.append(flx_reg)
-        # nc = xr.open_dataset(
-        #     '{}/{}'.format(tracdir, fic_flx),
-        #     decode_times=False)
-        # nlon = self.domain.nlon
-        # nlat = self.domain.nlat
-        # Vector to map
-        # Deals with polar boxes by sub-dividing them zonally
-        # Also loops zonally for consistency with other call to gridded values
-        # flx = nc['flx_{}'.format(name.lower())].values
-        # flx0 = flx[:, 0]
-        # flx1 = flx[:, -1]
-        # flx = flx[:, 1:-1].reshape((-1, nlat - 2, nlon - 1))
-        # flx = np.append(flx, flx1[:, np.newaxis, np.newaxis]
-        #                 * np.ones((1, 1, nlon - 1)), axis=1)
-        # flx = np.append(flx0[:, np.newaxis, np.newaxis]
-        #                 * np.ones((1, 1, nlon - 1)), flx, axis=1)
-        # flx = np.append(flx, flx[:, :, np.newaxis, 0], axis=2)
-        # Keeps only values for the corresponding month
-        # Assumes monthly resolution
-        # if nc.dims['time'] == 12:
-        #     month = dd.month
-        #     flx = flx[month - 1]
-        # else:
-        #     flx = flx[0]
-        # xmod = xr.DataArray(trcr_flx[0],
         xmod = xr.DataArray(trcr_flx,
                             coords={'time': times},
                             dims=('time', 'lat', 'lon'))
-        # TODO: take care if several files are read
-        # TODO: scale flux contribution by area weight for boxes
-        # TODO: consider storing fluxes at original time resolution and
-        # interpolate as needed
         flx = np.ndarray((self.ndates, self.domain.nlat, self.domain.nlon))

         # Interpolate fluxes to start time of control period
......
@@ -70,19 +70,12 @@ def read_glob(self, name, tracdir, tracfic, dates,
             else:
                 trcr_flx = np.append(trcr_flx, np_flx, axis=0)
-                # trcr_flx.append(data[:, :, :])
         xmod = xr.DataArray(trcr_flx,
                             coords={'time': times},
                             dims=('time', 'lat', 'lon'))
-        # TODO: take care if several files are read
-        # TODO: scale flux contribution by area weight for boxes
-        # TODO: consider storing fluxes at original time resolution and
-        # interpolate as needed
         flx = np.ndarray((self.ndates, self.domain.nlat_glob, self.domain.nlon_glob))

         # Interpolate fluxes to start time of control period
......
@@ -67,9 +67,6 @@ def check_options(self, chi, finit,
     check.verbose(towrite)
-    # ESO check dimensions
-    # import pdb; pdb.set_trace()
     # Checking for inconsistent definition of parameters
     if dim <= 0. or niter <= 0. or nsim <= 0. \
             or dxmin <= 0. or epsg <= 0. or epsg > 1. \
......
@@ -12,7 +12,6 @@ def ini_periods(self, **kwargs):
     #self.subsimu_dates = \
     #    date_range(datei, datef, period=self.periods)
-    # TODO: implement sub-periods? For now just keep the full period
     self.subsimu_dates = \
         date_range(datei, datef, period='')
......
@@ -21,9 +21,6 @@ def execute(self, **kwargs):
     # Control vector
     controlvect = self.controlvect
-    # ESO check dim
-    #import pdb; pdb.set_trace()
     # Observation operator
     obsoper = self.obsoperator
......
@@ -110,7 +110,6 @@ def obsoper(self, inputs, mode,
     if tracer.hresol == 'regions':
         nbox = tracer.nregions
-        # TODO: this will change once initial conditions are added (ciniopt)
         npvar = tracer.ndates*nbox
         ndvar = nbox
         nvar = npvar
@@ -177,7 +176,7 @@ def obsoper(self, inputs, mode,
             # If file is not found, continue
             if not os.path.isfile(os.path.join(runsubdir_nest, file_name)):
-                verbose("Warning: file not found, ", os.path.join(runsubdir_nest, file_name))
+                verbose("WARNING: file not found: " + os.path.join(runsubdir_nest, file_name))
                 continue
             grid_nest, gtime, ngrid = model.utils.read.read_flexpart_grid(
@@ -279,12 +278,16 @@ def obsoper(self, inputs, mode,
             ncini = obsvect.background.ncini
             # Time step of initial concentration optimization; ni
-            mask = row.Index - datetime.timedelta(days=trajdays) - controlvect.background.cinitime[:] < datetime.timedelta(0)
+            mask = row.Index - datetime.timedelta(days=trajdays) - controlvect.background.cinitime[:] - datetime.timedelta(days=controlvect.background.cini_res) < datetime.timedelta(0)
             ni = int(np.argmax(mask))
+            # Force last time step if last cini period (mask is all False)
+            if all(~mask):
+                ni = controlvect.background.ntcini - 1
             obs_cinipos[obs_i] = np.dot(cini[:],
                                         controlvect.x[npvar+ni*ncini:npvar+(ni+1)*ncini])
             if getattr(tracer, 'offsets', False):
                 # Optimize offsets
                 obs_sim[obs_i] = obs_model[obs_i] + obs_ghg[obs_i] + obs_bkg[obs_i] + obs_cinipos[obs_i]
@@ -400,11 +403,17 @@ def obsoper(self, inputs, mode,
     path.init_dir(rundir)
     dump_type = obsvect.dump_type
-    # ESO TODO: add the cini_{} columns for debugging
     col2dump = ['obs_ghg', 'obs_bkg', 'obs_model', 'obs_sim', 'obs_check',
                 'obs_bkgerr', 'obs_err', 'obs_hx']
     if hasattr(obsvect, 'background'):
         col2dump.append('obs_cinipos')
+        col2dump += ["obs_cini_{}".format(i) for i in range(obsvect.background.ncini)]
     if dump_debug:
         sort_order = getattr(obsvect, 'sort_order', ['index', 'station'])
......
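The mask change above subtracts the length of each initial-concentration period (cini_res, in days) before the comparison, and the new fallback selects the last period when the mask is entirely False, i.e. when the observation minus the trajectory length lies beyond the end of the last cini period (np.argmax would otherwise silently return 0). A small sketch of that selection, assuming pandas timestamps stand in for row.Index and controlvect.background.cinitime, with hypothetical values for cini_res, ntcini and trajdays:

import numpy as np
import pandas as pd

cinitime = pd.to_datetime(["2012-01-01", "2012-02-01", "2012-03-01"])  # period start times (hypothetical)
cini_res = 31                      # length of each cini period in days (assumed)
ntcini = len(cinitime)
trajdays = 10                      # backward-trajectory length in days (assumed)

obs_time = pd.Timestamp("2012-06-15")      # an observation later than every period

# True where the trajectory start falls before the end of a cini period
mask = (obs_time - pd.Timedelta(days=trajdays) - cinitime
        - pd.Timedelta(days=cini_res)) < pd.Timedelta(0)
ni = int(np.argmax(mask))                  # first matching period; 0 if mask is all False

# The fix above: fall back to the last period when nothing matches
if all(~mask):
    ni = ntcini - 1
print(ni)                                  # -> 2 (last period) rather than 0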
@@ -6,6 +6,7 @@ import pandas as pd
 import os
 import numpy as np
 import pycif.utils.check as check
+import sys
 from .headers import get_header, parse_header
 from . import utils
@@ -13,33 +14,28 @@ from . import utils
 def do_parse(self,
              obs_file,
-             maxlen=2,
+             maxlen=1,
              default_unit='ppm',
              default_tz='utc',
              default_duration=1,
              na_values=-999.999,
              default_provider='flexinvert',
-             # err_na_values=0.0000,
              spec=None,
-             # extract=['obserror', 'unit',
-             #          'lon', 'lat', 'alt', 'tz'],
-             # extract=['rec', 'yyyymmdd',
-             #          'hhmmss', 'conc', 'err'],
             extract=['rec', 'conc', 'err'],
             **kwargs):
-    """Parse function for a file from WDCGG
+    """Parse function for a file from flexinvertplus

     Args:
         obs_file (str) :
             Path to input file
         maxlen (int):
-            Maximum possible length for a WDCGG header. Default is `300`
+            Maximum possible length for header, default is `1`
         default_unit (str):
             Default unit to use for converting the processed species to ppm if
-            not explicitly specified in the WDCGG file. Default is ppm
+            not explicitly specified in the file. Default is ppm
         default_tz (str):
             Default time zone to shift observations to UTC time if not
-            explicitly specified in the WDCGG file. Default is utc
+            explicitly specified in the file. Default is utc
         default_duration (str):
             Default duration of measurements in hours. Default is 1.
         na_values (str):
@@ -76,6 +72,13 @@ def do_parse(self,
     # Get default na_values from species description if available
     if hasattr(self, 'err_na_values'):
         err_na_values = self.err_na_values

+    # Get file containing location for all stations
+    if hasattr(self, 'station_detail'):