Commit e545544d authored by Espen Sollum's avatar Espen Sollum
Browse files

Added branch for flexpart debugging

parent ba66ebb2
......@@ -15,7 +15,6 @@
# y|Y|yes|Yes|YES|n|N|no|No|NO|true|True|TRUE|false|False|FALSE|on|On|ON|off|Off|OFF
# This offers flexibility, but one should stick to a style for consistency
#####################################################################
#####################################################################
# pyCIF parameters
......@@ -33,10 +32,10 @@ workdir : /home/eso/repos/CIF/flextest/
# Initial date
# Use the following compatible format for the date:
# YYYY-mm-dd or YYYY-mm-dd HH:mm
datei : 2012-01-01
datei : 2012-02-01
# End date (same as for the initial date)
datef : 2012-01-31
datef : 2012-02-11 00:00
#####################################################################
#####################################################################
# Transport model
......@@ -54,9 +53,10 @@ model :
name : FLEXPART
version : std
periods : 1MS
# periods : 3MS
nested : yes
run_dir_nest : /home/eso/FLEX_INV/TEST_OUTPUT/FLEXOUT/GHG/NEST/
run_dir_glob : /home/eso/FLEX_INV/TEST_OUTPUT/FLEXOUT/GHG/NEST/
run_dir_nest : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/FLEXOUT/GHG/NEST/
run_dir_glob : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/FLEXOUT/GHG/NEST/
coeff : 1.e-3
mmair : 28.97
molarmass : 16.
......@@ -68,15 +68,40 @@ model :
# How to build your observation vector and observation uncertainties if needed
# Also projects information from the observation to the model space
# - fic_obsvect: observation vector from previous simulations
# - dump_debug: write out extra information (for debugging)
# For FLEXPART the background plugin takes the following parameters
# - dir_initconc: directory containing concentration files
# - file_initconc: name of concentration files
# - varname_initconc: name of data variable in the netCDF file
# - optimize_cini: optimize initial concentration or not
# - cini_lat: northern edges of latitude bands for cini optimization
#   (a band-lookup sketch follows the background block below)
# - cini_err: initial concentration error (same unit as the observations)
obsvect:
plugin:
name: standard
version: std
# file_obsvect : /home/eso/repos/CIF/flextest/monitor_obsvect.nc
dump: True
dump_debug: True
dump_type: nc
#####################################################################
#####################################################################
background:
plugin:
name: FLEXPART
version: nc
# parameters:
# CH4 :
dir_netcdf : /home/eso/repos/CIF/flextest/
# Initial concentrations
dir_initconc : /home/eso/FLEX_INV/CH4/TEST_INPUT/INIT_CONC/GHG/
file_initconc: ch4_noaa_%Y%m.nc
varname_conc: CH4
optimize_cini: False
cini_lat: [-30., 0., 30., 90.]
cini_err: 5.
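The cini_lat entry above lists the northern edges of the latitude bands used when optimize_cini is enabled. A minimal sketch of how a station latitude would map onto one of those bands, assuming each band is closed at its northern edge (the helper name is hypothetical, not part of pyCIF):

import numpy as np

# Northern edges of the cini latitude bands, as configured above
cini_lat = np.array([-30., 0., 30., 90.])

def cini_band(lat):
    # Band 0 covers [-90, -30], band 1 (-30, 0], band 2 (0, 30], band 3 (30, 90]
    return int(np.searchsorted(cini_lat, lat, side='left'))

print(cini_band(-45.), cini_band(10.), cini_band(60.))  # -> 0 2 3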
#####################################################################
#####################################################################
......@@ -98,7 +123,7 @@ measurements :
dailyPM :
provider : WDCGG
format : std
dir_obs : /home/eso/FLEX_INV/TEST_INPUT/OBS/GHG/*.cn.*
dir_obs : /home/eso/FLEX_INV/CH4/TEST_INPUT/OBS/GHG/*.cn.*
# rescale : false
rescale : true
na_values : -999999.99
......@@ -119,24 +144,9 @@ obsoperator:
version: std
autorestart: false
read_only: true
#####################################################################
#####################################################################
####################################################################
#####################################################################
# Domain definition
# domain :
# plugin :
# name : FLEXPART
# version : std
# xmin : -180
# xmax : 180
# ymin : -90
# ymax : 90
# nlon : 360
# nlat : 180
# nlev : 1
# type : deg
# Domain definition
# coordinates refers to edge of inversion domain
......@@ -159,6 +169,8 @@ domain :
nlat_glob : 180
dx_glob : 1.
dy_glob : 1.
heights : [100., 500., 1000., 2000., 3000., 5000., 7000., 9000.,
12000., 15000., 20000., 50000.]
......@@ -202,7 +214,6 @@ mode:
# - inc_ocean: for 'regions', optimize ocean boxes (default true)
# - periodflux: period of variation for increments within a month (days)
# - glob_err (optional) = uncertainty on global budget
# - glob_err (optional) = uncertainty on global budget
controlvect:
plugin:
name: standard
......@@ -214,25 +225,24 @@ controlvect:
plugin:
name: 'FLEXPART'
version: 'nc'
# resol : hpixels
type : flexpart
to_netcdf : True
dir_netcdf : /home/eso/repos/CIF/flextest/
hresol : regions
inc_ocean : true
fileregions : /home/eso/FLEX_INV/TEST_OUTPUT/regions_ghg.nc
# errtype : max
errtype : avg
fileregions : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/regions_ghg.nc
errtype : max
# errtype : avg
err : 0.5
# Lower limit flux error: unit (kg/m2/h)
flxerr_ll: 1.e-8
# Total error inversion domain: unit (Tg/y) -- todo: not implemented
glob_err: 10.
# Total error inversion domain: unit (Tg/y)
# glob_err: 10.
numscale : 1.E12
xb_scale : 1.
# periodflux : 5D
period : 10D
dir : /home/eso/FLEX_INV/TEST_OUTPUT/FLUXES/GHG/
period : 1D
dir : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/FLUXES/GHG/
file : CH4_TOTAL_%Y_05x05.nc
file_glob : CH4_TOTAL_%Y_10x10.nc
......@@ -243,24 +253,25 @@ controlvect:
offsets : True
# offsets : False
# Initial concentrations
dir_initconc : /home/eso/FLEX_INV/TEST_INPUT/INIT_CONC/GHG/
file_initconc: ch4_noaa_%Y%M.nc
varname_init: CH4
hcorrelations :
# landsea: True
landsea: False
filelsm : /home/eso/FLEX_INV/TEST_INPUT/lsm_0.5x0.5_VERIFY.nc
landsea: True
# landsea: False
filelsm : /home/eso/FLEX_INV/CH4/TEST_INPUT/lsm_0.5x0.5_VERIFY.nc
dircorrel : /home/eso/repos/CIF/flextest/
dump_hcorr : True
sigma_land: 250
sigma_sea: 1000
# sigma_land: 250.
# sigma_sea: 1000.
sigma_land: 1.
sigma_sea: 1.
evalmin : 1.e-6
# evalmin : 1.e-15
tcorrelations :
sigma_t : 30
# sigma_t : 30
sigma_t : 1
dump_tcorr : True
dircorrel : /home/eso/repos/CIF/flextest/
evalmin : 1.e-5
# evalmin : 1.e-6
evalmin : 1.e-99
......@@ -25,7 +25,7 @@ def read_conc(self, model, dir_initconc, file_initconc, **kwargs):
TS = 288.15 # surface temperature (K)
GC = 9.81 # gravitational acceleration (m/s2)
MMOL = 0.02894 # (kg/mol)
PSURF = 101325 # surface pressure (Pa)
PSURF = 101325. # surface pressure (Pa)
# Available files in the directory
list_files = os.listdir(dir_initconc)
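The constants above are those of the isothermal barometric relation, the usual way of converting between height levels and pressure when regridding initial concentrations. A minimal sketch of that relation, assuming the standard form (the gas constant R is introduced only for this illustration; the exact expression used in read_conc may differ):

import numpy as np

R = 8.314        # universal gas constant (J/mol/K), added for this sketch
TS = 288.15      # surface temperature (K)
GC = 9.81        # gravitational acceleration (m/s2)
MMOL = 0.02894   # molar mass of air (kg/mol)
PSURF = 101325.  # surface pressure (Pa)

def pressure_at_height(z):
    # p(z) = p0 * exp(-M g z / (R T)); implied scale height R*TS/(MMOL*GC) ~ 8.4 km
    return PSURF * np.exp(-MMOL * GC * np.asarray(z) / (R * TS))

print(pressure_at_height([100., 1000., 5000.]))  # roughly 1.0e5, 9.0e4, 5.6e4 Pa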
......@@ -86,7 +86,6 @@ def read_conc(self, model, dir_initconc, file_initconc, **kwargs):
xmod_ver['height'] = np.array(model.domain.heights)
# Use middle of month to represent the time of concentration data. TODO: check
xmod_ver['time'] = datetime.strptime(file_conc, file_initconc) + timedelta(days=14)
if conc_all is None:
......
......@@ -5,6 +5,7 @@ import os
from itertools import islice
from pycif.utils.geometry import dist_matrix
from pycif.utils.netcdf import readnc
from .utils.scalemaps import map2scale
import pycif.utils.check as check
......@@ -13,6 +14,7 @@ def build_hcorrelations(zlat, zlon, lsm,
evalmin=0.5, regions=False,
hresoldim=None,
dump=False, dir_dump='', projection='gps',
tracer=None,
**kwargs):
"""Build horizontal correlation matrix based on distance between grid
cells.
......@@ -22,8 +24,8 @@ def build_hcorrelations(zlat, zlon, lsm,
un-correlated
Args:
zlat (np.array): 2D array of latitudes
zlon (np.array): 2D array of longitudes
zlat (np.array): 2D array of latitudes or 1D array (hresoldim)
zlon (np.array): 2D array of longitudes or 1D array (hresoldim)
file_lsm (str): path to NetCDF file with land-sea mask (grid must be
consistent with LMDZ grid); the land-sea mask is assumed to be stored
in the variable 'lsm'
......@@ -36,6 +38,7 @@ def build_hcorrelations(zlat, zlon, lsm,
dump (bool): dumps computed correlations if True
dir_dump (str): directory where correlation matrices are stored
projection (str): the projection used for the longitudes and latitudes
tracer: tracer class
Return:
tuple with:
......@@ -49,6 +52,9 @@ def build_hcorrelations(zlat, zlon, lsm,
"""
# Define domain dimensions
if regions:
nlon, nlat = len(zlon), len(zlat)
else:
nlon, nlat = zlat.shape
# Try reading existing file
......@@ -62,20 +68,34 @@ def build_hcorrelations(zlat, zlon, lsm,
check.verbose("Computing hcorr")
# TODO 'regions'
if regions:
if hresoldim is None:
raise Exception("hresoldim missing!")
# if regions:
# if hresoldim is None:
# raise Exception("hresoldim missing!")
corr = np.identity(hresoldim)
# corr = np.identity(hresoldim)
else:
# else:
# No correlation between land and sea if lsm = True
if lsm:
if regions:
landseamask2d = readnc(file_lsm, ['lsm'])
landseamask = map2scale(landseamask2d[np.newaxis, np.newaxis, :, :], tracer, tracer.domain, region_scale_area=False, region_max_val=True).flatten()
import pdb; pdb.set_trace()
else:
landseamask = readnc(file_lsm, ['lsm']).flatten()
sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
* (landseamask[np.newaxis, :] >= 0.5) \
+ sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
* (landseamask[np.newaxis, :] < 0.5)
# ESO: testing different threshold (0.5) for land value
# sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
# * (landseamask[np.newaxis, :] >= 0.5) \
# + sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
# * (landseamask[np.newaxis, :] < 0.5)
sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.01) \
* (landseamask[np.newaxis, :] >= 0.01) \
+ sigma_sea * (landseamask[:, np.newaxis] < 0.01) \
* (landseamask[np.newaxis, :] < 0.01)
# Otherwise, isotropic correlation
# Takes sigma_land
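As a toy illustration of the sigma construction above, using a hypothetical 3-cell mask (land, sea, land), the 0.5 threshold of the commented-out version, and representative correlation lengths: land-land pairs get sigma_land, sea-sea pairs get sigma_sea, and mixed pairs get zero, which the later corr[sigma <= 0] = 0 turns into zero correlation.

import numpy as np

landseamask = np.array([1.0, 0.0, 1.0])   # hypothetical 3-cell mask
sigma_land, sigma_sea = 250., 1000.

sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
        * (landseamask[np.newaxis, :] >= 0.5) \
        + sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
        * (landseamask[np.newaxis, :] < 0.5)

# sigma -> [[ 250.,    0.,  250.],
#           [   0., 1000.,    0.],
#           [ 250.,    0.,  250.]]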
......@@ -85,6 +105,8 @@ def build_hcorrelations(zlat, zlon, lsm,
# Compute matrix of distance
dx = dist_matrix(zlat, zlon, projection)
import pdb; pdb.set_trace()
# Compute the correlation matrix itself
corr = np.exp(old_div(-dx, sigma))
corr[sigma <= 0] = 0
......@@ -108,7 +130,12 @@ def build_hcorrelations(zlat, zlon, lsm,
raise e
# Truncating values < evalmin
mask = evalues >= evalmin
# mask = evalues >= evalmin
# ESO: This is how it is done in flexinvert
import pdb; pdb.set_trace()
mask = evalues >= evalmin*evalues.max()
check.verbose("Truncating eigenvalues at " + str(evalmin*evalues.max()))
return evalues[mask] ** 0.5, evectors[:, mask]
......@@ -133,12 +160,18 @@ def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
"I don't want to overwrite it".format(file_dump))
datasave = np.concatenate((evalues[np.newaxis, :], evectors), axis=0)
# ESO: Writing to text file (for testing)
# if isinstance(datasave, np.ma.MaskedArray):
# datasave.filled().tofile(file_dump, sep="\n", format="%11.3E")
# else:
# datasave.tofile(file_dump, sep="\n", format="%11.3E")
if isinstance(datasave, np.ma.MaskedArray):
datasave.filled().tofile(file_dump)
else:
datasave.tofile(file_dump)
def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump, hresoldim=None):
"""Reads horizontal correlations from existing text file
......
......@@ -50,7 +50,9 @@ def build_tcorrelations(period, dates, sigma_t,
- pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'D'))
# Compute the correlation matrix itself
corr = np.exp(-dt ** 2)
# corr = np.exp(-dt ** 2)
# ESO:
corr = np.exp(-np.abs(dt))
# Component analysis
evalues, evectors = np.linalg.eigh(corr)
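To see how swapping the kernel changes the temporal correlations, a short numerical sketch with hypothetical lags expressed in units of sigma_t:

import numpy as np

dt = np.arange(4.)                 # lags of 0, 1, 2, 3 sigma_t

gaussian = np.exp(-dt ** 2)        # original kernel
exponential = np.exp(-np.abs(dt))  # kernel used in this branch

# gaussian    ~ [1.000, 0.368, 0.018, 0.000]
# exponential ~ [1.000, 0.368, 0.135, 0.050]
# Both equal 1/e at one sigma_t; the exponential form keeps much longer
# correlations beyond that.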
......@@ -73,6 +75,8 @@ def build_tcorrelations(period, dates, sigma_t,
# Truncating values < evalmin
mask = evalues >= evalmin
import pdb; pdb.set_trace()
return evalues[mask] ** 0.5, evectors[:, mask]
......
......@@ -61,41 +61,74 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, run_id=None, **kwar
(tracer.ndates, -1))
x = scale2map(x, tracer, tracer.dates, self.domain)
# If offsets, add prior fluxes
if getattr(tracer, 'offsets', False):
print("Offsets=True")
# store the unaggregated fluxes
# TODO: move this?
arr = self.flxall[:, np.newaxis, :, :]/np.float(self.model.numscale)
xb_grid = xr.DataArray(
arr, coords={'time': tracer.dates[:-1], 'lev': tracer.levels},
dims=('time', 'lev', 'lat', 'lon'))
xb = np.reshape(
self.xb[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
xb = scale2map(xb, tracer, tracer.dates, self.domain)
xb /= np.float(self.model.numscale)
# Make a copy to write out the original array
xa=copy.deepcopy(x)
xa/=np.float(self.model.numscale)
# If offsets, add prior fluxes
if getattr(tracer, 'offsets', False):
offsets = True
print("Offsets=True")
x[:,0,:,:] = (x[:,0,:,:] + self.flxall)/np.float(self.model.numscale)
else:
print("Offsets=False")
x[:,0,:,:] /= np.float(self.model.numscale)
xa=copy.deepcopy(x)
offsets = False
xb = np.reshape(
self.xb[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
xb = scale2map(xb, tracer, tracer.dates, self.domain)
# TODO:
# transform the scaling factor (0 - 1) to the grid and apply it to x. Check
# that the sum of the scaling factors is 1, and decide what to do with negative values.
# Transform scaling factor
scaling_factor = xb_grid / xb
# Rescale using prior flux ratio grid to box
x.values[xb_grid.values > 1.e-15] = x.values[xb_grid.values > 1.e-15]*scaling_factor.values[xb_grid.values > 1.e-15]
#x[xb_grid > 1.e-15] = x*scaling_factor
x[:,:,:,:] /= np.float(self.model.numscale)
xb /= np.float(self.model.numscale)
# store the unaggregated fluxes, for testing
arr = self.flxall[:, np.newaxis, :, :]/np.float(self.model.numscale)
xb_grid = xr.DataArray(arr,
coords={'time': tracer.dates[:-1], 'lev': tracer.levels},
dims=('time', 'lev', 'lat', 'lon'))
std = np.reshape(
self.std[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
std = scale2map(std, tracer, tracer.dates, self.domain)
std /= np.float(self.model.numscale)
ds = xr.Dataset({'x': x, 'xb': xb, 'xb_grid': xb_grid, 'std': std,
'xa': xa,
'lon': tracer.domain.lon, 'lat': tracer.domain.lat})
errscalar = getattr(self, 'errscalar', 1.0)
std /= (np.float(self.model.numscale)*errscalar**2)
vars_save = {'x': x, 'xprior': xb, 'xprior_grid': xb_grid, 'std': std,
'xincrement': xa,
'lon': tracer.domain.lon, 'lat': tracer.domain.lat}
if offsets is False:
vars_save.update({'scaling_factor' : scaling_factor})
# ds = xr.Dataset({'x': x, 'xprior': xb, 'xprior_grid': xb_grid, 'std': std,
# 'xincrement': xa, 'scaling_factor' : scaling_factor,
# 'lon': tracer.domain.lon, 'lat': tracer.domain.lat})
ds = xr.Dataset(vars_save)
if run_id is not None:
ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, run_id, trcr))
......
......@@ -2,9 +2,12 @@ import numpy as np
from .build_hcorr import build_hcorrelations
from .build_tcorr import build_tcorrelations
from .sqrtbprod import sqrtbprod, sqrtbprod_ad
from .utils.scalemaps import map2scale
from pycif.utils.check import verbose
from types import MethodType
def init_bprod(cntrlv, options={}, **kwargs):
"""Initilializes the product of chi by sqrt-B. It allows translating
information from the minimization space to the control space. This first
......@@ -93,6 +96,23 @@ def init_bprod(cntrlv, options={}, **kwargs):
evalmin = getattr(corr, 'evalmin', 0.5)
projection = getattr(grid, 'projection', 'gps')
# Use region center lon/lat for correlations
lon_center_reg = np.zeros(tracer.hresoldim)
lat_center_reg = np.zeros(tracer.hresoldim)
for regID, reg in enumerate(np.unique(tracer.regions)):
lon_center_reg[regID] = np.mean(
tracer.domain.zlon[np.where(reg == tracer.regions)])
lat_center_reg[regID] = np.mean(
tracer.domain.zlat[np.where(reg == tracer.regions)])
# Round to nearest grid center coordinate
for i, xx in enumerate(lon_center_reg):
lon_center_reg[i] = min(tracer.domain.lon, key=lambda x:abs(x-xx))
for j, yy in enumerate(lat_center_reg):
lat_center_reg[j] = min(tracer.domain.lat, key=lambda y:abs(y-yy))
# Two possible options: - uniform correlation length,
# or separated land and sea, along a land-sea mask
# Default is no separation
......@@ -121,12 +141,12 @@ def init_bprod(cntrlv, options={}, **kwargs):
else:
sqrt_evalues, evectors = \
build_hcorrelations(
grid.zlat, grid.zlon, lsm,
lat_center_reg, lon_center_reg, lsm,
sigma_land, sigma_sea,
file_lsm=file_lsm, evalmin=evalmin,
dump=dump_hcorr, dir_dump=dircorrel,
projection=projection, regions=True,
hresoldim=tracer.hresoldim,
hresoldim=tracer.hresoldim, tracer=tracer,
**kwargs)
# Storing computed correlations for use by other components
......@@ -134,6 +154,29 @@ def init_bprod(cntrlv, options={}, **kwargs):
{'evectors': evectors,
'sqrt_evalues': sqrt_evalues}
# Scale by domain total error (given as Tg/y)
if hasattr(tracer, 'glob_err'):
glob_err = float(tracer.glob_err)
# Get area of region boxes
# TODO: could compute elsewhere and store in tracer.domain
area_reg = np.squeeze(map2scale(
tracer.domain.areas[np.newaxis, np.newaxis,:,:],
tracer,
tracer.domain,
region_scale_area=False, region_max_val=False))
inv_intervals = tracer.dates[0::-1].shape[0]
errsum = np.dot(cntrlv.std, area_reg)/float(inv_intervals)
toterr = errsum*3600.*24.*365./1.e9/float(tracer.numscale)
cntrlv.errscalar = glob_err/toterr
cntrlv.std = cntrlv.std*cntrlv.errscalar**2
verbose("Total error scaled by "+ str(cntrlv.errscalar))
# TODO: scale cross correlations as well
# probably apply scaling to hcorr?
import pdb; pdb.set_trace()
corr.sqrt_evalues = sqrt_evalues
corr.evectors = evectors
......
......@@ -75,7 +75,8 @@ def init_xb(cntrlv, **kwargs):
xb = np.ones(tracer.dim) * xb_scale + xb_value
flx_regions = map2scale(
flxall[:, np.newaxis, :, :], tracer, tracer.domain, scale_area=True).flatten()
flxall[:, np.newaxis, :, :], tracer, tracer.domain,
region_scale_area=True).flatten()
xb *= flx_regions
std = tracer.err * np.abs(xb)
......@@ -87,8 +88,10 @@ def init_xb(cntrlv, **kwargs):
# Set a lower limit on the flux error
tracer.flxerr_ll = getattr(tracer, 'flxerr_ll', 1.e-8)
std[std < tracer.flxerr_ll*float(tracer.numscale)/3600.] = \
tracer.flxerr_ll*float(tracer.numscale)/3600.
min_err = tracer.flxerr_ll*float(tracer.numscale)/3600.
# std[std < tracer.flxerr_ll*float(tracer.numscale)/3600.] = \
# tracer.flxerr_ll*float(tracer.numscale)/3600.
std = np.maximum(std, min_err)
# Keep the un-aggregated fluxes for later use in obsoper
cntrlv.flxall = flxall
......
......@@ -24,7 +24,6 @@ def sqrtbprod(controlvect, chi, **kwargs):
ndates = tracer.ndates
# Dealing with non-diagonal spatial B
# Adding 'regions' eventually
if (tracer.hresol == 'hpixels' or tracer.hresol == 'regions') and hasattr(tracer, 'hcorrelations'):
corr = tracer.hcorrelations
sqrt_evalues = corr.sqrt_evalues
......
......@@ -66,14 +66,15 @@ def scale2map(x, tracer, dates, dom):
return xmap
def map2scale(xmap, tracer, dom, scale_area=False):
def map2scale(xmap, tracer, dom, region_scale_area=False, region_max_val=False):
"""Translates a map to a control vector slice
Args:
xmap: the map
tracer: tracer Class with corresponding attributes
dom: the model domain
scale_area: For regions, scale map with area size
region_scale_area: For regions, scale the map by the area of each region
region_max_val: If True, return the maximum value (instead of the sum) within each region
Returns:
the control vector vertical slice
......@@ -118,11 +119,15 @@ def map2scale(xmap, tracer, dom, scale_area=False):
elif tracer.hresol == 'regions':
x = np.zeros((ndates, tracer.vresoldim, tracer.nregions))
if scale_area:
if region_scale_area:
for regID, reg in enumerate(np.unique(tracer.regions)):
x[..., regID] = np.sum(xmap[..., tracer.regions == reg]*
tracer.domain.areas[..., tracer.regions == reg], axis=2)
x[..., regID] /= tracer.region_areas[regID]
else:
if region_max_val:
for regID, reg in enumerate(np.unique(tracer.regions)):
x[..., regID] = np.max(xmap[..., tracer.regions == reg])
else:
for regID, reg in enumerate(np.unique(tracer.regions)):
x[..., regID] = np.sum(xmap[..., tracer.regions == reg], axis=2)
......