Commit a755a6fb authored by Espen Sollum

Added scaling of x with region area

parent 4409903f
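The headline change is the area weighting applied when aggregating the state vector x to regions (see map2scale below). A minimal standalone sketch of that weighting; the function and variable names here are illustrative, not pycif API:

import numpy as np

def aggregate_area_weighted(xmap, regions, areas):
    """Aggregate a gridded field to regions, weighting cells by area.

    xmap, regions and areas share the same (nlat, nlon) shape;
    regions holds one integer region id per grid cell.
    """
    region_ids = np.unique(regions)
    out = np.zeros(region_ids.size)
    for i, reg in enumerate(region_ids):
        mask = regions == reg
        # Area-weighted mean: sum(x * area) / total region area
        out[i] = np.sum(xmap[mask] * areas[mask]) / np.sum(areas[mask])
    return out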
@@ -10,7 +10,8 @@ import pycif.utils.check as check
def build_hcorrelations(zlat, zlon, lsm,
sigma_land, sigma_sea, file_lsm=None,
evalmin=0.5,
evalmin=0.5, regions=False,
hresoldim=None,
dump=False, dir_dump='', projection='gps',
**kwargs):
"""Build horizontal correlation matrix based on distance between grid
@@ -30,6 +31,8 @@ def build_hcorrelations(zlat, zlon, lsm,
sigma_sea (float): idem for sea
evalmin (float): flag out all eigenvalues below this value. Default
is 0.5
regions (bool): True if the state vector is defined on regions rather
than grid pixels
hresoldim (int): dimension of the physical state vector (number of
regions); used with 'regions'
dump (bool): dumps computed correlations if True
dir_dump (str): directory where correlation matrices are stored
projection (str): the projection used for the longitudes and latitudes
@@ -38,41 +41,53 @@ def build_hcorrelations(zlat, zlon, lsm,
tuple with:
- square roots of eigenvalues
- eigenvectors
Todo:
- complete implementation of correlations between regions,
including read/write functionality
"""
# Define domain dimensions
nlon, nlat = zlat.shape
# Try reading existing file
try:
evalues, evectors = \
read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump)
read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump, hresoldim=hresoldim)
# Else build correlations
except IOError:
check.verbose("Computing hcorr")
# No correlation between land and sea if lsm = True
if lsm:
landseamask = readnc(file_lsm, ['lsm']).flatten()
sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
* (landseamask[np.newaxis, :] >= 0.5) \
+ sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
* (landseamask[np.newaxis, :] < 0.5) \
\
# Otherwise, isotropic correlation
# Takes sigma_land
# 'regions' is WIP, for now use identity matrix
if regions:
if hresoldim is None:
raise Exception("hresoldim missing!")
corr = np.identity(hresoldim)
else:
sigma = sigma_land
# Compute matrix of distance
dx = dist_matrix(zlat, zlon, projection)
# Compute the correlation matrix itself
corr = np.exp(old_div(-dx, sigma))
corr[sigma <= 0] = 0
# No correlation between land and sea if lsm = True
if lsm:
landseamask = readnc(file_lsm, ['lsm']).flatten()
sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
* (landseamask[np.newaxis, :] >= 0.5) \
+ sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
* (landseamask[np.newaxis, :] < 0.5)
# Otherwise, isotropic correlation
# Takes sigma_land
else:
sigma = sigma_land
# Compute matrix of distance
dx = dist_matrix(zlat, zlon, projection)
# Compute the correlation matrix itself
corr = np.exp(old_div(-dx, sigma))
corr[sigma <= 0] = 0
# Component analysis
evalues, evectors = np.linalg.eigh(corr)
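For context, the component analysis above feeds the square-root B product: eigenvalues below evalmin are flagged out and the square roots of the rest are kept. A hedged sketch of that step (the helper name is hypothetical):

import numpy as np

def sqrt_correlation(corr, evalmin=0.5):
    # eigh handles symmetric matrices; eigenvalues come back ascending
    evalues, evectors = np.linalg.eigh(corr)
    keep = evalues >= evalmin  # flag out small or negative eigenvalues
    return np.sqrt(evalues[keep]), evectors[:, keep]

# B^(1/2) chi can then be applied as evectors @ (sqrt_evalues * chi)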
@@ -87,7 +102,7 @@ def build_hcorrelations(zlat, zlon, lsm,
# Dumping to a txt file
if dump:
dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
evalues, evectors, dir_dump)
evalues, evectors, dir_dump, hresoldim=hresoldim)
except Exception as e:
raise e
@@ -99,16 +114,17 @@ def build_hcorrelations(zlat, zlon, lsm,
def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
evalues, evectors, dir_dump):
evalues, evectors, dir_dump, hresoldim=None):
"""Dumps eigenvalues and vectors to a txt file. The default file format
is:
'{}/horcor{}x{}cs{}cl{}_lmdz5.txt'.format(
dir_dump, nlon, nlat, sigma_sea, sigma_land)
"""
ncell = evalues.size
# ESO TODO: change file name for spatial regions
file_dump = '{}/horcor_{}x{}_cs{}_cl{}.bin'.format(
dir_dump, nlon, nlat, sigma_sea, sigma_land)
@@ -122,16 +138,19 @@ def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
else:
datasave.tofile(file_dump)
def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump):
def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump, hresoldim=None):
"""Reads horizontal correlations from existing text file
Args:
nlon, nlat (ints): dimensions of the domain
sigma_land, sigma_sea (floats): horizontal correlation distances
dir_dump (str): where the horizontal correlations have been stored
hresoldim: if not None, indicates the number of regions used
"""
# ESO TODO fix file name
file_dump = '{}/horcor_{}x{}_cs{}_cl{}.bin'.format(
dir_dump, nlon, nlat, sigma_sea, sigma_land)
@@ -139,8 +158,11 @@ def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump):
raise IOError("{} does not exist. "
"Please compute correlations from scratch".format(file_dump))
data = np.fromfile(file_dump).reshape((-1, nlon * nlat))
if hresoldim is None:
data = np.fromfile(file_dump).reshape((-1, nlon * nlat))
else:
data = np.fromfile(file_dump).reshape((-1, hresoldim))
evalues = data[0]
evectors = data[1:]
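The dump/read pair appears to rely on a flat float64 stream with the eigenvalues as the first row and the eigenvectors after it; a small self-contained round-trip (file name illustrative):

import numpy as np

ncell = 3
evalues = np.arange(1., ncell + 1)
evectors = np.eye(ncell)

# Write eigenvalues first, then eigenvectors, as one float64 stream
np.concatenate((evalues[np.newaxis, :], evectors)).tofile('horcor_demo.bin')

# Read back and split, mirroring read_hcorr above
data = np.fromfile('horcor_demo.bin').reshape((-1, ncell))
assert np.allclose(data[0], evalues)
assert np.allclose(data[1:], evectors)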
......
@@ -34,7 +34,8 @@ def build_tcorrelations(period, dates, sigma_t,
- eigenvectors
"""
# Try reading existing file
try:
evalues, evectors = \
@@ -46,10 +47,13 @@ def build_tcorrelations(period, dates, sigma_t,
# Compute matrix of distance
dt = old_div((pd.DatetimeIndex(dates)[:, np.newaxis]
- pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'h'))
- pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'D'))
# Compute the correlation matrix itself
corr = np.exp(-dt ** 2)
# ESO: test to compare with FIP
# corr = np.exp(-np.abs(dt))
# Component analysis
evalues, evectors = np.linalg.eigh(corr)
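For reference, a minimal sketch of the Gaussian temporal correlation built above, with the time separation now normalised by sigma_t in days ('D') rather than hours:

import numpy as np
import pandas as pd

dates = pd.date_range('2012-01-01', periods=5, freq='D')
sigma_t = 2  # correlation time scale, in days after this change

d = pd.DatetimeIndex(dates).values  # numpy datetime64 array
dt = (d[:, np.newaxis] - d[np.newaxis, :]) / np.timedelta64(sigma_t, 'D')
corr = np.exp(-dt ** 2)  # Gaussian decay with normalised separation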
@@ -65,7 +69,7 @@ def build_tcorrelations(period, dates, sigma_t,
if dump:
dump_tcorr(period, dates, sigma_t,
evalues, evectors, dir_dump)
except Exception as e:
raise e
......
@@ -3,11 +3,12 @@ standard_library.install_aliases()
import pickle as pickle
import numpy as np
import xarray as xr
import copy
from .utils.scalemaps import scale2map
from pycif.utils.path import init_dir
def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, run_id=None, **kwargs):
"""Dumps a control vector into a pickle file.
Does not save large correlations.
@@ -43,8 +44,12 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
components = self.components
for comp in components.attributes:
component = getattr(components, comp)
dir_comp = '{}/{}'.format(dir_netcdf, comp)
if dir_netcdf is not None:
dir_comp = dir_netcdf
else:
dir_comp = '{}/{}'.format(dir_netcdf, comp)
init_dir(dir_comp)
for trcr in component.parameters.attributes:
@@ -55,17 +60,27 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
self.x[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
x = scale2map(x, tracer, tracer.dates, self.domain)
# If 'offsets', add prior fluxes
# If offsets, add prior fluxes
if getattr(tracer, 'offsets', False):
x[:,0,:,:] += self.flxall/np.float(self.model.numscale)
print("Offsets=True")
# Make a copy to write out the original array
xa = copy.deepcopy(x)
xa /= np.float(self.model.numscale)
x[:,0,:,:] = (x[:,0,:,:] + self.flxall)/np.float(self.model.numscale)
else:
print("Offsets=False")
x[:,0,:,:] /= np.float(self.model.numscale)
xa = copy.deepcopy(x)
xb = np.reshape(
self.xb[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
xb = scale2map(xb, tracer, tracer.dates, self.domain)
xb /= np.float(self.model.numscale)
# ESO: unaggregated fluxes, for testing
arr = self.flxall[:, np.newaxis, :, :]/np.float(self.model.numscale)
xb_grid = xr.DataArray(arr,
@@ -76,10 +91,17 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
self.std[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
std = scale2map(std, tracer, tracer.dates, self.domain)
std /= np.float(self.model.numscale)
ds = xr.Dataset({'x': x, 'xb': xb, 'xb_grid': xb_grid, 'std': std,
'xa': xa,
'lon': tracer.domain.lon, 'lat': tracer.domain.lat})
ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, comp, trcr))
if run_id is not None:
ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, run_id, trcr))
else:
print("dumping controlvect")
ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, comp, trcr))
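Each tracer's control vector ends up as one netCDF file via xarray; a minimal sketch of that pattern (variable layout and file name illustrative, requires a netCDF backend):

import numpy as np
import xarray as xr

ds = xr.Dataset({'x': ('cell', np.zeros(4)),
                 'xb': ('cell', np.ones(4))})
ds.to_netcdf('controlvect_demo.nc')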
def load(self, cntrl_file, **kwargs):
......
@@ -84,7 +84,63 @@ def init_bprod(cntrlv, options={}, **kwargs):
corr.evectors = evectors
tracer.chi_hresoldim = sqrt_evalues.size
elif hasattr(tracer, 'hcorrelations') and tracer.hresol == 'regions':
# ESO: work in progress on regions
# TODO: possibly don't need a separate if block for regions; just
# pass a regions / tracer.hresol keyword to build_hcorrelations
corr = tracer.hcorrelations
dump_hcorr = getattr(corr, 'dump_hcorr', False)
dircorrel = getattr(corr, 'dircorrel', '')
evalmin = getattr(corr, 'evalmin', 0.5)
projection = getattr(grid, 'projection', 'gps')
# Two possible options: a uniform correlation length, or separate
# land and sea correlations based on a land-sea mask
# Default is no separation
lsm = getattr(corr, 'landsea', False)
if lsm:
sigma_land = getattr(corr, 'sigma_land', -1)
sigma_sea = getattr(corr, 'sigma_sea', -1)
file_lsm = getattr(corr, 'filelsm')
else:
sigma = getattr(corr, 'sigma', -1)
sigma_land = sigma
sigma_sea = -999
file_lsm = None
# Load or compute the horizontal correlations
# Check first whether they were already loaded
if (sigma_land, sigma_sea) in cntrlv.hcorrelations:
sqrt_evalues = \
cntrlv.hcorrelations[
(sigma_land, sigma_sea)]['sqrt_evalues']
evectors = \
cntrlv.hcorrelations[
(sigma_land, sigma_sea)]['evectors']
else:
sqrt_evalues, evectors = \
build_hcorrelations(
grid.zlat, grid.zlon, lsm,
sigma_land, sigma_sea,
file_lsm=file_lsm, evalmin=evalmin,
dump=dump_hcorr, dir_dump=dircorrel,
projection=projection, regions=True,
hresoldim=tracer.hresoldim,
**kwargs)
# Storing computed correlations for use by other components
cntrlv.hcorrelations[(sigma_land, sigma_sea)] = \
{'evectors': evectors,
'sqrt_evalues': sqrt_evalues}
corr.sqrt_evalues = sqrt_evalues
corr.evectors = evectors
tracer.chi_hresoldim = sqrt_evalues.size
else:
tracer.chi_hresoldim = tracer.hresoldim
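Eigendecompositions are cached in cntrlv.hcorrelations so other components with the same correlation lengths reuse them; the pattern in isolation (names illustrative):

hcorrelations = {}  # keyed on the (sigma_land, sigma_sea) pair

def get_or_build(key, build_func):
    if key not in hcorrelations:
        sqrt_evalues, evectors = build_func()
        hcorrelations[key] = {'sqrt_evalues': sqrt_evalues,
                              'evectors': evectors}
    return (hcorrelations[key]['sqrt_evalues'],
            hcorrelations[key]['evectors'])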
@@ -138,5 +194,5 @@ def init_bprod(cntrlv, options={}, **kwargs):
# Defining chi from the total dimension
cntrlv.chi = np.zeros((cntrlv.chi_dim,))
return cntrlv
@@ -64,8 +64,9 @@ def init_xb(cntrlv, **kwargs):
# Filling Xb and uncertainties
if getattr(tracer, 'type') == 'flexpart':
# ESO Reading fluxes and nested fluxes for flexpart
# ESO: Reading fluxes and nested fluxes for flexpart
# For now, only aggregation to regions is supported
# TODO: correlation/error cov
flxall = tracer.read(
trcr, tracer.dir, tracer.file, tracer.dates,
comp_type=comp,
@@ -74,22 +75,36 @@ def init_xb(cntrlv, **kwargs):
xb = np.ones(tracer.dim) * xb_scale + xb_value
flx_regions = map2scale(
flxall[:, np.newaxis, :, :], tracer, tracer.domain).flatten()
flxall[:, np.newaxis, :, :], tracer, tracer.domain, scale_area=True).flatten()
xb *= flx_regions
std = tracer.err * np.abs(xb)
if getattr(tracer, 'offsets', False):
# Optimize flux offsets
xb[:] = 0.
# Set a lower limit on the flux error
tracer.flxerr_ll = getattr(tracer, 'flxerr_ll', 1.e-8)
std[std <= tracer.flxerr_ll] = tracer.flxerr_ll
std[std < tracer.flxerr_ll*float(tracer.numscale)/3600.] = \
tracer.flxerr_ll*float(tracer.numscale)/3600.
# Keep the un-aggregated fluxes for later use in obsoper
cntrlv.flxall = flxall
# Read global fluxes, for contribution outside domain
flx_bkg = tracer.read_glob(
trcr, tracer.dir, tracer.file_glob, tracer.dates,
comp_type=comp,
**kwargs) * xb_scale \
+ xb_value
cntrlv.flx_bkg = flx_bkg
# Most types are scalar factors
elif getattr(tracer, 'type', 'scalar') == 'scalar' \
......
@@ -24,7 +24,9 @@ def sqrtbprod(controlvect, chi, **kwargs):
ndates = tracer.ndates
# Dealing with non-diagonal spatial B
if tracer.hresol == 'hpixels' and hasattr(tracer, 'hcorrelations'):
# if tracer.hresol == 'hpixels' and hasattr(tracer, 'hcorrelations'):
# ESO: allow use with regions
if (tracer.hresol == 'hpixels' or tracer.hresol == 'regions') and hasattr(tracer, 'hcorrelations'):
corr = tracer.hcorrelations
sqrt_evalues = corr.sqrt_evalues
evectors = corr.evectors
@@ -88,7 +90,8 @@ def sqrtbprod_ad(controlvect, dx, **kwargs):
chi_pointer = tracer.chi_pointer
chi_dim = tracer.chi_dim
ndates = tracer.ndates
# ESO TODO Look at this...
# x * std
xstd = dx[x_pointer: x_pointer + x_dim] \
* controlvect.std[x_pointer: x_pointer + x_dim]
@@ -113,7 +116,7 @@ def sqrtbprod_ad(controlvect, dx, **kwargs):
.flatten(order='F')
# Dealing with non-diagonal spatial B
if tracer.hresol == 'hpixels' and hasattr(tracer, 'hcorrelations'):
if (tracer.hresol == 'hpixels' or tracer.hresol == 'regions') and hasattr(tracer, 'hcorrelations'):
corr = tracer.hcorrelations
sqrt_evalues = corr.sqrt_evalues
evectors = corr.evectors
......
@@ -71,7 +71,7 @@ def hresol2dim(tracer, dom, **kwargs):
if tracer.regions.shape != (dom.nlat, dom.nlon):
raise Exception("Regions were not correctly defined in {}"
.format(tracer.ficregions))
return tracer.nregions
elif tracer.hresol == 'global':
......
@@ -118,8 +118,17 @@ def map2scale(xmap, tracer, dom, scale_area=False):
elif tracer.hresol == 'regions':
# TODO: add option to scale with area size
x = np.zeros((ndates, tracer.vresoldim, tracer.nregions))
for regID, reg in enumerate(np.unique(tracer.regions)):
x[..., regID] = np.sum(xmap[..., tracer.regions == reg], axis=2)
if scale_area:
for regID, reg in enumerate(np.unique(tracer.regions)):
x[..., regID] = np.sum(xmap[..., tracer.regions == reg]*
tracer.domain.areas[..., tracer.regions == reg], axis=2)
x[..., regID] /= tracer.region_areas[regID]
else:
for regID, reg in enumerate(np.unique(tracer.regions)):
x[..., regID] = np.sum(xmap[..., tracer.regions == reg], axis=2)
elif tracer.hresol == 'global':
x = xmap.sum(axis=(2, 3))[..., np.newaxis]
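The scale_area branch above divides by tracer.region_areas; a hedged sketch of how those totals could be precomputed from the grid-cell areas (attribute layout assumed, values illustrative):

import numpy as np

regions = np.array([[0, 0, 1], [1, 1, 2]])  # region id per grid cell
areas = np.ones_like(regions, dtype=float)  # matching cell areas

# Total area per region, indexed like tracer.region_areas
region_areas = np.array([areas[regions == reg].sum()
                         for reg in np.unique(regions)])
print(region_areas)  # [2. 3. 1.]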
......
@@ -10,7 +10,7 @@ from . import headers, utils
def do_parse(self,
fic,
maxlen=300,
default_unit='ppm',
default_unit='ppb',
default_tz='utc',
default_duration=1,
na_values=-999,
@@ -175,7 +175,7 @@ def do_parse(self,
df = df[~np.isnan(df['obs'])]
df = df[df['obs'] > na_values]
df = df[df['obserror'] > err_na_values]
# Rescales if needed
if kwargs.get('rescale', False):
coeffscale = utils.rescale(fic, header)
@@ -189,4 +189,10 @@ def do_parse(self,
df = utils.convert_unit(df, ['obs', 'obserror'],
default_unit=default_unit)
# Set a minimum measurement error to override reported value
if hasattr(self, 'measerr'):
# df['obserror'][df['obserror'] < self.measerr] = self.measerr
df.loc[df['obserror'] < self.measerr, 'obserror'] = self.measerr
return df
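The .loc assignment above replaces the commented chained indexing, which pandas may apply to a temporary copy rather than the DataFrame itself; a minimal reproduction:

import pandas as pd

df = pd.DataFrame({'obserror': [0.1, 2.0, 0.5]})
measerr = 1.0
# .loc assigns in place; df['obserror'][mask] = ... can silently miss
df.loc[df['obserror'] < measerr, 'obserror'] = measerr
print(df['obserror'].tolist())  # [1.0, 2.0, 1.0]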
@@ -69,7 +69,7 @@ def parse_fic(fic):
return infos
def convert_unit(df, params, unit='ppm', default_unit='ppm'):
def convert_unit(df, params, unit='ppb', default_unit='ppb'):
"""Converts between ppb, ppm, ppt. Default is conversion to ppm
"""
......
@@ -6,7 +6,8 @@ from itertools import izip
from pycif.utils.check import verbose
def hcoord(obsvect, chunksize=1e3, **kwargs):
# ESO TEST: changing default chunksize
def hcoord(obsvect, chunksize=1e5, **kwargs):
"""Finds out in which domain grid cells the observations are
Args:
......
from __future__ import absolute_import
import numpy as np
import pandas as pd
import pycif.utils.check as check
from .svd import svd_cost
def simul(self, chi, grad=True, run_id=-1, **kwargs):
@@ -45,6 +48,7 @@ def simul(self, chi, grad=True, run_id=-1, **kwargs):
# Computes forward simulations
controlvect.x = controlvect.sqrtbprod(chi, **kwargs)
obsvect = obsoper.obsoper(controlvect, 'fwd',
datei=self.datei, datef=self.datef,
@@ -57,43 +61,44 @@ def simul(self, chi, grad=True, run_id=-1, **kwargs):
# TODO: Include these lines into rinvprod.py, with option for
# non-diagonal matrices, eventually
departures = obsvect.datastore['sim'] - obsvect.datastore['obs']
j_o = 0.5 * (departures ** 2 / obsvect.datastore['obserror'] ** 2).sum()
# j_o = 0.5 * (departures ** 2 / obsvect.datastore['obserror'] ** 2).sum()
j_o = 0.5 * (departures ** 2 / (obsvect.datastore['obserror'] ** 2 + obsvect.datastore['obs_bkgerr'] ** 2)).sum()
zcost = j_b + j_o
towrite = (
"In Simulator {}:\n"
" Jb = {}\n"
" Jo = {}\n"
" Total = {}") \
" Jb = {:.4E}\n"
" Jo = {:.4E}\n"
" Total = {:.4E}") \
.format(run_id, j_b, j_o, zcost)
check.verbose(towrite)
# Saves cost function value
with open('{}/cost.txt'.format(workdir), 'a') as f:
f.write('{},{:.4E},{:.4E},{:.4E}\n'.format(run_id, j_b, j_o, zcost))
# Return only the cost function if grad = False
if not grad:
return zcost
# Runs the adjoint to get the gradients
# Use the same total error variance as in j_o above
obsvect.datastore['obs_incr'] = \
departures / (obsvect.datastore['obserror'] ** 2
+ obsvect.datastore['obs_bkgerr'] ** 2)
# ESO rewrite this...
# controlvect = obsoper.obsoper(obsvect, 'footprint',
# controlvect = obsoper.obsoper(obsvect, 'adj',
# datei=datei, datef=datef,
# workdir=workdir, run_id=run_id,
# reload_results=reload_results,
# **kwargs)
# import pdb; pdb.set_trace()
controlvect.dx = obsvect.dx
# Observational part of the gradient
zgrad_b = chi
# Control part of the gradient
# Observational part of the gradient
zgrad_o = controlvect.sqrtbprod_ad(controlvect.dx, **kwargs)
# Background (control) part of the gradient
zgrad_b = chi
zgrad = zgrad_b + zgrad_o
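To summarise what this function assembles: a hedged standalone sketch of the total cost, assuming j_b = 0.5 * chi.chi in chi space (consistent with grad(Jb) = chi above):

import numpy as np

def total_cost(chi, departures, obserror, obs_bkgerr):
    j_b = 0.5 * np.dot(chi, chi)  # background term
    j_o = 0.5 * (departures ** 2
                 / (obserror ** 2 + obs_bkgerr ** 2)).sum()  # obs term
    return j_b + j_o

# The gradient pairs with it: zgrad_b + zgrad_o, as computed above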
@@ -101,7 +106,7 @@ def simul(self, chi, grad=True, run_id=-1, **kwargs):
znorm_grad_b = np.dot(zgrad_b, zgrad_b) ** 0.5
znorm_grad_o = np.dot(zgrad_o, zgrad_o) ** 0.5
check.verbose("In Simulator {}:\n"
" grad(Jb) = {}\n"
" grad(Jo) = {}\n"
" grad(Jb) = {:.4E}\n"
" grad(Jo) = {:.4E}\n"
.format(run_id, znorm_grad_b, znorm_grad_o))
return zcost, zgrad
@@ -37,6 +37,22 @@ class Fluxes(Plugin):
"""
raise PluginError('The function read was not defined')
# ESO: test