Commit a755a6fb authored by Espen Sollum

Added scaling of x with region area

parent 4409903f
@@ -10,7 +10,8 @@ import pycif.utils.check as check
 def build_hcorrelations(zlat, zlon, lsm,
                         sigma_land, sigma_sea, file_lsm=None,
-                        evalmin=0.5,
+                        evalmin=0.5, regions=False,
+                        hresoldim=None,
                         dump=False, dir_dump='', projection='gps',
                         **kwargs):
     """Build horizontal correlation matrix based on distance between grid
@@ -30,6 +31,8 @@ def build_hcorrelations(zlat, zlon, lsm,
         sigma_sea (float): idem for sea
         evalmin (float): flag out all eigenvalues below this value. Default
                          is 0.5
+        regions (bool): True if regions are used
+        hresoldim (int): physical resolution of the state vector; used with 'regions'
         dump (bool): dumps computed correlations if True
         dir_dump (str): directory where correlation matrices are stored
         projection (str): the projection used for the longitudes and latitudes
@@ -38,41 +41,53 @@ def build_hcorrelations(zlat, zlon, lsm,
     tuple with:
         - square roots of eigenvalues
         - eigenvectors
+
+    Todo:
+        - complete implementation of correlations between regions,
+          including read/write functionality
     """
 
     # Define domain dimensions
     nlon, nlat = zlat.shape
 
+    import pdb; pdb.set_trace()
+
     # Try reading existing file
     try:
         evalues, evectors = \
-            read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump)
+            read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump, hresoldim=hresoldim)
 
     # Else build correlations
     except IOError:
         check.verbose("Computing hcorr")
 
-        # No correlation between land and sea if lsm = True
-        if lsm:
-            landseamask = readnc(file_lsm, ['lsm']).flatten()
-            sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
-                    * (landseamask[np.newaxis, :] >= 0.5) \
-                    + sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
-                    * (landseamask[np.newaxis, :] < 0.5)
-
-        # Otherwise, isotropic correlation
-        # Takes sigma_land
-        else:
-            sigma = sigma_land
-
-        # Compute matrix of distance
-        dx = dist_matrix(zlat, zlon, projection)
-
-        # Compute the correlation matrix itself
-        corr = np.exp(old_div(-dx, sigma))
-        corr[sigma <= 0] = 0
+        # 'regions' is WIP, for now use identity matrix
+        if regions:
+            if hresoldim is None:
+                raise Exception("hresoldim missing!")
+
+            corr = np.identity(hresoldim)
+
+        else:
+            # No correlation between land and sea if lsm = True
+            if lsm:
+                landseamask = readnc(file_lsm, ['lsm']).flatten()
+                sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
+                        * (landseamask[np.newaxis, :] >= 0.5) \
+                        + sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
+                        * (landseamask[np.newaxis, :] < 0.5)
+
+            # Otherwise, isotropic correlation
+            # Takes sigma_land
+            else:
+                sigma = sigma_land
+
+            # Compute matrix of distance
+            dx = dist_matrix(zlat, zlon, projection)
+
+            # Compute the correlation matrix itself
+            corr = np.exp(old_div(-dx, sigma))
+            corr[sigma <= 0] = 0
 
         # Component analysis
         evalues, evectors = np.linalg.eigh(corr)
@@ -87,7 +102,7 @@ def build_hcorrelations(zlat, zlon, lsm,
         # Dumping to a txt file
         if dump:
             dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
-                       evalues, evectors, dir_dump)
+                       evalues, evectors, dir_dump, hresoldim=hresoldim)
 
     except Exception as e:
         raise e
@@ -99,16 +114,17 @@ def build_hcorrelations(zlat, zlon, lsm,
 def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
-               evalues, evectors, dir_dump):
+               evalues, evectors, dir_dump, hresoldim=None):
     """Dumps eigenvalues and vectors to a txt file. The default file format
     is:
     '{}/horcor{}x{}cs{}cl{}_lmdz5.txt'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
     """
 
     ncell = evalues.size
 
+    # ESO TODO: change file name for spatial regions
     file_dump = '{}/horcor_{}x{}_cs{}_cl{}.bin'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
@@ -122,16 +138,19 @@ def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
     else:
         datasave.tofile(file_dump)
 
 
-def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump):
+def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump, hresoldim=None):
     """Reads horizontal correlations from existing text file
 
     Args:
         nlon, nlat (ints): dimensions of the domain
         sigma_land, sigma_sea (floats): horizontal correlation distances
         dir_dump (str): where the horizontal correlations have been stored
+        hresoldim: if not None, indicates the number of regions used
     """
 
+    # ESO TODO: fix file name
     file_dump = '{}/horcor_{}x{}_cs{}_cl{}.bin'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
@@ -139,8 +158,11 @@ def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump):
         raise IOError("{} does not exist. "
                       "Please compute correlations from scratch".format
                       (file_dump))
 
-    data = np.fromfile(file_dump).reshape((-1, nlon * nlat))
+    if hresoldim is None:
+        data = np.fromfile(file_dump).reshape((-1, nlon * nlat))
+    else:
+        data = np.fromfile(file_dump).reshape((-1, hresoldim))
 
     evalues = data[0]
     evectors = data[1:]
...
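The hunks above keep the existing recipe for pixel-resolved control vectors (exponential decay of correlation with distance, then an eigendecomposition truncated at evalmin) and, when regions=True, fall back to an identity matrix of size hresoldim. A minimal standalone sketch of that recipe; sketch_hcorrelations is a hypothetical name, not the pycif API, and the distance matrix dx is assumed precomputed:

import numpy as np

def sketch_hcorrelations(dx, sigma, evalmin=0.5, regions=False, hresoldim=None):
    """Illustrative only: build a correlation matrix and its truncated
    eigendecomposition. dx is an (n, n) matrix of distances between cells."""
    if regions:
        # Placeholder used by the commit: regions are uncorrelated for now
        if hresoldim is None:
            raise ValueError("hresoldim is required when regions=True")
        corr = np.identity(hresoldim)
    else:
        # Exponential decay of correlation with distance
        corr = np.exp(-dx / sigma)

    # Symmetric eigendecomposition, then drop small eigenvalues
    evalues, evectors = np.linalg.eigh(corr)
    keep = evalues >= evalmin
    return np.sqrt(evalues[keep]), evectors[:, keep]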
@@ -34,7 +34,8 @@ def build_tcorrelations(period, dates, sigma_t,
         - eigenvectors
     """
 
     # Try reading existing file
     try:
         evalues, evectors = \
@@ -46,10 +47,13 @@ def build_tcorrelations(period, dates, sigma_t,
         # Compute matrix of distance
         dt = old_div((pd.DatetimeIndex(dates)[:, np.newaxis]
-                      - pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'h'))
+                      - pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'D'))
 
         # Compute the correlation matrix itself
         corr = np.exp(-dt ** 2)
 
+        # ESO: test to compare with FIP
+        # corr = np.exp(-np.abs(dt))
 
         # Component analysis
         evalues, evectors = np.linalg.eigh(corr)
@@ -65,7 +69,7 @@ def build_tcorrelations(period, dates, sigma_t,
         if dump:
             dump_tcorr(period, dates, sigma_t,
                        evalues, evectors, dir_dump)
 
     except Exception as e:
         raise e
...
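The only functional change in this file is the unit passed to np.timedelta64: the correlation length sigma_t is now interpreted in days rather than hours, and an exponential kernel is left commented out for comparison. A self-contained sketch of the Gaussian kernel, assuming sigma_t is an integer number of days; sketch_tcorrelations is a hypothetical helper, not part of pycif:

import numpy as np
import pandas as pd

def sketch_tcorrelations(dates, sigma_t_days):
    """Illustrative only: Gaussian correlation between sampling dates,
    with the correlation length expressed in days."""
    vals = pd.DatetimeIndex(dates).values  # numpy datetime64 array
    # Pairwise separations in units of the correlation length
    dt = (vals[:, np.newaxis] - vals[np.newaxis, :]) / np.timedelta64(sigma_t_days, 'D')
    return np.exp(-dt ** 2)

# Example: five daily dates and a 2-day correlation length
corr = sketch_tcorrelations(pd.date_range('2012-01-01', periods=5, freq='D'), 2)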
@@ -3,11 +3,12 @@ standard_library.install_aliases()
 import pickle as pickle
 import numpy as np
 import xarray as xr
+import copy
 
 from .utils.scalemaps import scale2map
 from pycif.utils.path import init_dir
 
 
-def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
+def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, run_id=None, **kwargs):
     """Dumps a control vector into a pickle file.
     Does not save large correlations.
@@ -43,8 +44,12 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
     components = self.components
     for comp in components.attributes:
         component = getattr(components, comp)
 
-        dir_comp = '{}/{}'.format(dir_netcdf, comp)
+        if dir_netcdf is not None:
+            dir_comp = dir_netcdf
+        else:
+            dir_comp = '{}/{}'.format(dir_netcdf, comp)
+
         init_dir(dir_comp)
 
         for trcr in component.parameters.attributes:
@@ -55,17 +60,27 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
                 self.x[tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, -1))
             x = scale2map(x, tracer, tracer.dates, self.domain)
 
-            import pdb; pdb.set_trace()
-            # If 'offsets', add prior fluxes
+            # If offsets, add prior fluxes
             if getattr(tracer, 'offsets', False):
-                x[:,0,:,:] += self.flxall/np.float(self.model.numscale)
+                print("Offsets=True")
+                # Make a copy to write out the original array
+                xa = copy.deepcopy(x)
+                xa /= np.float(self.model.numscale)
+                x[:,0,:,:] = (x[:,0,:,:] + self.flxall)/np.float(self.model.numscale)
+            else:
+                print("Offsets=False")
+                x[:,0,:,:] /= np.float(self.model.numscale)
+                xa = copy.deepcopy(x)
 
             xb = np.reshape(
                 self.xb[tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, -1))
             xb = scale2map(xb, tracer, tracer.dates, self.domain)
+            xb /= np.float(self.model.numscale)
 
             # ESO: unaggregated fluxes, for testing
             arr = self.flxall[:, np.newaxis, :, :]/np.float(self.model.numscale)
             xb_grid = xr.DataArray(arr,
@@ -76,10 +91,17 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
                 self.std[tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, -1))
             std = scale2map(std, tracer, tracer.dates, self.domain)
+            std /= np.float(self.model.numscale)
 
             ds = xr.Dataset({'x': x, 'xb': xb, 'xb_grid': xb_grid, 'std': std,
+                             'xa': xa,
                              'lon': tracer.domain.lon, 'lat': tracer.domain.lat})
 
-            ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, comp, trcr))
+            if run_id is not None:
+                ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, run_id, trcr))
+            else:
+                print("dumping controlvect")
+                ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, comp, trcr))
 
 
 def load(self, cntrl_file, **kwargs):
...
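In the dump above, xa is introduced to hold the control vector in physical units before the prior fluxes are added back, and x, xb and std are all divided by the model's numscale, apparently so that the NetCDF output is written in physical units. A condensed sketch of that bookkeeping with plain numpy arrays; sketch_offsets_bookkeeping and its arguments are hypothetical stand-ins for the controlvect attributes:

import copy
import numpy as np

def sketch_offsets_bookkeeping(x, flxall, numscale, offsets):
    """Illustrative only: mirror the scaling done before writing the NetCDF.
    x       : (ndates, nlev, nlat, nlon) control-vector field on the grid
    flxall  : (ndates, nlat, nlon) un-aggregated prior fluxes
    numscale: model scaling factor; offsets: True if x holds flux offsets."""
    if offsets:
        xa = copy.deepcopy(x) / numscale                      # offsets alone
        x[:, 0, :, :] = (x[:, 0, :, :] + flxall) / numscale   # offsets + prior
    else:
        x[:, 0, :, :] /= numscale
        xa = copy.deepcopy(x)                                 # same field when no offsets
    return x, xa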
@@ -84,7 +84,63 @@ def init_bprod(cntrlv, options={}, **kwargs):
             corr.evectors = evectors
             tracer.chi_hresoldim = sqrt_evalues.size
 
+        elif hasattr(tracer, 'hcorrelations') and tracer.hresol == 'regions':
+            # ESO: work in progress on regions
+            # TODO: possibly don't need a separate if block for regions, just
+            #       pass a regions / tracer.hresol keyword to build_hcorrelations
+            corr = tracer.hcorrelations
+            dump_hcorr = getattr(corr, 'dump_hcorr', False)
+            dircorrel = getattr(corr, 'dircorrel', '')
+            evalmin = getattr(corr, 'evalmin', 0.5)
+            projection = getattr(grid, 'projection', 'gps')
+
+            # Two possible options: uniform correlation length,
+            # or separate land and sea correlations along a land-sea mask
+            # Default is no separation
+            lsm = getattr(corr, 'landsea', False)
+            if lsm:
+                sigma_land = getattr(corr, 'sigma_land', -1)
+                sigma_sea = getattr(corr, 'sigma_sea', -1)
+                file_lsm = getattr(corr, 'filelsm')
+            else:
+                sigma = getattr(corr, 'sigma', -1)
+                sigma_land = sigma
+                sigma_sea = -999
+                file_lsm = None
+
+            # Load or compute the horizontal correlations
+            # Check beforehand whether they were already loaded
+            if (sigma_land, sigma_sea) in cntrlv.hcorrelations:
+                sqrt_evalues = \
+                    cntrlv.hcorrelations[
+                        (sigma_land, sigma_sea)]['sqrt_evalues']
+                evectors = \
+                    cntrlv.hcorrelations[
+                        (sigma_land, sigma_sea)]['evectors']
+
+            else:
+                sqrt_evalues, evectors = \
+                    build_hcorrelations(
+                        grid.zlat, grid.zlon, lsm,
+                        sigma_land, sigma_sea,
+                        file_lsm=file_lsm, evalmin=evalmin,
+                        dump=dump_hcorr, dir_dump=dircorrel,
+                        projection=projection, regions=True,
+                        hresoldim=tracer.hresoldim,
+                        **kwargs)
+
+                # Storing computed correlations for use by other components
+                cntrlv.hcorrelations[(sigma_land, sigma_sea)] = \
+                    {'evectors': evectors,
+                     'sqrt_evalues': sqrt_evalues}
+
+            corr.sqrt_evalues = sqrt_evalues
+            corr.evectors = evectors
+            tracer.chi_hresoldim = sqrt_evalues.size
+
         else:
             tracer.chi_hresoldim = tracer.hresoldim
@@ -138,5 +194,5 @@ def init_bprod(cntrlv, options={}, **kwargs):
     # Defining chi from the total dimension
     cntrlv.chi = np.zeros((cntrlv.chi_dim,))
 
     return cntrlv
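The new 'regions' branch mirrors the existing 'hpixels' branch, including the cache keyed on (sigma_land, sigma_sea) that lets several components reuse the same eigenpairs; the TODO suggests the two branches may be merged later. A minimal sketch of that caching pattern; get_or_build_correlations and compute_eigenpairs are hypothetical names standing in for the call to build_hcorrelations:

def get_or_build_correlations(cache, sigma_land, sigma_sea, compute_eigenpairs):
    """Illustrative only: reuse eigenpairs already computed for the same
    (sigma_land, sigma_sea) pair, otherwise compute and store them."""
    key = (sigma_land, sigma_sea)
    if key not in cache:
        sqrt_evalues, evectors = compute_eigenpairs(sigma_land, sigma_sea)
        cache[key] = {'sqrt_evalues': sqrt_evalues, 'evectors': evectors}
    return cache[key]['sqrt_evalues'], cache[key]['evectors']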
@@ -64,8 +64,9 @@ def init_xb(cntrlv, **kwargs):
         # Filling Xb and uncertainties
         if getattr(tracer, 'type') == 'flexpart':
-            # ESO Reading fluxes and nested fluxes for flexpart
+            # ESO: Reading fluxes and nested fluxes for flexpart
             # For now, only aggregation to regions is supported
+            # TODO: correlation/error cov
             flxall = tracer.read(
                 trcr, tracer.dir, tracer.file, tracer.dates,
                 comp_type=comp,
@@ -74,22 +75,36 @@ def init_xb(cntrlv, **kwargs):
             xb = np.ones(tracer.dim) * xb_scale + xb_value
 
+            # import pdb; pdb.set_trace()
             flx_regions = map2scale(
-                flxall[:, np.newaxis, :, :], tracer, tracer.domain).flatten()
+                flxall[:, np.newaxis, :, :], tracer, tracer.domain, scale_area=True).flatten()
 
             xb *= flx_regions
             std = tracer.err * np.abs(xb)
 
+            if getattr(tracer, 'offsets', False):
+                # Optimize flux offsets
+                xb[:] = 0.
 
             # Set a lower limit on the flux error
             tracer.flxerr_ll = getattr(tracer, 'flxerr_ll', 1.e-8)
-            std[std <= tracer.flxerr_ll] = tracer.flxerr_ll
+            std[std < tracer.flxerr_ll*float(tracer.numscale)/3600.] = \
+                tracer.flxerr_ll*float(tracer.numscale)/3600.
 
             # Keep the un-aggregated fluxes for later use in obsoper
             cntrlv.flxall = flxall
 
+            # Read global fluxes, for contribution outside domain
+            flx_bkg = tracer.read_glob(
+                trcr, tracer.dir, tracer.file_glob, tracer.dates,
+                comp_type=comp,
+                **kwargs) * xb_scale \
+                + xb_value
+            # import pdb; pdb.set_trace()
+            cntrlv.flx_bkg = flx_bkg
 
         # Most types are scalar factors
         elif getattr(tracer, 'type', 'scalar') == 'scalar' \
...
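The new error floor is expressed in internal units: the configured flxerr_ll is multiplied by numscale and divided by 3600, which reads like a per-hour to per-second conversion on top of the model scaling, although the unit convention is not documented in this hunk. A sketch of the floor under that assumption; apply_flux_error_floor is a hypothetical helper, not part of pycif:

import numpy as np

def apply_flux_error_floor(std, flxerr_ll, numscale):
    """Illustrative only: clip prior flux uncertainties from below.
    The floor is converted to internal units (numscale scaling, per-hour to
    per-second); this interpretation is an assumption, not documented."""
    floor = flxerr_ll * float(numscale) / 3600.0
    return np.maximum(std, floor)

# Example: with flxerr_ll = 1e-8 and numscale = 1e12 the floor is about 2.8
std = apply_flux_error_floor(np.array([0.0, 1.0, 5.0]), 1.0e-8, numscale=1.0e12)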
@@ -24,7 +24,9 @@ def sqrtbprod(controlvect, chi, **kwargs):
         ndates = tracer.ndates
 
         # Dealing with non-diagonal spatial B
-        if tracer.hresol == 'hpixels' and hasattr(tracer, 'hcorrelations'):
+        # if tracer.hresol == 'hpixels' and hasattr(tracer, 'hcorrelations'):
+        # ESO: allow use with regions
+        if (tracer.hresol == 'hpixels' or tracer.hresol == 'regions') and hasattr(tracer, 'hcorrelations'):
             corr = tracer.hcorrelations
             sqrt_evalues = corr.sqrt_evalues
             evectors = corr.evectors
@@ -88,7 +90,8 @@ def sqrtbprod_ad(controlvect, dx, **kwargs):
         chi_pointer = tracer.chi_pointer
         chi_dim = tracer.chi_dim
         ndates = tracer.ndates
 
+        # ESO TODO: look at this...
         # x * std
         xstd = dx[x_pointer: x_pointer + x_dim] \
             * controlvect.std[x_pointer: x_pointer + x_dim]
@@ -113,7 +116,7 @@ def sqrtbprod_ad(controlvect, dx, **kwargs):
             .flatten(order='F')
 
         # Dealing with non-diagonal spatial B
-        if tracer.hresol == 'hpixels' and hasattr(tracer, 'hcorrelations'):
+        if (tracer.hresol == 'hpixels' or tracer.hresol == 'regions') and hasattr(tracer, 'hcorrelations'):
             corr = tracer.hcorrelations
             sqrt_evalues = corr.sqrt_evalues
             evectors = corr.evectors
...
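The two hunks above only widen the hresol test so region-resolved control vectors go through the same eigenpair machinery as pixel-resolved ones; the body of sqrtbprod itself is not shown in this diff. As background, the usual square-root-B construction applies diag(std) * E * diag(sqrt(lambda)) to chi, sketched below (illustrative only, not the pycif implementation; the function names are hypothetical):

import numpy as np

def sketch_sqrtbprod(chi, sqrt_evalues, evectors, std):
    """Illustrative only: map chi from the reduced eigenspace to physical
    space, dx = diag(std) @ evectors @ diag(sqrt_evalues) @ chi."""
    dx_corr = evectors @ (sqrt_evalues * chi)   # correlated spatial pattern
    return std * dx_corr

def sketch_sqrtbprod_ad(dx, sqrt_evalues, evectors, std):
    """Adjoint of the sketch above:
    chi = diag(sqrt_evalues) @ evectors.T @ (std * dx)."""
    return sqrt_evalues * (evectors.T @ (std * dx))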
@@ -71,7 +71,7 @@ def hresol2dim(tracer, dom, **kwargs):
         if tracer.regions.shape != (dom.nlat, dom.nlon):
             raise Exception("Regions were not correctly defined in {}"
                             .format(tracer.ficregions))
 
         return tracer.nregions
 
     elif tracer.hresol == 'global':
...
@@ -118,8 +118,17 @@ def map2scale(xmap, tracer, dom, scale_area=False):
     elif tracer.hresol == 'regions':
         # TODO: add option to scale with area size
         x = np.zeros((ndates, tracer.vresoldim, tracer.nregions))
-        for regID, reg in enumerate(np.unique(tracer.regions)):
-            x[..., regID] = np.sum(xmap[..., tracer.regions == reg], axis=2)
+        if scale_area:
+            for regID, reg in enumerate(np.unique(tracer.regions)):
+                x[..., regID] = np.sum(xmap[..., tracer.regions == reg] *
+                                       tracer.domain.areas[..., tracer.regions == reg], axis=2)
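With scale_area=True, each region value becomes the area-weighted sum of the gridded map over the cells belonging to that region, which appears to be what the init_xb change relies on to turn per-area fluxes into per-region totals. A standalone sketch of the aggregation; sketch_map2regions is a hypothetical name, and the diff is truncated before the scale_area=False branch, which is assumed to keep the plain sum:

import numpy as np

def sketch_map2regions(xmap, regions, areas, scale_area=True):
    """Illustrative only: aggregate a gridded field onto regions.
    xmap   : (ndates, nlev, nlat, nlon) gridded field
    regions: (nlat, nlon) integer region labels
    areas  : (nlat, nlon) grid-cell areas."""
    region_ids = np.unique(regions)
    out = np.zeros(xmap.shape[:2] + (region_ids.size,))
    for k, reg in enumerate(region_ids):
        mask = regions == reg
        if scale_area:
            # Area-weighted sum over the cells of this region
            out[..., k] = np.sum(xmap[..., mask] * areas[mask], axis=-1)
        else:
            out[..., k] = np.sum(xmap[..., mask], axis=-1)
    return out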