Commit ab34842a authored by Espen Sollum

Bug in obsparser. Doing merge with master

parent 6a4352d9
@@ -215,5 +215,6 @@ meteo :
 version : csv
 dirmeteo : /home/eso/repos/CIF/data/dummy_gauss/
 ficmeteo : meteo.csv
+resolution='1H'
 #####################################################################
 #####################################################################
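The added resolution key is a pandas-style frequency string ('1H' = one hour). Assuming the parser hands the string to pandas, as pyCIF does for similar period settings, a minimal sketch of what it denotes:

    import pandas as pd

    # '1H' is a pandas offset alias: one hour between consecutive time stamps
    dates = pd.date_range("2010-01-01", "2010-01-02", freq="1H")
    print(len(dates))  # 25 stamps, i.e. 24 one-hour intervals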
+from builtins import range
 from .init_xb import init_xb
 from .init_bprod import init_bprod
 from .control2native import control2native
@@ -41,12 +42,8 @@ def ini_data(plugin, **kwargs):
     components = plugin.components
     for comp in components.attributes:
         component = getattr(components, comp)
-        #ESO
-        print ("component", components)
-        # for trcr in component.parameters.attributes:
-        for trcr in component.attributes:
-            # tracer = getattr(component.parameters, trcr)
-            tracer = getattr(component, trcr)
+        for trcr in component.parameters.attributes:
+            tracer = getattr(component.parameters, trcr)
             tracer.dir = getattr(tracer, 'dir',
                                  getattr(component, 'dir',
@@ -54,12 +51,12 @@ def ini_data(plugin, **kwargs):
                                      getattr(
                                          getattr(plugin.model,
                                                  comp, None),
                                          'dir', '')))
-            tracer.fic = getattr(tracer, 'fic',
-                                 getattr(component, 'fic',
+            tracer.file = getattr(tracer, 'file',
+                                  getattr(component, 'file',
                                       getattr(
                                           getattr(plugin.model,
                                                   comp, None),
-                                          'fic', '')))
+                                          'file', '')))
             # Forces the tracer to have an empty read function
             if not hasattr(tracer, 'read'):
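The fic-to-file rename keeps the same three-level fallback: tracer first, then component, then the model-level component. A standalone sketch of that getattr chain (the Dummy objects are illustrative, not pyCIF classes):

    class Dummy(object):
        pass

    tracer, component, model_comp = Dummy(), Dummy(), Dummy()
    model_comp.file = 'fluxes.nc'   # only the model level defines it here

    # Resolution order: tracer.file -> component.file -> model_comp.file -> ''
    path = getattr(tracer, 'file',
                   getattr(component, 'file',
                           getattr(model_comp, 'file', '')))
    print(path)  # -> 'fluxes.nc'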
@@ -69,9 +66,13 @@ def ini_data(plugin, **kwargs):
             # Gets read from model if not already defined
             try:
                 tracer.read(*range(4))
+            # except PluginError:
+            # except TypeError:
+            #     tracer.read(*list(range(4)))
             except PluginError:
                 if comp in ['fluxes', 'inicond', 'latcond', 'topcond']:
                     tracer.read = getattr(plugin.model, comp).read
-                    print "ESO: TypeError"
                 else:
                     raise Exception('WARNING: {} was not defined in the '
@@ -79,6 +80,7 @@ def ini_data(plugin, **kwargs):
                                     'control vector'.format(comp))
             except Exception:
-                print "ESO: exception"
+                pass
         # Gets the domain and
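The try/except above probes each tracer's read with four dummy arguments and falls back to the model's reader when the plugin does not provide one. A toy version of that probe-and-fallback idiom (names are illustrative, not pyCIF's):

    class PluginError(Exception):
        pass

    def stub_read(*args):
        raise PluginError("read not implemented")

    def model_read(a, b, c, d):
        return "model-level read"

    reader = stub_read
    try:
        reader(*range(4))          # probe with dummy arguments
    except PluginError:
        reader = model_read        # fall back to the model's reader
    print(reader(0, 1, 2, 3))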
......
+from __future__ import division
+from past.utils import old_div
 import numpy as np
 import os
 from itertools import islice
@@ -7,7 +9,7 @@ import pycif.utils.check as check
 def build_hcorrelations(zlat, zlon, lsm,
-                        sigma_land, sigma_sea, fic_lsm=None,
+                        sigma_land, sigma_sea, file_lsm=None,
                         evalmin=0.5,
                         dump=False, dir_dump='', projection='gps',
                         **kwargs):
@@ -21,7 +23,7 @@ def build_hcorrelations(zlat, zlon, lsm,
     Args:
         zlat (np.array): 2D array of latitudes
         zlon (np.array): 2D array of longitudes
-        fic_lsm (str): path to NetCDF file with land-sea mask (grid must be
+        file_lsm (str): path to NetCDF file with land-sea mask (grid must be
         consistent with LMDZ grid); the land-sea mask is assumed to be stored
         in the variable 'lsm'
         sigma_land (float): decay distance for correlation between land cells
@@ -52,7 +54,7 @@ def build_hcorrelations(zlat, zlon, lsm,
     check.verbose("Computing hcorr")
     # No correlation between land and sea if lsm = True
     if lsm:
-        landseamask = readnc(fic_lsm, ['lsm']).flatten()
+        landseamask = readnc(file_lsm, ['lsm']).flatten()
         sigma = sigma_land * (landseamask[:, np.newaxis] >= 0.5) \
                 * (landseamask[np.newaxis, :] >= 0.5) \
                 + sigma_sea * (landseamask[:, np.newaxis] < 0.5) \
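The outer products of boolean masks give every pair of cells a decay distance: sigma_land for land-land pairs, sigma_sea for sea-sea pairs, and 0 for mixed pairs, which the code later maps to zero correlation. A small self-contained sketch with made-up values:

    import numpy as np

    landseamask = np.array([1.0, 1.0, 0.0])   # two land cells, one sea cell
    sigma_land, sigma_sea = 500.0, 1000.0     # made-up decay distances

    land = landseamask >= 0.5
    sigma = sigma_land * (land[:, np.newaxis] & land[np.newaxis, :]) \
        + sigma_sea * (~land[:, np.newaxis] & ~land[np.newaxis, :])
    print(sigma)   # mixed land-sea pairs stay at 0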
@@ -67,7 +69,7 @@ def build_hcorrelations(zlat, zlon, lsm,
         dx = dist_matrix(zlat, zlon, projection)
         # Compute the correlation matrix itself
-        corr = np.exp(-dx / sigma)
+        corr = np.exp(old_div(-dx, sigma))
         corr[sigma <= 0] = 0
     # Component analysis
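old_div, from python-future's past.utils, reproduces Python 2 division semantics: floor division when both operands are integers, true division otherwise. For the float arrays used here it is identical to /; a quick check:

    from past.utils import old_div
    import numpy as np

    print(old_div(3, 2))                   # 1 (Python 2 integer division)
    print(old_div(3.0, 2))                 # 1.5
    print(old_div(np.array([-1.0]), 2.0))  # [-0.5], same as / on float arrays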
@@ -85,7 +87,7 @@ def build_hcorrelations(zlat, zlon, lsm,
             dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
                        evalues, evectors, dir_dump)
-        except Exception, e:
+        except Exception as e:
             raise e
     # Truncating values < evalmin
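The truncation keeps only eigenpairs whose eigenvalue reaches evalmin, which is what makes the square root of B cheap to apply. A minimal illustration of that reduction (not pyCIF's exact code path):

    import numpy as np

    corr = np.array([[1.0, 0.8],
                     [0.8, 1.0]])
    evalues, evectors = np.linalg.eigh(corr)   # eigh: symmetric input

    evalmin = 0.5
    keep = evalues >= evalmin
    evalues, evectors = evalues[keep], evectors[:, keep]
    sqrt_b = evectors * np.sqrt(evalues)       # B^1/2 in the reduced basis
    print(sqrt_b.shape)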
@@ -105,15 +107,15 @@ def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
     ncell = evalues.size
-    fic_dump = '{}/horcor_{}x{}_cs{}_cl{}_lmdz.bin'.format(
+    file_dump = '{}/horcor_{}x{}_cs{}_cl{}_lmdz.bin'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
-    if os.path.isfile(fic_dump):
+    if os.path.isfile(file_dump):
         raise IOError("Warning: {} already exists. "
-                      "I don't want to overwrite it".format(fic_dump))
+                      "I don't want to overwrite it".format(file_dump))
     datasave = np.concatenate((evalues[np.newaxis, :], evectors), axis=0)
-    datasave.tofile(fic_dump)
+    datasave.tofile(file_dump)
 def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump):
@@ -126,15 +128,15 @@ def read_hcorr(nlon, nlat, sigma_sea, sigma_land, dir_dump):
     """
-    fic_dump = '{}/horcor_{}x{}_cs{}_cl{}_lmdz.bin'.format(
+    file_dump = '{}/horcor_{}x{}_cs{}_cl{}_lmdz.bin'.format(
         dir_dump, nlon, nlat, sigma_sea, sigma_land)
-    if not os.path.isfile(fic_dump):
+    if not os.path.isfile(file_dump):
         raise IOError("{} does not exist. "
                       "Please compute correlations from scratch".format
-                      (fic_dump))
+                      (file_dump))
-    data = np.fromfile(fic_dump).reshape((-1, nlon * nlat))
+    data = np.fromfile(file_dump).reshape((-1, nlon * nlat))
     evalues = data[0]
     evectors = data[1:]
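dump_hcorr and read_hcorr share a simple binary layout: row 0 holds the eigenvalues, the remaining rows the eigenvectors. A round-trip sketch (the temporary path is illustrative):

    import numpy as np

    evalues = np.array([1.8, 0.2])
    evectors = np.eye(2)

    datasave = np.concatenate((evalues[np.newaxis, :], evectors), axis=0)
    datasave.tofile('/tmp/horcor_demo.bin')

    data = np.fromfile('/tmp/horcor_demo.bin').reshape((-1, evalues.size))
    assert (data[0] == evalues).all() and (data[1:] == evectors).all()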
......
+from __future__ import division
+from past.utils import old_div
 import numpy as np
 import os
 import pandas as pd
@@ -43,9 +45,8 @@ def build_tcorrelations(period, dates, sigma_t,
     check.verbose("Computing temporal correlations")
     # Compute matrix of distance
-    dt = (pd.DatetimeIndex(dates)[:, np.newaxis]
-          - pd.DatetimeIndex(dates)[np.newaxis, :]) \
-         / np.timedelta64(sigma_t, 'h')
+    dt = old_div((pd.DatetimeIndex(dates)[:, np.newaxis]
+                  - pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'h'))
     # Compute the correlation matrix itself
     corr = np.exp(-dt ** 2)
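The result is a Gaussian correlation in time: the lag between every pair of dates, normalised by sigma_t hours, squared and negated inside the exponential. A compact sketch with made-up dates (using .values so the broadcasting also works on recent pandas):

    import numpy as np
    import pandas as pd

    dates = pd.date_range('2010-01-01', periods=4, freq='D')
    sigma_t = 24                                # decay time in hours

    d = pd.DatetimeIndex(dates).values          # numpy datetime64 array
    dt = (d[:, np.newaxis] - d[np.newaxis, :]) / np.timedelta64(sigma_t, 'h')
    corr = np.exp(-dt ** 2)
    print(corr.round(3))                        # 1 on the diagonal, decaying with lag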
@@ -65,7 +66,7 @@ def build_tcorrelations(period, dates, sigma_t,
             dump_tcorr(period, dates, sigma_t,
                        evalues, evectors, dir_dump)
-        except Exception, e:
+        except Exception as e:
             raise e
     # Truncating values < evalmin
@@ -92,16 +93,16 @@ def dump_tcorr(period, dates, sigma_t,
     datei = dates[0]
     datef = dates[-1]
-    fic_dump = '{}/tempcor_{}_{}_per{}_ct{}_lmdz.bin'.format(
+    file_dump = '{}/tempcor_{}_{}_per{}_ct{}_lmdz.bin'.format(
         dir_dump, datei.strftime("%Y%m%d%H%M"), datef.strftime("%Y%m%d%H%M"),
         period, sigma_t)
-    if os.path.isfile(fic_dump):
+    if os.path.isfile(file_dump):
         raise IOError("Warning: {} already exists. "
-                      "I don't want to overwrite it".format(fic_dump))
+                      "I don't want to overwrite it".format(file_dump))
     datasave = np.concatenate((evalues[np.newaxis, :], evectors), axis=0)
-    datasave.tofile(fic_dump)
+    datasave.tofile(file_dump)
 def read_tcorr(period, dates, sigma_t, dir_dump):
@@ -119,16 +120,16 @@ def read_tcorr(period, dates, sigma_t, dir_dump):
     datei = dates[0]
     datef = dates[-1]
-    fic_dump = '{}/tempcor_{}_{}_per{}_ct{}_lmdz.bin'.format(
+    file_dump = '{}/tempcor_{}_{}_per{}_ct{}_lmdz.bin'.format(
         dir_dump, datei.strftime("%Y%m%d%H%M"), datef.strftime("%Y%m%d%H%M"),
         period, sigma_t)
-    if not os.path.isfile(fic_dump):
+    if not os.path.isfile(file_dump):
         raise IOError("{} does not exist. "
                       "Please compute correlations from scratch".format
-                      (fic_dump))
+                      (file_dump))
-    data = np.fromfile(fic_dump).reshape((-1, len(dates)))
+    data = np.fromfile(file_dump).reshape((-1, len(dates)))
     evalues = data[0]
     evectors = data[1:]
......
+from __future__ import absolute_import
 import datetime
 import xarray as xr
 import numpy as np
 from pycif.utils.check import verbose
 from pycif.utils.check.errclass import PluginError
-from .utils import dateslice, scale2map
+from .utils.dates import dateslice
+from .utils.scalemaps import scale2map
+from .utils.reindex import reindex
 def control2native(self, mod_input, di, df, mode,
@@ -27,7 +30,7 @@ def control2native(self, mod_input, di, df, mode,
     """
     # TODO: There is a hidden conceptual layer here
-    # Until now, it is assumed that inputs should have a direct corresponding
+    # So far, it is assumed that inputs should have a direct corresponding
     # component in the control vector, whereas one would need a general
     # mapping function between the control vector structure and the inputs
     # structure
@@ -38,7 +41,6 @@ def control2native(self, mod_input, di, df, mode,
     ddi = min(di, df)
     ddf = max(di, df)
     # If this type of input is not considered in the control vector,
     # return nothing
     # It will then be handled as a fixed input
@@ -66,9 +68,9 @@ def control2native(self, mod_input, di, df, mode,
         if cdates[0] < ddi:
             cdates[0] = ddi
         # Translates only control variables corresponding to the
         # simulation period
         for x in variables:
             tmp = np.reshape(
                 variables[x][tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, tracer.nlev, -1))[dslice]
@@ -83,7 +85,7 @@ def control2native(self, mod_input, di, df, mode,
         if getattr(tracer, 'type', 'scalar') == 'scalar':
             # Read the tracer array and apply the present control vector
             # scaling factor
-            inputs = tracer.read(trcr, tracer.dir, tracer.fic,
+            inputs = tracer.read(trcr, tracer.dir, tracer.file,
                                  self.model.input_dates[ddi],
                                  comp_type=mod_input,
                                  model=self.model,
@@ -118,7 +120,7 @@ def control2native(self, mod_input, di, df, mode,
             xmod[trcr].pop('scale')
         # Saving reference directories if specified
-        xmod[trcr]['ficorig'] = getattr(tracer, 'fic', None)
+        xmod[trcr]['fileorig'] = getattr(tracer, 'file', None)
         xmod[trcr]['dirorig'] = getattr(tracer, 'dir', None)
     return xmod
-import cPickle as pickle
+from future import standard_library
+standard_library.install_aliases()
+import pickle as pickle
 import numpy as np
 import xarray as xr
-from .utils import scale2map
+from .utils.scalemaps import scale2map
 from pycif.utils.path import init_dir
-def dump(self, fic, to_netcdf=False, dir_netcdf=None, **kwargs):
+def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
"""Dumps a control vector into a pickle file.
Does not save large correlations.
Args:
self (pycif.utils.classes.controlvects.ControlVect):
the Control Vector to dump
fic (str): path to the file to dump as pickle
file (str): path to the file to dump as pickle
to_netcdf (bool): save to netcdf files if True
dir_netcdf (str): root path for the netcdf directory
"""
@@ -31,7 +33,7 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
     tosave['xb'] = self.xb
     # Dumping the dictionary to a pickle
-    with open(fic, 'w') as f:
+    with open(cntrl_file, 'wb') as f:
         pickle.dump(tosave, f, pickle.HIGHEST_PROTOCOL)
     # Dumping to an ensemble of NetCDF files
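Switching to 'wb' matters beyond the rename: pickle writes bytes, so text mode breaks outright under Python 3. A round-trip sketch (the path is illustrative):

    import pickle

    with open('/tmp/controlvect_demo.pickle', 'wb') as f:  # binary mode for pickle
        pickle.dump({'xb': [1.0, 2.0]}, f, pickle.HIGHEST_PROTOCOL)

    with open('/tmp/controlvect_demo.pickle', 'rb') as f:  # read side binary too
        print(pickle.load(f))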
@@ -52,24 +54,25 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
             x = np.reshape(
                 self.x[tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, -1))
-            x = scale2map(x, tracer, tracer.dates[:-1], self.domain)
+            x = scale2map(x, tracer, tracer.dates, self.domain)
             xb = np.reshape(
                 self.xb[tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, -1))
-            xb = scale2map(xb, tracer, tracer.dates[:-1], self.domain)
+            xb = scale2map(xb, tracer, tracer.dates, self.domain)
             std = np.reshape(
                 self.std[tracer.xpointer:tracer.xpointer + tracer.dim],
                 (tracer.ndates, -1))
-            std = scale2map(std, tracer, tracer.dates[:-1], self.domain)
+            std = scale2map(std, tracer, tracer.dates, self.domain)
-            ds = xr.Dataset({'x': x, 'xb': xb, 'std': std})
+            ds = xr.Dataset({'x': x, 'xb': xb, 'std': std,
+                             'lon': tracer.domain.lon,
+                             'lat': tracer.domain.lat})
             ds.to_netcdf('{}/controlvect_{}_{}.nc'.format(dir_comp, comp, trcr))
-def load(self, fic, **kwargs):
-    with open(fic, 'r') as f:
+def load(self, cntrl_file, **kwargs):
+    with open(cntrl_file, 'rb') as f:
         toread = pickle.load(f)
     out = self.from_dict(toread)
......
@@ -28,12 +28,11 @@ def init_bprod(cntrlv, options={}, **kwargs):
     components = cntrlv.components
     for comp in components.attributes:
         component = getattr(components, comp)
-        # for trcr in component.parameters.attributes:
-        #     tracer = getattr(component.parameters, trcr)
-        for trcr in component.attributes:
-            tracer = getattr(component, trcr)
+        for trcr in component.parameters.attributes:
+            tracer = getattr(component.parameters, trcr)
             # Deals with horizontal correlations if any
-            if hasattr(tracer, 'hcorrelations') and tracer.resol == 'hpixels':
+            if hasattr(tracer, 'hcorrelations') and tracer.hresol == 'hpixels':
                 corr = tracer.hcorrelations
                 dump_hcorr = getattr(corr, 'dump_hcorr', False)
                 dircorrel = getattr(corr, 'dircorrel', '')
@@ -47,13 +46,13 @@ def init_bprod(cntrlv, options={}, **kwargs):
                 if lsm:
                     sigma_land = getattr(corr, 'sigma_land', -1)
                     sigma_sea = getattr(corr, 'sigma_sea', -1)
-                    fic_lsm = getattr(corr, 'ficlsm')
+                    file_lsm = getattr(corr, 'filelsm')
                 else:
                     sigma = getattr(corr, 'sigma', -1)
                     sigma_land = sigma
                     sigma_sea = -999
-                    fic_lsm = None
+                    file_lsm = None
@@ -70,7 +69,7 @@ def init_bprod(cntrlv, options={}, **kwargs):
                         build_hcorrelations(
                             grid.zlat, grid.zlon, lsm,
                             sigma_land, sigma_sea,
-                            fic_lsm=fic_lsm, evalmin=evalmin,
+                            file_lsm=file_lsm, evalmin=evalmin,
                             dump=dump_hcorr, dir_dump=dircorrel,
                             projection=projection,
                             **kwargs)
......
 import numpy as np
 from scipy import ndimage
 from pycif.utils import dates
-from .utils import resol2dim
-from .utils import dateslice, map2scale
+from .utils.dimensions import hresol2dim, vresol2dim
+from .utils.scalemaps import map2scale, vmap2vaggreg
+import pdb
 def init_xb(cntrlv, **kwargs):
     """Initializes the prior control vector. Loops over all components and
@@ -31,13 +30,10 @@ def init_xb(cntrlv, **kwargs):
     # Else, carry on initializing
     components = cntrlv.components
-    # pdb.set_trace()
     for comp in components.attributes:
         component = getattr(components, comp)
-        # for trcr in component.parameters.attributes:
-        #     tracer = getattr(component.parameters, trcr)
-        for trcr in component.attributes:
-            tracer = getattr(component, trcr)
+        for trcr in component.parameters.attributes:
+            tracer = getattr(component.parameters, trcr)
             # Deals with temporal resolution of the control component
             # A negative period is equivalent to no period
@@ -46,69 +42,75 @@ def init_xb(cntrlv, **kwargs):
                 datei, datef, getattr(tracer, 'period', ''),
                 subperiod=getattr(tracer, 'subperiod', ''))
             # ESO: n dates, but n-1 intervals
             tracer.ndates = len(tracer.dates) - 1
             # Keeping a pointer to the correct location in the whole control
             tracer.xpointer = cntrlv.dim
             # Updating dimension
-            tracer.hresoldim = resol2dim(tracer, cntrlv.domain, **kwargs)
-            tracer.dim = tracer.ndates * tracer.hresoldim
+            tracer.hresoldim = hresol2dim(tracer, tracer.domain, **kwargs)
+            tracer.vresoldim = vresol2dim(tracer, tracer.domain, **kwargs)
+            tracer.dim = tracer.ndates * tracer.hresoldim * tracer.vresoldim
             cntrlv.dim += tracer.dim
             # Scale xb if prescribed in Yaml
             xb_scale = getattr(tracer, 'xb_scale', 1.)
+            import pdb; pdb.set_trace()
             # Filling with defined value if prescribed
             xb_value = getattr(tracer, 'xb_value', 0.)
             # Filling Xb and uncertainties
-            # Most types are scalar factors at the moment
-            if getattr(tracer, 'resol', '') != 'hpixels' \
-                    or comp != 'fluxes' \
-                    or getattr(tracer, 'type', 'scalar') == 'scalar':
+            # Most types are scalar factors
+            # import pdb ; pdb.set_trace()
+            if getattr(tracer, 'type', 'scalar') == 'scalar' \
+                    or getattr(tracer, 'hresol', '') != 'hpixels' \
+                    or getattr(tracer, 'vresol', '') != 'vpixels':
                 tracer.type = 'scalar'
                 xb = np.ones(tracer.dim) * xb_scale + xb_value
-                cntrlv.std = \
-                    np.append(
-                        cntrlv.std,
-                        tracer.err * xb)
+                std = tracer.err * xb
-            # Only fluxes at the pixel resolution can be stored as physical
-            # values
+            # Physical variables stores uncertainties in the physical space
+            # Valid only for control variables at the pixel resolution
             else:
                 tracer.type = 'physical'
-                flxall = tracer.read_fluxes(
-                    trcr, tracer.dir, tracer.fic, tracer.dates[:-1],
+                flxall = tracer.read(
+                    trcr, tracer.dir, tracer.file, tracer.dates,
                     comp_type=comp,
                     **kwargs).data * xb_scale \
                     + xb_value
+                pdb.set_trace()
-                flxmap = map2scale(flxall, tracer, cntrlv.domain)
+                # Vertical aggregation depending on vertical stacks
+                vaggreg = vmap2vaggreg(flxall, tracer, tracer.domain)
+                xstack = map2scale(vaggreg, tracer, cntrlv.domain)
-                xb = flxmap.flatten()
+                # Putting flatten values into xb
+                xb = xstack.flatten()
                 # Filling uncertainties depending on uncertainty type
                 # If 'max', takes the maximum of neighbouring cells as
                 # reference flux (spatially and temporally)
                 if getattr(tracer, 'errtype', '') == 'max':
-                    flxmap = \
+                    xstack = \
                         map2scale(
-                            ndimage.maximum_filter(np.abs(flxall), size=3),
+                            ndimage.maximum_filter(np.abs(vaggreg), size=3),
                             tracer, cntrlv.domain)
                 # If errtype is 'avg', prescribes uniform uncertainties
                 elif getattr(tracer, 'errtype', '') == 'avg':
-                    flxmap = \
-                        map2scale(0. * np.abs(flxall) + np.mean(np.abs(flxall)),
+                    xstack = \
+                        map2scale(0. * np.abs(vaggreg)
+                                  + np.mean(np.abs(vaggreg)),
                                   tracer, cntrlv.domain)
-                cntrlv.std = \
-                    np.append(
-                        cntrlv.std,
-                        tracer.err * np.abs(flxmap).flatten())
+                std = tracer.err * np.abs(xstack).flatten()
             # Appending local xb to the control vector
             cntrlv.xb = np.append(cntrlv.xb, xb)
+            cntrlv.std = np.append(cntrlv.std, std)
-            # ESO
-            print "controlvect:shape(cntrlv.xb)", np.shape(cntrlv.xb)
     return cntrlv
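The dimensioning above gives each tracer a contiguous slab of the control vector, of size ndates * hresoldim * vresoldim, starting at xpointer. A toy sketch of that pointer arithmetic with made-up sizes:

    import numpy as np

    ndates, hresoldim, vresoldim = 3, 4, 2      # made-up resolutions
    dim = ndates * hresoldim * vresoldim        # slab size for one tracer

    xpointer = 0                                # running offset into the full vector
    xb = np.ones(dim)                           # scalar-type prior: ones (times xb_scale)
    cntrlv_xb = np.append(np.array([]), xb)

    # The slab can later be recovered and reshaped, much as control2native does:
    slab = cntrlv_xb[xpointer:xpointer + dim].reshape(ndates, vresoldim, -1)
    print(slab.shape)                           # (3, 2, 4)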
@@ -2,7 +2,8 @@ import datetime
 import numpy as np
 from pycif.utils.check import verbose
-from .utils import dateslice, map2scale
+from .utils.dates import dateslice
+from .utils.scalemaps import map2scale, vmap2vaggreg
def native2control(self, datastore, di, df,
@@ -68,27 +69,39 @@ def native2control(self, datastore, di, df,
             dslice = dateslice(tracer, di, df)
-            # For fluxes stored as a scaling factor,
-            # scaling by the original flux
-            flx = np.ones((len(dslice)))
-            if mod_input == 'fluxes' \
-                    and hasattr(tracer, 'read_fluxes') \
-                    and getattr(tracer, 'type', 'scalar') == 'scalar':
-                flx = \
-                    tracer.read_fluxes(
-                        trcr, tracer.dir, tracer.fic, tracer.dates[dslice],
+            # For variables stored as a scaling factor,
+            # scaling by the original value
+            phys = np.ones((len(dslice)))
+            if getattr(tracer, 'type', 'scalar') == 'scalar':
+                phys = \
+                    tracer.read(
+                        trcr, tracer.dir, tracer.file, tracer.dates[dslice],
                         comp_type=mod_input, model=self.model, **kwargs).data
-            # Loop over control space periods for temporal aggregation
+            # Make vertical aggregation per temporal slice
+            data = datastore['adj_out'][mod_input][trcr]['data']
+            data_dates = datastore['adj_out'][mod_input][trcr]['dates']
             for idd, ds in enumerate(dslice):
                 dd0 = tracer.dates[ds]
-                dd1 = tracer.dates[ds + 1]
-                mask = (datastore['adj_out'][mod_input][trcr]['dates'] >= dd0) \
-                       & (datastore['adj_out'][mod_input][trcr]['dates'] < dd1)
+                dd1 = tracer.dates[min(ds + 1, tracer.ndates - 1)]
+                # Either take the corresponding slice of time,
+                # or take the exact date
+                # if the control variable is on a time stamp
+                if dd0 < dd1:
+                    mask = (data_dates >= dd0) & (data_dates < dd1)
+                else:
+                    mask = data_dates == dd0
+                # Vertical aggregation
+                vdata = np.sum(data[mask], axis=0) * phys[idd]
+                vaggreg = vmap2vaggreg(vdata[np.newaxis], tracer, tracer.domain)
+                # 2d maps to control vector slices
                 self.dx[tracer.xpointer:tracer.xpointer + tracer.dim][
-                    ds * tracer.hresoldim:(ds + 1) * tracer.hresoldim] += \
-                    map2scale(
-                        np.sum(
-                            datastore['adj_out'][mod_input][trcr]['data'][mask],
-                            axis=0) * flx[idd], tracer, self.domain)[0]
+                    ds * tracer.hresoldim * tracer.vresoldim:
+                    (ds + 1) * tracer.hresoldim * tracer.vresoldim] += \
+                    map2scale(vaggreg, tracer, tracer.domain).flatten()
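A reduced sketch of the temporal masking introduced above: an interval slice when dd0 < dd1, an exact-date match when the control variable sits on a single time stamp (arrays are made up):

    import numpy as np
    import pandas as pd

    data_dates = pd.date_range('2010-01-01', periods=6, freq='6H').values
    data = np.arange(6.0)                # stand-in for native-time adjoint slices

    dd0 = np.datetime64('2010-01-01T06:00')
    dd1 = np.datetime64('2010-01-01T18:00')

    mask = (data_dates >= dd0) & (data_dates < dd1) if dd0 < dd1 \
        else data_dates == dd0
    print(np.sum(data[mask], axis=0))    # temporal aggregation over the slice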
@@ -14,11 +14,8 @@ def sqrtbprod(controlvect, chi, **kwargs):
     components = controlvect.components
     for comp in components.attributes:
         component = getattr(components, comp)
-        # for trcr in component.parameters.attributes:
-        #     tracer = getattr(component.parameters, trcr)