Commit 71719406 authored by Espen Sollum

Fixed some issues related to dates

parent e545544d
@@ -69,6 +69,7 @@ model :
# Also projects information from the observation to the model space
# - fic_obsvect: observation vector from previous simulations
# - dump_debug: write out extra information (for debugging)
# - sort_order: (optional) datastore sort order; default is ['index', 'station']
# For FLEXPART the background plugin takes the following parameters
# - dir_initconc: directory containing concentration files
# - file_initconc: name of concentration files
@@ -84,6 +85,7 @@ obsvect:
dump: True
dump_debug: True
dump_type: nc
sort_order: ['station', 'index']
background:
plugin:
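Note: the new `sort_order` key propagates to the `dump_datastore`/`read_datastore` calls further down. A minimal sketch (not CIF code) of the sorting it requests, assuming a pandas datastore indexed by date with the index renamed to `'index'`, as `init_background` does below:

```python
import pandas as pd

# Hypothetical datastore: date index named 'index' so that
# sort_values() can use it as a key (pandas >= 0.23 accepts
# index level names in 'by').
df = pd.DataFrame(
    {"station": ["zep", "pal", "zep"], "obs": [1.9, 1.8, 2.0]},
    index=pd.to_datetime(["2012-01-02", "2012-01-01", "2012-01-01"]),
)
df.index.rename("index", inplace=True)

sort_order = ["station", "index"]  # as configured above
df.sort_values(sort_order, inplace=True)
print(df)  # pal first, then the two zep rows in date order
```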
@@ -163,7 +165,7 @@ domain :
nlat : 90
nlev : 1
type : deg
xmin_glob : -179.
xmin_glob : -180.
ymin_glob : -90.
nlon_glob : 360
nlat_glob : 180
@@ -189,9 +191,9 @@ mode:
version: flexpart
reload_from_previous: False
maxiter: 10
# zreduc: 0.001
# epsg: 0.01
df1: 0.01
df1: 1
nsim: 100
m: 1
save_out_netcdf: True
####################################################################
@@ -231,17 +233,17 @@ controlvect:
hresol : regions
inc_ocean : true
fileregions : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/regions_ghg.nc
errtype : max
# errtype : max
# errtype : avg
err : 0.5
# Lower limit on flux error (unit: kg/m2/h)
flxerr_ll: 1.e-8
# Total error over the inversion domain (unit: Tg/y)
# glob_err: 10.
glob_err: 10.
numscale : 1.E12
xb_scale : 1.
# periodflux : 5D
period : 1D
# period : 10D
dir : /home/eso/FLEX_INV/CH4/TEST_OUTPUT/FLUXES/GHG/
file : CH4_TOTAL_%Y_05x05.nc
file_glob : CH4_TOTAL_%Y_10x10.nc
@@ -260,15 +262,15 @@ controlvect:
filelsm : /home/eso/FLEX_INV/CH4/TEST_INPUT/lsm_0.5x0.5_VERIFY.nc
dircorrel : /home/eso/repos/CIF/flextest/
dump_hcorr : True
# sigma_land: 250.
# sigma_sea: 1000.
sigma_land: 1.
sigma_sea: 1.
sigma_land: 250.
sigma_sea: 1000.
# sigma_land: 1.
# sigma_sea: 1.
evalmin : 1.e-6
# evalmin : 1.e-15
tcorrelations :
# sigma_t : 30
sigma_t : 1
sigma_t : 10
dump_tcorr : True
dircorrel : /home/eso/repos/CIF/flextest/
# evalmin : 1.e-6
@@ -80,7 +80,6 @@ def build_hcorrelations(zlat, zlon, lsm,
if regions:
landseamask2d = readnc(file_lsm, ['lsm'])
landseamask = map2scale(landseamask2d[np.newaxis, np.newaxis, :, :], tracer, tracer.domain, region_scale_area=False, region_max_val=True).flatten()
import pdb; pdb.set_trace()
else:
landseamask = readnc(file_lsm, ['lsm']).flatten()
@@ -105,8 +104,6 @@ def build_hcorrelations(zlat, zlon, lsm,
# Compute matrix of distance
dx = dist_matrix(zlat, zlon, projection)
import pdb; pdb.set_trace()
# Compute the correlation matrix itself
corr = np.exp(old_div(-dx, sigma))
corr[sigma <= 0] = 0
@@ -133,7 +130,6 @@ def build_hcorrelations(zlat, zlon, lsm,
# mask = evalues >= evalmin
# ESO: This is how it is done in flexinvert
import pdb; pdb.set_trace()
mask = evalues >= evalmin*evalues.max()
check.verbose("Truncating eigenvalues at " + str(evalmin*evalues.max()))
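Note: this hunk adopts the FLEXINVERT-style truncation, discarding eigenvalues relative to the largest one rather than against an absolute `evalmin`. A self-contained sketch with hypothetical shapes and values (not repository code):

```python
import numpy as np

def truncate_eigh(corr, evalmin=1.e-6):
    # Decompose a symmetric correlation matrix and keep only the
    # eigenpairs whose eigenvalue reaches evalmin * max(eigenvalues).
    evalues, evectors = np.linalg.eigh(corr)
    mask = evalues >= evalmin * evalues.max()
    return evalues[mask] ** 0.5, evectors[:, mask]

# Exponentially decaying correlation between four points 0-300 km
# apart, with a 250 km length scale (cf. sigma_land in the yaml above).
dx = np.abs(np.arange(4)[:, None] - np.arange(4)[None, :]) * 100.0
corr = np.exp(-dx / 250.0)
sqrt_evalues, evectors = truncate_eigh(corr)
print(sqrt_evalues.shape, evectors.shape)
```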
@@ -50,8 +50,8 @@ def build_tcorrelations(period, dates, sigma_t,
- pd.DatetimeIndex(dates)[np.newaxis, :]), np.timedelta64(sigma_t, 'D'))
# Compute the correlation matrix itself
# corr = np.exp(-dt ** 2)
# ESO:
corr = np.exp(-np.abs(dt))
# Component analysis
@@ -75,8 +75,6 @@ def build_tcorrelations(period, dates, sigma_t,
# Truncating values < evalmin
mask = evalues >= evalmin
import pdb; pdb.set_trace()
return evalues[mask] ** 0.5, evectors[:, mask]
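Note: the temporal correlation changes from a Gaussian to an exponential decay. A minimal sketch, assuming daily dates and `sigma_t` in days as in the config above:

```python
import numpy as np
import pandas as pd

dates = pd.date_range("2012-01-01", periods=5, freq="D")
sigma_t = 10  # days, cf. the yaml above

# Pairwise time separations, normalised by the correlation length.
dt = (dates.values[:, None] - dates.values[None, :]) \
    / np.timedelta64(sigma_t, "D")

corr_old = np.exp(-dt ** 2)      # Gaussian, now commented out
corr_new = np.exp(-np.abs(dt))   # exponential, decays more slowly
print(corr_new[0])
```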
@@ -8,7 +8,8 @@ from .utils.scalemaps import scale2map
from pycif.utils.path import init_dir
def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, run_id=None, **kwargs):
def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, run_id=None,
smallnum=1.e-27, **kwargs):
"""Dumps a control vector into a pickle file.
Does not save large correlations.
@@ -77,36 +78,26 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, run_id=None, **kwargs):
# Make a copy to write out the original array
xa=copy.deepcopy(x)
xa/=np.float(self.model.numscale)
xa = copy.deepcopy(x)
xa /= np.float(self.model.numscale)
# If offsets, add prior fluxes
if getattr(tracer, 'offsets', False):
offsets = True
print("Offsets=True")
x[:,0,:,:] = (x[:,0,:,:] + self.flxall)/np.float(self.model.numscale)
else:
print("Offsets=False")
offsets = False
# TODO:
# Transform the scaling factor (0 - 1) to the grid and apply it to x.
# Check that the scaling factors sum to 1, and decide how to handle
# negative values.
# Transform scaling factor
# Factor for regridding
scaling_factor = xb_grid / xb
# Rescale using prior flux ratio grid to box
x.values[xb_grid.values > 1.e-15] = x.values[xb_grid.values > 1.e-15]*scaling_factor.values[xb_grid.values > 1.e-15]
#x[xb_grid > 1.e-15] = x*scaling_factor
x.values[xb_grid.values > smallnum] = x.values[xb_grid.values > smallnum]*scaling_factor.values[xb_grid.values > smallnum]
x[:,:,:,:] /= np.float(self.model.numscale)
std = np.reshape(
self.std[tracer.xpointer:tracer.xpointer + tracer.dim],
(tracer.ndates, -1))
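Note: a toy illustration of the new `smallnum` guard (arrays are hypothetical): cells whose regridded prior flux is effectively zero keep their value instead of being scaled by a near-zero ratio.

```python
import numpy as np

smallnum = 1.e-27  # new keyword argument of dump()

xb_grid = np.array([0.0, 2.e-28, 1.0, 4.0])  # prior flux regridded
xb = np.array([1.0, 1.0, 2.0, 2.0])          # prior flux per box
x = np.ones_like(xb)                         # values to rescale

# Rescale only where the gridded prior flux is non-negligible;
# the first two cells keep their original value of 1.
scaling_factor = xb_grid / xb
mask = xb_grid > smallnum
x[mask] *= scaling_factor[mask]
print(x)  # [1.  1.  0.5 2. ]
```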
@@ -166,8 +166,7 @@ def init_bprod(cntrlv, options={}, **kwargs):
tracer.domain,
region_scale_area=False, region_max_val=False))
inv_intervals = tracer.dates[0::-1].shape[0]
errsum = np.dot(cntrlv.std, area_reg)/float(inv_intervals)
errsum = np.dot(cntrlv.std, np.tile(area_reg, tracer.ndates)/float(tracer.ndates))
toterr = errsum*3600.*24.*365./1.e9/float(tracer.numscale)
cntrlv.errscalar = glob_err/toterr
cntrlv.std = cntrlv.std*cntrlv.errscalar**2
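Note: the replaced line sums the error over every flux period instead of a miscounted number of intervals. A rough sketch with hypothetical sizes; the unit factors (seconds per year, kg to Tg) follow the code, and the exact flux units depend on CIF's `numscale` conventions:

```python
import numpy as np

numscale = 1.e12
ndates, nreg = 12, 3
std = np.abs(np.random.randn(ndates * nreg)) * numscale  # flux errors
area_reg = np.array([1.0e12, 2.0e12, 0.5e12])            # region areas, m2

# Average the per-period error sums over all periods...
errsum = np.dot(std, np.tile(area_reg, ndates) / float(ndates))
# ...convert to Tg/y and derive the scaling that matches glob_err.
toterr = errsum * 3600. * 24. * 365. / 1.e9 / float(numscale)
glob_err = 10.  # Tg/y, as in the yaml above
errscalar = glob_err / toterr
print(toterr, errscalar)
```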
@@ -175,7 +174,7 @@ def init_bprod(cntrlv, options={}, **kwargs):
# TODO: scale cross correlations as well
# probably apply scaling to hcorr?
import pdb; pdb.set_trace()
# import pdb; pdb.set_trace()
corr.sqrt_evalues = sqrt_evalues
corr.evectors = evectors
@@ -12,7 +12,7 @@ def create_domain(domain,
Notes:
We assume that center coordinates are used in the domain definition
We assume that outer edge coordinates are used in the domain definition
Todo:
@@ -72,12 +72,15 @@ def create_domain(domain,
domain.zlat_glob = zlat_glob
# Indices for inversion domain into global domain (relative to lower left corner)
ix1 = np.argmin(np.abs(lonc_glob - domain.lon[0] - 1))
iy1 = np.argmin(np.abs(latc_glob - domain.lat[0] - 1))
ix2 = np.argmin(np.abs(lonc_glob - domain.lon[-1] - 1))
iy2 = np.argmin(np.abs(latc_glob - domain.lat[-1] - 1))
ix1 = np.argmin(np.abs(lonc_glob - domain.lon[0] ))
iy1 = np.argmin(np.abs(latc_glob - domain.lat[0] ))
ix2 = np.argmin(np.abs(lonc_glob - domain.lon[-1] ))
iy2 = np.argmin(np.abs(latc_glob - domain.lat[-1] ))
domain.ix1 = ix1
domain.ix2 = ix2
domain.iy1 = iy1
domain.iy2 = iy2
# import pdb; pdb.set_trace()
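Note: with the docstring corrected to outer-edge coordinates, the spurious `- 1` offsets are dropped when locating the inversion domain inside the global grid. A sketch with hypothetical 1-degree grids:

```python
import numpy as np

lonc_glob = np.arange(-180, 180) + 0.5  # global 1-degree cell centres
lon = np.arange(-15, 35) + 0.5          # inversion domain cell centres

ix1 = np.argmin(np.abs(lonc_glob - lon[0]))   # first cell inside domain
ix2 = np.argmin(np.abs(lonc_glob - lon[-1]))  # last cell inside domain
print(ix1, ix2)  # 165 214
```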
@@ -37,14 +37,11 @@ def check_options(self, chi, finit,
# Number of simulations
# Can be explicitly given by the user, or will be determined from 'maxiter'
try:
niter = self.niter
nsim = self.nsim
except AttributeError:
maxiter = self.maxiter
niter = maxiter
nsim = 2 * maxiter
niter = getattr(self, 'niter', maxiter)
nsim = getattr(self, 'nsim', 2*maxiter)
# Default parameters
m = getattr(self, 'm', 5)
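Note: the try/except around both attributes is replaced by per-attribute `getattr` defaults, so a user can set `nsim` without `niter` (or vice versa). A compact equivalent:

```python
class Opts(object):
    maxiter = 10  # only maxiter set by the user

self = Opts()
maxiter = self.maxiter
niter = getattr(self, 'niter', maxiter)
nsim = getattr(self, 'nsim', 2 * maxiter)
print(niter, nsim)  # 10 20
```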
@@ -6,6 +6,8 @@ from __future__ import absolute_import
import os
import numpy as np
import glob
import re
from datetime import datetime, timedelta
from . import flexpart_header
from . import mod_flexpart
@@ -49,7 +51,6 @@ def read_flexpart_dir(subdir, nested=True, **kwargs):
grids[:,:,:,ii] = grid_fp
gtimes[:, ii] = gtime
return grids, gtimes
@@ -86,12 +87,24 @@ def read_flexpart_grid(subdir, file_name, fp_header, **kwargs):
# Convert grid times to datetime format
gtime_dt = []
# Convert FLEXPART julian days to datetime
for i in range(len(gtime)):
if gtime[i] == 0.:
break
gtime_dt.append(j2d(gtime[i]))
# Generate the footprint dates as a series (for debugging), adding one hour
# gtime_dt.append(datetime.strptime(re.findall(r'\d+', file_name)[0], '%Y%m%d%H%M%S'))
# gtime_dt[0] += timedelta(hours=1)
# gtime_dt[0] = gtime_dt[0].replace(hour=0)
# for i in range(1, len(gtime)):
# if gtime[i] == 0.:
# break
# gtime_dt.append(gtime_dt[0] - timedelta(days=i))
# gtime_dt.reverse()
return grid_fp, gtime_dt, ngrid
@@ -111,11 +111,11 @@ def obsoper(self, inputs, mode,
else:
raise Exception("For FLEXPART, only hresol:regions are implemented in controlvect")
# Loop through model periods and read model output
self.missingperiod = False
# import pdb; pdb.set_trace()
for di, df in zip(subsimu_dates[:-1], subsimu_dates[1:]):
# Save to datastore for debugging purposes
@@ -131,25 +131,21 @@ def obsoper(self, inputs, mode,
print("di, df", di, df)
for obs_i, row in enumerate(obsvect.datastore.itertuples()):
# for obs_i, row in enumerate(obsvect.datastore.iterrows()):
# Subdirectory for FLEXPART footprints
subdir = row.Index.strftime("%Y%m") # itertuples
# subdir = row[0].strftime("%Y%m")
subdir = row.Index.strftime("%Y%m")
# For debugging
obs_check[obs_i] = obs_i
station = row.station # itertuples
# station = row[1].station
station = row.station
runsubdir_nest = os.path.join(
model.run_dir_nest, station.upper(), subdir)
runsubdir_glob = os.path.join(
model.run_dir_glob, station.upper(), subdir)
file_date = row.Index.strftime('%Y%m%d%H%M%S') # itertuples
# file_date = row[0].strftime('%Y%m%d%H%M%S')
file_date = row.Index.strftime('%Y%m%d%H%M%S')
# Read nested grids
file_name = 'grid_time_nest_' + file_date + '_001'
@@ -176,15 +172,15 @@ def obsoper(self, inputs, mode,
grid_glob *= model.coeff*model.mmair/model.molarmass
# Background contribution from fluxes outside domain
hbkg = np.sum(grid_glob[:, :, 0:ngrid-1], axis=2)
hbkg = np.sum(grid_glob[:, :, 0:ngrid_glob], axis=2)
hbkg[ix1:ix2, iy1:iy2] = 0.0
# Index to state vector
ind = np.argmin(np.abs(tracer.dates[0::-1] - gtime_glob[0]))
ind = np.argmin(np.abs(tracer.dates[0:-1] - gtime_glob[0]))
# TODO: calculate on first iteration ,then store?
obs_bkg[obs_i] = np.sum(hbkg[:,:].T*controlvect.flx_bkg[ind,:,:])
obs_bkgerr[obs_i] = obs_bkg[obs_i]*tracer.err
obs_bkgerr[obs_i] = np.sum(hbkg[:,:].T*np.abs(controlvect.flx_bkg[ind,:,:]))*tracer.err
# Transport for boxes in regions
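Note: the background error is no longer a fraction of the (possibly sign-cancelling) background value, but of the contribution computed from absolute fluxes. A sketch with hypothetical arrays:

```python
import numpy as np

tracer_err = 0.5
hbkg = np.random.rand(10, 8)      # background sensitivities (lon, lat)
flx_bkg = np.random.randn(8, 10)  # background fluxes, one period

obs_bkg = np.sum(hbkg.T * flx_bkg)                          # value
obs_bkgerr = np.sum(hbkg.T * np.abs(flx_bkg)) * tracer_err  # error
print(obs_bkg, obs_bkgerr)  # the error stays positive, no cancellation
```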
@@ -204,11 +200,14 @@ def obsoper(self, inputs, mode,
# Calculate indices to state vector
for i,j in enumerate(gtime):
if j > df:
#if j > df:
# Avoid including the midnight files at period end
if j >= df:
istate[i] = -1
else:
# Discard 1st tracer date in the comparison
mask = j - tracer.dates[1::] <= datetime.timedelta(0)
# (similar to writing dates + stateres)
mask = j - tracer.dates[1::] < datetime.timedelta(0)
istate[i] = int(np.argmax(mask))
if np.max(istate) < 0:
@@ -235,14 +234,13 @@ def obsoper(self, inputs, mode,
# TODO: This can possibly be calculated at first iteration only
obs_ghg[obs_i] = 0.
for i, itim in enumerate(gtime):
ind = np.argmin(np.abs(tracer.dates[0::-1] - itim))
ind = np.argmin(np.abs(tracer.dates[0:-1] - itim))
obs_ghg[obs_i] += np.sum(hnest.T[i,:,:] * controlvect.flxall[ind,:,:])
# Get background contribution
# TODO: do the optimization
if obsvect.background.optimize_cini is False:
# cini = row[1][obsvect.cini_names].sum() # with iterrows
cini = 0.
for name in obsvect.cini_names:
cini += getattr(row, name)
@@ -264,8 +262,7 @@ def obsoper(self, inputs, mode,
# calculate as: Jo'(p) = sum( H_i^T*ydel_i*R_i )
# Contribution to observation gradient from obs_i
departure = obs_sim[obs_i] - row.obs # itertuples
# departure = obs_sim[obs_i] - row[1].obs #iterrows
departure = obs_sim[obs_i] - row.obs
# if background missing then observation should have no influence
if cini == 0.:
@@ -280,8 +277,6 @@ def obsoper(self, inputs, mode,
if istate_uni[n] > -1:
grad_o[istate_uni[n]*ndvar:(istate_uni[n]+1)*ndvar] += \
hx[n*ndvar:(n+1)*ndvar] * departure/obs_err[obs_i]**2
# (row.obserror**2 +
# obs_bkgerr[obs_i]**2)
obsvect.dx = grad_o
@@ -289,9 +284,6 @@ def obsoper(self, inputs, mode,
# Add the different components to datastore
obsvect.datastore['sim'] = obs_sim
obsvect.datastore['obs_bkgerr'] = obs_bkgerr
# obsvect.datastore['obs_err'] = np.sqrt(obsvect.datastore['obs_bkgerr']**2 +
# obsvect.datastore['obserror']**2)
obsvect.datastore['obs_err'] = obs_err
@@ -370,13 +362,14 @@ def obsoper(self, inputs, mode,
path.init_dir(rundir)
dump_type = obsvect.dump_type
if dump_debug:
sort_order = getattr(obsvect, 'sort_order', ['index', 'station'])
dump_datastore(obsvect.datastore,
file_monit='{}/monitor_{}_.{}'.format(rundir, run_id, dump_type),
mode='w', dump_type=dump_type,
col2dump=['obs_ghg', 'obs_bkg', 'obs_model', 'obs_sim', 'obs_check',
'obs_bkgerr', 'obs_err', 'obs_hx', 'obs_cinipos'])
'obs_bkgerr', 'obs_err', 'obs_hx', 'obs_cinipos'],
sort_order=sort_order)
# Returning obsvect to the simulator
if mode == 'fwd':
@@ -100,20 +100,26 @@ def init_background(obsvect, **kwargs):
for i, name in enumerate(obsvect.cini_names):
obsvect.datastore[name] = cini[:, i]
import pdb; pdb.set_trace()
# ESO: debugging by reading cini from flexinvert
if True:
#if False:
with open("cini.txt") as f:
ci1= f.readlines()
d1 = [i.split()[0] for i in ci1]
s1 = [i.split()[1] for i in ci1]
s1 = [i.split()[1].lower() for i in ci1]
v1 = [np.float64(i.split()[2]) for i in ci1]
ndf = pd.DataFrame({"obs_cini_0" : v1, "station": s1},
index=[dt.datetime.strptime(i, '%Y%m%d%H%M%S') for i in d1])
ndf.sort_index(inplace=True)
ndf.index.rename('index', inplace=True)
sort_order = getattr(obsvect, 'sort_order', ['index', 'station'])
# ndf.sort_values(['index', 'station'], inplace=True)
# obsvect.datastore.sort_values(['index', 'station'], inplace=True)
ndf.sort_values(sort_order, inplace=True)
obsvect.datastore.sort_values(sort_order, inplace=True)
obsvect.datastore['obs_cini_0'] = ndf['obs_cini_0']
return obsvect
@@ -62,7 +62,6 @@ def simul(self, chi, grad=True, run_id=-1, **kwargs):
# non-diagonal matrices, eventually
departures = obsvect.datastore['sim'] - obsvect.datastore['obs']
# import pdb; pdb.set_trace()
departures[obsvect.datastore['obs_cinipos'] == 0.] = 0.
@@ -150,6 +150,8 @@ def read_datastore(file_monitor,
"""
sort_order = kwargs.get('sort_order', ['index', 'station'])
# Doesn't try to read if file_monitor is None
# ESO: added 2nd condition, otherwise the program halts with some xarray versions
if file_monitor is None or not os.path.isfile(file_monitor):
@@ -167,7 +169,8 @@ def read_datastore(file_monitor,
[d for d in pd.read_csv(file_monitor, chunksize=chunksize)])
df['date'] = pd.to_datetime(df['date'], format='%Y%m%d%H%M')
df.set_index('date', inplace=True)
df.sort_index(inplace=True)
#df.sort_values(['index', 'station'], inplace=True)
df.sort_values(sort_order, inplace=True)
if 'sim_period' in df and not np.any(np.isnan(df['sim_period'])):
df['sim_period'] = \
@@ -175,6 +178,8 @@ def read_datastore(file_monitor,
format='%Y%m%d%H%M')
# Re-ordering the dataframe before returning
df.sort_index(inplace=True)
#df.set_index('date', inplace=True)
#df.sort_values(['index', 'station'], inplace=True)
df.sort_values(sort_order, inplace=True)
return df
@@ -99,9 +99,12 @@ def j2d(fp_day):
"""
return datetime.datetime.combine(
# TODO: FLEXPART juldate is possibly off by 1 hour, check
res = datetime.datetime.combine(
datetime.date.fromordinal(int(fp_day)), datetime.time()) - \
datetime.timedelta(days=1721425)
res = res.replace(hour=0)
return res
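Note: a quick check of the fixed conversion, assuming (per astronomical convention) that julian day 2458850.0 is 2020-01-01 12:00 UTC: `int()` truncates the fraction, so the result is midnight of the civil date, which the `replace(hour=0)` also guarantees.

```python
import datetime

def j2d(fp_day):
    # As in the function above: ordinal offset 1721425 maps the
    # truncated julian day onto the proleptic Gregorian calendar.
    res = datetime.datetime.combine(
        datetime.date.fromordinal(int(fp_day)), datetime.time()) \
        - datetime.timedelta(days=1721425)
    return res.replace(hour=0)

print(j2d(2458850.0))  # 2020-01-01 00:00:00
```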