Commit 0a1fe2c8 authored by Antoine Berchet

Forward transforms to produce BOUN_CONCS; not finalized

parent c4ed0e40
from .read import read
from .fetch import fetch
from .get_domain import get_domain
import os
from .utils import find_valid_file
from pycif.utils import path
def fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None):
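    # For each period, find the valid 3D GRIB files covering every required
    # date, link them into the working directory and return, per period, the
    # lists of local files and of corresponding dates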
list_files = {}
list_dates = {}
for datei in input_dates:
tmp_files = []
tmp_dates = []
for dd in input_dates[datei]:
dir_dd = dd.strftime(ref_dir)
files_3d, dates_3d = find_valid_file(dir_dd, tracer.file_3d, dd)
tmp_files.extend(files_3d)
tmp_dates.extend(dates_3d)
# Fetching
local_files = []
for f in tmp_files:
target_file = '{}/{}'.format(target_dir, os.path.basename(f))
path.link(f, target_file)
local_files.append(target_file)
list_files[datei] = list(set(local_files))
list_dates[datei] = list(set(tmp_dates))
return list_files, list_dates
\ No newline at end of file
import glob
import datetime
import xarray as xr
import os
from pycif.utils.classes.setup import Setup
from .utils import grib_file_reader, find_valid_file
from pycif.utils import path
def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
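    # Read the horizontal grid (longitude/latitude) from a reference GRIB file
    # and build a pyCIF domain from it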
    date_ref = list(input_dates.values())[0][0]
dir_dd = date_ref.strftime(ref_dir)
files_3d, dates_3d = find_valid_file(dir_dd, tracer.file_3d, date_ref)
lon, lat = grib_file_reader(files_3d[0], ['longitude', 'latitude'])
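    # Extend the domain bounds by half a grid cell on each side
    # (lon/lat values are assumed to be cell centres)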
lon_min = lon.min() - (lon[1] - lon[0]) / 2
lon_max = lon.max() + (lon[-1] - lon[-2]) / 2
lat_min = lat.min() - (lat[1] - lat[0]) / 2
lat_max = lat.max() + (lat[-1] - lat[-2]) / 2
nlon = lon.size
nlat = lat.size
# Initializes domain
setup = Setup.from_dict({'domain':
{'plugin': {'name': 'dummy',
'version': 'std',
'type': 'domain'},
'xmin': lon_min, 'xmax': lon_max,
'ymin': lat_min, 'ymax': lat_max,
'nlon': nlon, 'nlat': nlat}})
Setup.load_setup(setup, level=1)
return setup.domain
\ No newline at end of file
@@ -12,19 +12,23 @@ from .utils import grib_file_reader, find_valid_file
def read(self, name, tracdir, tracfile, varnames, dates,
interpol_flx=False, comp_type=None, **kwargs):
for dd in dates:
dir_dd = dd.strftime('{}/%Y/%m/'.format(tracdir))
file_2d = find_valid_file(dir_dd, self.file_2d, dd)
        file_3d = find_valid_file(dir_dd, self.file_3d, dd)
lnsp = grib_file_reader(file_2d, 'lnsp')
latitude = grib_file_reader(file_2d, 'latitude')
longitude = grib_file_reader(file_2d, 'longitude')
spec = grib_file_reader(file_3d, varnames, dd)
import code
code.interact(local=dict(locals(), **globals()))
    # tracfile can be a list of the same length as dates
try:
if len(tracfile) != len(dates):
raise Exception("Try read EDGAR files from a list of dates and a "
"list of files, but not of same length:\n{}\n{}"
.format(tracfile, dates))
list_files = tracfile[:]
except TypeError:
list_files = len(dates) * [tracfile]
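    # Read the requested variables from each file and stack them into a
    # (time, lev, lat, lon) DataArray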
xout = []
for dd, dd_file in zip(dates, list_files):
dir_dd = dd.strftime(tracdir)
spec = grib_file_reader('{}/{}'.format(dir_dd, dd_file), varnames)
xout.append(spec)
xmod = xr.DataArray(xout, coords={'time': dates},
dims=('time', 'lev', 'lat', 'lon'))
return xmod
@@ -21,12 +21,25 @@ def grib_file_reader(filepath, varname):
verbose('Reading {}'.format(filepath))
df = cfgrib.open_file(filepath)
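    # varname can be a single variable name or a list of names; a single
    # array or a list of arrays is returned accordingly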
    if len(np.shape(varname)) == 0:
        varnames = [varname]
    else:
        varnames = varname[:]
    varout = []
    for name in varnames:
        var = df.variables[name].data
        if hasattr(var, 'build_array'):
            var = var.build_array()
        varout.append(var)
    if len(np.shape(varname)) == 0:
        return varout[0]
    else:
        return varout
def find_valid_file(ref_dir, file_format, dd):
@@ -42,7 +55,12 @@ def find_valid_file(ref_dir, file_format, dd):
file_dates = np.array([d + datetime.timedelta(hours=dlt)
for d, dlt in zip(ref_dates, ref_deltas)])
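    # Return both the last file valid at or before the requested date and the
    # first file valid at or after it, so the caller can interpolate in time
    # between the two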
    mask = (file_dates - dd) <= datetime.timedelta(0)
    file_ref1 = list_files[mask][np.argmax(file_dates[mask])]
    date_ref1 = file_dates[mask].max()
    mask = (file_dates - dd) >= datetime.timedelta(0)
    file_ref2 = list_files[mask][np.argmin(file_dates[mask])]
    date_ref2 = file_dates[mask].min()
    return ([file_ref1, file_ref2], [date_ref1, date_ref2])
@@ -5,52 +5,6 @@ import xarray as xr
from .read import read
from .write import write
from .fetch import fetch
from .get_domain import get_domain
def ini_data(plugin, **kwargs):
# Looking for a reference file to read lon/lat in
list_file = glob.glob('{}/*nc'.format(plugin.dir))
ref_file = None
# Either a file is specified in the Yaml
if getattr(plugin, 'file', '') in list_file:
ref_file = '{}/{}'.format(plugin.dir, getattr(plugin, 'file', ''))
# Or loop over available file regarding file pattern
else:
for flx_file in list_file:
try:
date = \
datetime.datetime.strptime(os.path.basename(flx_file),
getattr(plugin, 'file', ''))
ref_file = flx_file
break
except:
continue
if ref_file is None:
raise Exception("EDGARv5 could not be initialized as no file was found")
# Read lon/lat in
nc = xr.open_dataset(ref_file, decode_times=False)
lon = nc['lon']
lat = nc['lat']
lon_min = lon.min() - (lon[1] - lon[0]) / 2
lon_max = lon.max() + (lon[-1] - lon[-2]) / 2
lat_min = lat.min() - (lat[1] - lat[0]) / 2
lat_max = lat.max() + (lat[-1] - lat[-2]) / 2
nlon = lon.size
nlat = lat.size
# Initializes domain
setup = plugin.from_dict({'domain':
{'plugin': {'name': 'dummy', 'version': 'std', 'type': 'domain'},
'xmin': lon_min, 'xmax': lon_max, 'ymin': lat_min, 'ymax': lat_max,
'nlon': nlon, 'nlat': nlat}})
plugin.load_setup(setup, level=1)
plugin.domain = setup.domain
\ No newline at end of file
import os
import datetime
import glob
import numpy as np
from pycif.utils import path
from pycif.utils import check
def fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None):
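    # For each period, build the list of EDGAR flux files matching the
    # requested dates (optionally falling back to the closest available year)
    # and link them into the working directory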
list_files = {}
list_dates = {}
for datei in input_dates:
tmp_files = []
tmp_dates = []
for dd in input_dates[datei]:
file_flx = dd.strftime(ref_file)
dir_flx = dd.strftime(ref_dir)
date_flx = dd
if not os.path.isfile('{}/{}'.format(dir_flx, file_flx)) \
and getattr(tracer, 'closest_year', False):
check.verbose('Warning: could not find correct year for EDGAR; '
'using closest available one')
list_dates_avail = \
[datetime.datetime.strptime(os.path.basename(f), ref_file)
for f in glob.glob('{}/v50_*nc'.format(dir_flx))]
delta_dates = np.abs(dd - np.array(list_dates_avail))
date_flx = list_dates_avail[np.argmin(delta_dates)]
file_flx = date_flx.strftime(ref_file)
tmp_files.append('{}/{}'.format(dir_flx, file_flx))
tmp_dates.append(date_flx)
# Fetching
local_files = []
for f in tmp_files:
target_file = '{}/{}'.format(target_dir, os.path.basename(f))
path.link(f, target_file)
local_files.append(target_file)
list_files[datei] = list(set(local_files))
list_dates[datei] = list(set(tmp_dates))
return list_files, list_dates
\ No newline at end of file
import glob
import datetime
import xarray as xr
import os
from pycif.utils.classes.setup import Setup
def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
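    # Look for a reference EDGAR file in ref_dir and read its lon/lat grid
    # to build the pyCIF domain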
# Looking for a reference file to read lon/lat in
list_file = glob.glob('{}/*nc'.format(ref_dir))
domain_file = None
# Either a file is specified in the Yaml
if ref_file in list_file:
domain_file = '{}/{}'.format(ref_dir, ref_file)
    # Or loop over the available files and match them against the file pattern
else:
for flx_file in list_file:
try:
date = \
datetime.datetime.strptime(os.path.basename(flx_file),
ref_file)
domain_file = flx_file
break
except ValueError:
continue
if domain_file is None:
raise Exception(
"EDGARv5 domain could not be initialized as no file was found")
# Read lon/lat in
nc = xr.open_dataset(domain_file, decode_times=False)
lon = nc['lon']
lat = nc['lat']
lon_min = lon.min() - (lon[1] - lon[0]) / 2
lon_max = lon.max() + (lon[-1] - lon[-2]) / 2
lat_min = lat.min() - (lat[1] - lat[0]) / 2
lat_max = lat.max() + (lat[-1] - lat[-2]) / 2
nlon = lon.size
nlat = lat.size
# Initializes domain
setup = Setup.from_dict({'domain':
{'plugin': {'name': 'dummy',
'version': 'std',
'type': 'domain'},
'xmin': lon_min, 'xmax': lon_max,
'ymin': lat_min, 'ymax': lat_max,
'nlon': nlon, 'nlat': nlat}})
Setup.load_setup(setup, level=1)
return setup.domain
\ No newline at end of file
@@ -13,11 +13,21 @@ import glob
def read(self, name, tracdir, tracfile, varnames, dates,
interpol_flx=False, tracer=None, model=None, **kwargs):
    # tracfile can be a list of the same length as dates
try:
if len(tracfile) != len(dates):
raise Exception("Try read EDGAR files from a list of dates and a "
"list of files, but not of same length:\n{}\n{}"
.format(tracfile, dates))
list_files = tracfile[:]
except TypeError:
list_files = len(dates) * [tracfile]
# Reading fluxes for periods within the simulation window
trcr_flx = []
    for dd, dd_file in zip(dates, list_files):
        file_flx = dd.strftime(dd_file)
dir_flx = dd.strftime(tracdir)
if not os.path.isfile('{}/{}'.format(dir_flx, file_flx)) \
......
@@ -8,10 +8,7 @@ from pycif.utils import path
from pycif.utils.netcdf import readnc
def fetch_meteo(meteo, datei, datef, workdir,
filetypes=['defstoke', 'fluxstoke', 'fluxstokev', 'phystoke'],
**kwargs):
"""Reads meteorology and links to the working directory
......
@@ -22,7 +22,7 @@ def ini_periods(self, **kwargs):
# Time steps defined by the user
nphour_ref = self.nphour_ref
    # Numbers of time steps per hour really used in the simulation
self.tstep_dates = {}
self.tstep_all = []
self.nhour = []
@@ -45,11 +45,11 @@ def ini_periods(self, **kwargs):
self.nhour.extend(nphour * [nh + 1])
# Frequency in seconds
        # TODO: check against the Fortran code: integer rounding of nphour?
freq = '{}s'.format(old_div(3600, nphour))
# List of time steps
        # TODO: in this case, beware of chemical time steps?
# the time step to really use is nphour*ichemstep
ddhi = dd + datetime.timedelta(hours=nh)
ddhe = dd + datetime.timedelta(hours=nh + 1)
......
@@ -49,19 +49,46 @@ def init_transform(self, transform_type=None):
else:
parameters = comp_type.parameters.attributes[:]
        # Copying information in the state space for the model
        # for param in comp_type.attributes:
        #     setattr(getattr(self.model, input_type), param,
        #             getattr(comp_type, param))
npar = len(parameters)
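        # Fill the transform mapper: each parameter of the component is
        # forwarded unchanged, i.e. same component and parameter names on the
        # input and output sides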
mapper[input_type]['input_component'] = npar * [input_type]
mapper[input_type]['force_read'] = npar * [False]
mapper[input_type]['input_component_plg'] = npar * [comp_type]
mapper[input_type]['input_parameter'] = parameters[:]
mapper[input_type]['input_parameter_plg'] = \
[getattr(comp_type.parameters, p) for p in parameters]
mapper[input_type]['output_component'] = npar * [input_type]
mapper[input_type]['output_parameter'] = parameters[:]
# Saving input dates and files to read later
# If none is specified, take reference input dates from the model
# and default files from the tracer
default_dates = self.model.input_dates
ndefault = len(default_dates)
mapper[input_type]['input_dates_parameters'] = \
[getattr(getattr(comp_type.parameters, p),
'input_dates', default_dates)
for p in parameters]
mapper[input_type]['input_files_parameters'] = \
[getattr(getattr(comp_type.parameters, p), 'input_files',
ndefault * [getattr(getattr(comp_type.parameters, p),
'file', '')])
for p in parameters]
mapper[input_type]['input_dates_component'] = \
getattr(comp_type, 'input_dates', self.model.input_dates)
mapper[input_type]['input_files_component'] = \
getattr(comp_type, 'input_files',
ndefault * [getattr(comp_type, 'file', '')])
mapper[input_type]['target_dates'] = self.model.input_dates
# Updating inputs/outputs depending on transformations to carry out
if input_type in transfs:
......
from __future__ import absolute_import
import pandas as pd
import numpy as np
from .superobs import timeavg
from .superobs import satellites as sat
def native2obsvect(self, datastore, datei, datef, runsubdir, workdir):
......
@@ -3,8 +3,6 @@ import pandas as pd
import numpy as np
import itertools
from pycif.utils.datastores.empty import init_empty
from .superobs import timeavg
from .superobs import satellites as sat
def obsvect2native(self, datei, datef, mode, runsubdir, workdir):
......
from __future__ import absolute_import
from .native2obsvect import native2obsvect
from .obsvect2native import obsvect2native
\ No newline at end of file
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from builtins import zip
from builtins import range
from past.utils import old_div
import numpy as np
import xarray as xr
import os
from pycif.utils.datastores.dump import dump_datastore, read_datastore
import scipy
from .vinterp import vertical_interp
def native2obsvect(self, y0, datastore, ddi, ddf):
"""Aggregate simulations at the grid scale to total columns.
Re-interpolate the model pressure levels to the satellite averaging kernel
levels. Average using the averaging kernel formula
"""
# Number of levels to extract for satellites
dlev = np.ones(len(y0), dtype=int)
dlev[y0['level'] < 0] = self.model.domain.nlev
# Index in the original data of the level-extended dataframe
native_inds = np.append([0], dlev.cumsum())[:-1]
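    # 'indorig' maps every row of the level-extended datastore back to the
    # index of its original observation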
datastore['indorig'] = np.nan
datastore.ix[native_inds, 'indorig'] = np.arange(len(y0))
datastore['indorig'].fillna(method='ffill', inplace=True)
datastore['indorig'] = datastore['indorig'].astype(int)
# Building the extended dataframe
iq1 = (np.abs(y0['level']) - np.abs((y0['level'] / 10.)
.astype(int) * 10)) \
.astype(int)
nblinfo = ((y0['level'].astype(int) - y0['level']) * 1e7).astype(int)
list_satIDs = iq1.loc[y0['level'] < 0].unique()
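    # Process observations satellite by satellite; the satellite ID is
    # encoded in the 'level' column of the observation datastore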
ds_p = datastore.set_index('indorig')[['pressure', 'dp']]
for satID in list_satIDs:
satmask = iq1 == satID
nobs = np.sum(satmask)
nblloc = nblinfo.loc[satmask].values - 1
# Stacking output datastore into levels * nobs
native_ind_stack = native_inds[satmask] \
+ np.arange(self.model.domain.nlev)[:, np.newaxis]
        # If all simulated values are NaN, the species was not simulated; skip
sim = datastore['sim'].values[native_ind_stack]
if not np.any(~np.isnan(sim)):
continue
# Grouping all data from this satellite
datasim = xr.Dataset(
{'pressure': (['level', 'index'],
np.log(ds_p['pressure'].values[native_ind_stack])),
'dp': (['level', 'index'], ds_p['dp'].values[native_ind_stack]),
'sim': (['level', 'index'], sim)},
coords={'index': nblloc,
'level': np.arange(self.model.domain.nlev)}
)
if 'sim_tl' in datastore:
datasim['sim_tl'] = (['level', 'index'],
datastore['sim_tl'].values[native_ind_stack])
        # Check whether averaging kernels are available for this date
file_aks = ddi.strftime(
'{}/obsvect/satellites/infos_{:02d}%Y%m%d%H%M.nc' \
.format(self.workdir, satID))
try:
sat_aks = read_datastore(file_aks).to_xarray()
except IOError:
# Assumes total columns?
# datastore['qdp'] = datastore['sim'] * datastore['dp']
# groups = datastore.groupby(['indorig'])
# y0.loc[:, 'sim'] += \
# groups['qdp'].sum().values / groups['dp'].sum().values
#
# if 'sim_tl' in datastore:
# datastore['qdp'] = datastore['sim_tl'] * datastore['dp']
# groups = datastore.groupby(['indorig'])
# y0.loc[:, 'sim_tl'] += \
# groups['qdp'].sum().values / groups['dp'].sum().values
continue
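        # Averaging kernels and pressure-level edges for this satellite,
        # flipped to the model vertical ordering; pavg is presumably given in
        # hPa, hence the factor 100 to convert to Pa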
aks = sat_aks['ak'][nblloc, ::-1].T
pavgs = 100. * sat_aks['pavg'][nblloc, ::-1].T
pavgs = xr.DataArray(
np.concatenate([pavgs, np.zeros((1, nobs))], axis=0),
coords={'index': nblloc,
'level': np.arange(aks.level.size + 1)},
dims=('level', 'index'))
pavgs_mid = xr.DataArray(
np.log(0.5 * (pavgs[:-1].values + pavgs[1:].values)),
dims={'index': nblloc,
'level': np.arange(aks.level.size)})
dpavgs = xr.DataArray(
np.diff(- pavgs, axis=0),
dims={'index': nblloc,
'level': np.arange(aks.level.size)})
qa0lus = sat_aks['qa0lu'][nblloc, ::-1].T