Commit 1f2c5559 authored by Espen Sollum's avatar Espen Sollum
Browse files

Add files missing from previous commit

parent 47eb8d20
####################
# pyCIF config file #
#####################
# Define here all parameters for pyCIF following Yaml syntax
# For details on Yaml syntax, please see:
# http://docs.ansible.com/ansible/latest/YAMLSyntax.html
# For non-specified parameters, one can either comment the corresponding line
# or give the value "null"
# Some parameters are switches that need to be activated or not.
# pyCIF expects a boolean True/False
# Yaml accepts the following syntax to be converted to booleans:
# y|Y|yes|Yes|YES|n|N|no|No|NO|true|True|TRUE|false|False|FALSE|on|On|ON|off|Off|OFF
# This offers flexibility, but one should stick to a style for consistency
#####################################################################
# pyCIF parameters
# Verbose level
# 1 = Basic information
# 2 = Debugging
verbose : 2
# Log file (to be saved in $workdir)
logfile: pyCIF.logtest
# Execution directory
workdir : /homevip/espen/FLEX_INV/CIF/flextest/
# Initial date
# Use the following compatible format for the date:
# YYYY-mm-dd or YYYY-mm-dd HH:mm
datei : 2012-01-01
# End date (same as for the initial date)
datef : 2012-01-31
#####################################################################
#####################################################################
# Transport model
# Accepts any model registered in pyvar.models
# fp_run_dir - directory from which to read FLEXPART footprints
# nested - yes if using nested output domain
# coeff - coefficient to convert from grid units of ppt to ppbv/ppmv (1e-3/1e-6)
# (flexinvert uses ppb, pycif uses 1e-6)
# mmair - standard molecular mass of air
# molarmass - Molar mass (in flux files, e.g. CO2-C=12, CH4=16)
# numscale - numeric scaling
model :
plugin:
name : FLEXPART
version : std
periods : 1MS
nested : yes
run_dir_nest : /homevip/espen/FLEX_INV/CH4/TEST_OUTPUT/FLEXOUT/GHG/NEST/
run_dir_glob : /homevip/espen/FLEX_INV/CH4/TEST_OUTPUT/FLEXOUT/GHG/NEST/
coeff : 1.e-3
mmair : 28.97
molarmass : 16.
numscale : 1.E12
#####################################################################
#####################################################################
# How to build your observation vector and observation uncertainties if needed
# Also projects information from the observation to the model space
# - fic_obsvect: observation vector from previous simulations
obsvect:
plugin:
name: standard
version: std
# fic_obsvect : /homevip/espen/FLEX_INV/CIF/flextest/monitor_obsvect.nc
# file_obsvect : /homevip/espen/FLEX_INV/CIF/flextest/monitor_obsvect.nc
dump: True
dump_type: nc
#####################################################################
#####################################################################
# Measurements to account for
# Main keys are:
# - infos: infos on tracers to include in the simulation
# - fic_monitor: standard txt file including all observations
measurements :
plugin:
name: standard
version: std
# File where to save data, if does not exist. Reads from there if exists
# fic_monitor : /homevip/espen/FLEX_INV/CIF/flextest/monitor_allobs.nc
# file_monit : /homevip/espen/FLEX_INV/CIF//monitor_allobs.nc
# file_monitor : /homevip/espen/FLEX_INV/CIF/monitor_allobs.nc
dump_type : nc
species :
CH4 :
dailyPM :
provider : WDCGG
format : std
dir_obs : /homevip/espen/FLEX_INV/CH4/TEST_INPUT/OBS/GHG/*.cn.*
# rescale : false
rescale : true
na_values : -999999.99
default_unit : ppb
default_duration: 1
dump : True
# Minimum measurement error
measerr : 5.0
####################################################################
####################################################################
# Observation operator: projects from control to observation spaces
# - read_only: do not run the transport model, read output from existing run
obsoperator:
plugin:
name: fp
version: std
autorestart: false
read_only: true
####################################################################
#####################################################################
# Domain definition
# coordinates refers to edge of inversion domain
# For FLEXPART, include definition of global domain
domain :
plugin :
name : FLEXPART
version : std
xmin : -15.
xmax : 35.
ymin : 30.
ymax : 75.
nlon : 100
nlat : 90
nlev : 1
type : deg
xmin_glob : -179.
ymin_glob : -90.
nlon_glob : 360
nlat_glob : 180
dx_glob : 1.
dy_glob : 1.
mode:
plugin:
name: 4dvar
version: std
minimizer:
plugin:
# name: congrad
name: M1QN3
version: std
simulator:
plugin:
name: gausscost
version: flexpart
reload_from_previous: False
maxiter: 10
df1: 0.01
# zreduc: 0.001
# epsg: 0.01
save_out_netcdf: True
####################################################################
#####################################################################
# Arguments to define the state vector
# These are specific to the state vector and inversion method you chose
# Please refer to the documentation to know what to include here
# For the standard FLEXPART, include the following:
# - ficlsm: land-sea mask (must be consistent with LMDZ grid)
# - correl: Use correlation or not in B
# - dircorrel: path to pre-computed correlations
# - sigma_land: spatial correlation length for prior errors over land (km)
# - sigma_sea: spatial correlation length for prior errors over ocean (km)
# - tracers: list of tracers to put in the state vector (with definition arguments):
# - calcstd: calculate global standard deviation
# - resol: resolution at which fields are scaled
# (choice = bands,regions,pixels;
# if regions, provide a netcdf file ficregion
# if bands, define a list of latitudes as band limits (n+1 for n bands)
# - inc_ocean: for 'regions', optimize ocean boxes (default true)
# - periodflux: period of variation for increments within a month (days)
# - glob_err (optional) = uncertainty on global budget
controlvect:
plugin:
name: standard
version: std
components:
fluxes:
parameters:
CH4 :
plugin:
name: 'FLEXPART'
version: 'nc'
# resol : hpixels
type : flexpart
to_netcdf : True
dir_netcdf : /homevip/espen/FLEX_INV/CIF/flextest/
hresol : regions
inc_ocean : true
fileregions : /homevip/espen/FLEX_INV/CH4/TEST_OUTPUT/regions_ghg.nc
# errtype : max
errtype : avg
err : 0.5
# Lower limit flux error: unit (kg/m2/h)
flxerr_ll: 1.e-8
# Total error inversion domain: unit (Tg/y) -- todo: not implemented
glob_err: 10.
numscale : 1.E12
xb_scale : 1.
# periodflux : 5D
period : 10D
dir : /homevip/espen/FLEX_INV/CH4/TEST_OUTPUT/FLUXES/GHG/
file : CH4_TOTAL_%Y_05x05.nc
file_glob : CH4_TOTAL_%Y_10x10.nc
varname_flx: emisch4
lonname_flx: longitude
latname_flx: latitude
timename_flx: time
offsets : True
# offsets : False
# Initial concentrations
dir_initconc : /homevip/espen/FLEX_INV/CH4/TEST_INPUT/INIT_CONC/GHG/
file_initconc: ch4_noaa_%Y%M.nc
varname_init: CH4
hcorrelations :
landsea: True
# landsea: False
filelsm : /homevip/espen/FLEX_INV/CH4/TEST_INPUT/lsm_0.5x0.5_VERIFY.nc
dircorrel : /homevip/espen/FLEX_INV/CIF/flextest/
dump_hcorr : True
sigma_land: 250
sigma_sea: 1000
tcorrelations :
sigma_t : 30
dump_tcorr : True
dircorrel : /homevip/espen/FLEX_INV/CIF/flextest/
evalmin : 1.e-5
####################################################################
......@@ -48,7 +48,7 @@ def hresol2dim(tracer, dom, **kwargs):
elif tracer.hresol == 'regions':
if not hasattr(tracer, 'regions'):
with Dataset(tracer.ficregions, 'r') as f:
with Dataset(tracer.fileregions, 'r') as f:
tracer.regions = f.variables['regions'][:]
tracer.nregions = len(np.unique(tracer.regions))
......@@ -70,7 +70,7 @@ def hresol2dim(tracer, dom, **kwargs):
# Check that regions have the correct dimensions
if tracer.regions.shape != (dom.nlat, dom.nlon):
raise Exception("Regions were not correctly defined in {}"
.format(tracer.ficregions))
.format(tracer.fileregions))
return tracer.nregions
......
from scipy.io import FortranFile
import numpy as np
import xarray as xr
import calendar
import os
import string
from netCDF4 import Dataset
import pandas as pd
from pycif.utils import path
import shutil
def native2inputs(self, datastore, input_type, datei, datef, mode,
                  runsubdir, workdir):
    """Converts data at the model data resolution to model compatible input
    files.

    Args:
        self: the model Plugin
        datastore: data to convert
            if input_type == 'fluxes', a dictionary with flux maps
            if input_type == 'obs', a pandas dataframe with the observations
        input_type (str): one of 'fluxes', 'obs', 'prodloss3d',
            'prescrconcs', 'inicond'; any other value is delegated to
            self.make_input
        datei, datef: date interval of the sub-simulation
        mode (str): running mode: one of 'fwd', 'adj' and 'tl'
        runsubdir (str): sub-directory for the current simulation
        workdir (str): the directory of the whole pycif simulation

    Returns:
        None

    Notes:
        Copied from LMDZ. We do not attempt to run the model at this point:
        for FLEXPART the function returns right after the trace print below,
        so everything past the early 'return' is dead code kept for
        reference until input generation is implemented for FLEXPART.
    """
    print("models/flexpart/native2inputs")

    # Exit here: FLEXPART input files are not generated by pycif for now
    return

    if datastore is None:
        datastore = {}

    # Switching datei and datef if adj (dates are reversed in adjoint runs)
    ddi = min(datei, datef)
    ddf = max(datei, datef)

    # Deals with fluxes
    # Not specified emissions are set to zero by LMDZ
    if input_type == 'fluxes':
        for spec in self.emis_species.attributes:
            # If determined by the control vector
            if spec in datastore:
                data = datastore[spec]

            # Else read directly from netCDF files
            else:
                tracer = getattr(self.emis_species, spec)
                # NOTE(review): 'tracer.fic' — other parts of this commit
                # rename 'fic*' attributes to 'file*'; confirm this
                # attribute still exists on the tracer plugin
                data = {'spec': self.emis_species.read_fluxes(
                    spec, tracer.dir, tracer.fic, [ddi, ddf])}

            # TODO: Re-project at some point if necessary

            # Re-samples at daily scale (forward-fill between flux dates)
            day_dates = pd.date_range(ddi, ddf, freq='D').union([ddf])
            flx = xr.Dataset.from_dataframe(
                data['spec'].to_dataframe(name='spec')
                .unstack(level=['lat', 'lon'])
                .reindex(day_dates, method='ffill')
                .stack(level=['lat', 'lon']))['spec'] \
                .rename({'level_0': 'time'})

            # Keeping only data in the simulation window
            mask = (flx.time >= np.datetime64(ddi, 'ns')) \
                   & (flx.time <= np.datetime64(ddf, 'ns'))
            mask = np.where(mask)[0]
            flx = flx[mask, :, :]

            # If a scaling factor is available, use it
            if 'scale' in data:
                scale = xr.Dataset.from_dataframe(
                    data['scale'].to_dataframe(name='scale')
                    .unstack(level=['lat', 'lon'])
                    .reindex(day_dates, method='ffill')
                    .stack(level=['lat', 'lon']))['scale'] \
                    .rename({'level_0': 'time'})[mask, :, :]
                flx_fwd = flx * scale
            else:
                # '* 1.' makes a copy so flx_tl below is built from the
                # unscaled fluxes
                flx_fwd = flx * 1.

            # If tangent-linear mode, include tl increments
            if 'incr' in data and mode == 'tl':
                incr = xr.Dataset.from_dataframe(
                    data['incr'].to_dataframe(name='incr')
                    .unstack(level=['lat', 'lon'])
                    .reindex(day_dates, method='ffill')
                    .stack(level=['lat', 'lon']))['incr'] \
                    .rename({'level_0': 'time'})[mask, :, :]
                flx_tl = incr
            else:
                flx_tl = 0. * flx

            # Put in dataset for writing by 'write_fluxes'
            ds = xr.Dataset({'fwd': flx_fwd, 'tl': flx_tl})

            # Write to FORTRAN binary
            fic = "{}/mod_{}.bin".format(runsubdir, spec)
            self.fluxes.write_fluxes(fic, ds)

    # Deals with 3D chemical production and loss
    elif input_type in ['prodloss3d', 'prescrconcs']:
        # Deals only with species whose prodloss is activated
        if not hasattr(getattr(self, 'chemistry', None), input_type):
            return

        # Binary and nc file name depending on input_type
        bin_name = 'prodscale' if input_type == 'prodloss3d' else 'scale'
        nc_name = 'prodloss' if input_type == 'prodloss3d' else 'prescr'

        for spec in getattr(self.chemistry, input_type).attributes:
            if spec in datastore:
                data = datastore[spec]

                # Re-samples at daily scale
                day_dates = pd.date_range(ddi, ddf, freq='D').union([ddf])
                prod = data['scale'].to_dataframe(name='scale') \
                    .unstack(level=['lat', 'lon']) \
                    .reindex(day_dates, method='ffill') \
                    .stack(level=['lat', 'lon']).to_xarray()['scale'] \
                    .rename({'level_0': 'time'})

                # Keeping only data in the simulation window
                mask = (prod.time >= np.datetime64(ddi, 'ns')) \
                       & (prod.time <= np.datetime64(ddf, 'ns'))
                mask = np.where(mask)[0]
                prod = prod[mask]

                # If tangent-linear mode, include tl increments
                if 'incr' in data and mode == 'tl':
                    incr = data['incr'].to_dataframe(name='incr') \
                        .unstack(level=['lat', 'lon']) \
                        .reindex(day_dates, method='ffill') \
                        .stack(level=['lat', 'lon']) \
                        .to_xarray()['incr'] \
                        .rename({'level_0': 'time'})[mask, :, :]
                else:
                    incr = 0. * prod

                # Write to FORTRAN binary: one record of (value, increment)
                # per grid point
                fic = "{}/mod_{}_{}.bin".format(runsubdir, bin_name, spec)
                with FortranFile(fic, 'w') as f:
                    # Looping over all values and writing to binary
                    prod = prod.values
                    incr = incr.values
                    for d0, d1 in zip(np.ravel(prod), np.ravel(incr)):
                        f.write_record(np.array([d0, d1], dtype=float))

            # Links reference netCDF files that are needed anyway by LMDZ;
            # fall back on the tracer definition when the datastore does not
            # carry the origin directory/file
            try:
                dirorig = datastore[spec]['dirorig']
                ficorig = datastore[spec]['ficorig']
                if ficorig is None or dirorig is None:
                    raise KeyError
            except KeyError:
                tracer = getattr(getattr(self.chemistry, input_type), spec)
                dirorig = tracer.dir
                ficorig = tracer.fic

            origin = \
                ddi.strftime("{}/{}".format(dirorig, ficorig))
            target = "{}/{}_{}.nc".format(runsubdir, nc_name, spec)
            path.link(origin, target)

    # Deals with initial conditions
    elif input_type == 'inicond':
        # Scaling factors from control vector if any; only written for the
        # first sub-simulation of the whole run
        if ddi == self.datei:
            for spec in self.chemistry.acspecies.attributes:
                if spec in datastore:
                    data = datastore[spec]
                    inicond = data['scale'].values

                    # If tangent-linear mode, include tl increments
                    if 'incr' in data and mode == 'tl':
                        incr = data['incr'].values
                    else:
                        incr = 0 * inicond

                    # Write to FORTRAN binary
                    fic = "{}/init_{}.bin".format(runsubdir, spec)
                    with FortranFile(fic, 'w') as f:
                        # Looping over all values and writing to binary
                        for d0, d1 in zip(np.ravel(inicond), np.ravel(incr)):
                            f.write_record(np.array([d0, d1], dtype=float))

        # Reference initial condition file: chained restart if a previous
        # sub-simulation exists, otherwise the configured restart file
        if hasattr(self, 'chain'):
            source = self.chain.strftime(
                "{}/../chain/restart_%Y%m%d%H%M.nc".format(runsubdir))
            target = "{}/start.nc".format(runsubdir)
            path.link(source, target)

            if mode == 'tl':
                source = self.chain.strftime(
                    "{}/../chain/restart_tl_%Y%m%d%H%M.bin".format(runsubdir))
                target = "{}/restart_tl.bin".format(runsubdir)
                path.link(source, target)

        else:
            source = datei.strftime(
                "{}/{}".format(self.restart.dir,
                               self.restart.fic))
            target = "{}/start.nc".format(runsubdir)
            if mode in ['fwd', 'tl']:
                path.link(source, target)
            else:
                # Adjoint: work on a private copy and zero out the tracer
                # fields (presumably so the backward run starts from zero
                # sensitivities — TODO confirm against LMDZ original)
                shutil.copy(source, target)
                for spec in self.chemistry.acspecies.attributes:
                    restartID = \
                        getattr(self.chemistry.acspecies, spec).restart_id
                    with Dataset(target, 'a') as f:
                        var = 'q{:02d}'.format(restartID)
                        if var in f.variables:
                            f.variables[var][:] = 0.

    # Deals with observations
    elif input_type == 'obs':
        # If empty datastore, do nothing
        if datastore.size == 0:
            return

        self.dataobs = datastore

        col2extract = 'obs_incr' if mode == 'adj' else 'obs'

        # Include only part of the datastore
        data = datastore[['dtstep', 'tstep', 'i', 'j',
                          col2extract, 'level']].values

        # Assumes that stations with no level are in first level
        # TODO: make it general
        data[np.isnan(data[:, -1]), -1] = 1
        data[data[:, -1] <= 0, -1] = 1
        data[:, -1] /= 100

        # Attribute ID to each species (encoded in the fractional part of
        # the last column together with the level)
        # Fix: 'string.lower' was removed in Python 3; 'str.lower' is the
        # equivalent unbound method and works on Python 2 and 3
        for k, s in enumerate(map(str.lower,
                                  self.chemistry.acspecies.attributes)):
            data[(datastore['parameter'].str.lower() == s).values, -1] += k + 1

        # Write in a binary
        fic = "{}/obs.bin".format(runsubdir)
        with FortranFile(fic, 'w') as f:
            # Write header: total number of per-timestep records
            nbobs = data[:, 0].sum()
            f.write_record(np.array(nbobs, dtype=np.int32))

            # Writes data to the binary file
            # Beware that tstep numbering is incremented to fit FORTRAN
            # indexing
            for d in data:
                for dt in np.arange(d[0]):
                    dd = np.append(d[1] + dt + 1, d[2:])
                    f.write_record(dd.astype(float))

    else:
        self.make_input(input_type, datei, datef, mode, runsubdir, workdir)

    return
import numpy as np
import datetime
import os
import calendar
import glob
import pandas as pd
from .utils import read
from pycif.utils.netcdf import save_nc
from pycif.utils.check import verbose
from pycif.utils.datastores.empty import init_empty
import pickle
def outputs2native(self, runsubdir, mode='fwd', dump=True):
    """Reads model outputs back into pycif objects.

    For FLEXPART this is a placeholder: output is read per observation
    inside the loop over observations in obsoper.py. This only ensures
    the 'dataobs' and 'datasensit' attributes exist, then hands back
    the sensitivity container.
    """
    # Lazily create the observation datastore on first call
    if not hasattr(self, 'dataobs'):
        self.dataobs = init_empty()

    # Keep an existing sensitivity container, otherwise start empty
    self.datasensit = \
        self.datasensit if hasattr(self, 'datasensit') else {}

    return self.datasensit
def run(self):
    """No-op run method for the FLEXPART model plugin.

    pycif does not launch FLEXPART itself; this just announces the call.
    """
    message = "Empty run method for model FLEXPART"
    print(message)
""" Routines for reading FLEXPART output
"""
from __future__ import absolute_import
import os
import numpy as np
import glob
from . import flexpart_header
from . import mod_flexpart
from pycif.utils.dates import j2d
def read_flexpart_dir(subdir, nested=True, **kwargs):
""" Reads all FLEXPART grid_time files from a directory
Returns: