Commit 64aa4b83 authored by Antoine Berchet's avatar Antoine Berchet
Browse files

Computing uncertainties in EnSRF, congrad and variational

parent fd686cf6
......@@ -126,7 +126,33 @@ article:
- apt-get update
- pip freeze
script:
- tox -e py38 -e coverage -- -m "(dummy and article and inversion and not adjtltest) or (fwd and ref_config)"
- tox -e py38 -e coverage -- -m "(dummy and article and inversion and not adjtltest and not uncertainties) or (fwd and ref_config)"
after_script:
- mkdir -p coverage
- xmlstarlet sel -t -v "//coverage/@line-rate" reports/coverage.xml > coverage/.current_coverage
- echo 'TOTAL COVERAGE:'" $(cat coverage/.current_coverage)%"
coverage: '/^TOTAL COVERAGE: ([0-9\.]+\%)$/'
artifacts:
when: always
paths:
- reports/pytest*.html
- reports/coverage/
- coverage/.current_coverage
- examples_artifact
- figures_artifact
only:
- pytests
article_uncertainties:
stage: test
image:
name: pycif/pycif-ubuntu:0.1
entrypoint: [""]
before_script:
- apt-get update
- pip freeze
script:
- tox -e py38 -e coverage -- -m "(dummy and article and inversion and not adjtltest and uncertainties) or (fwd and ref_config)"
after_script:
- mkdir -p coverage
- xmlstarlet sel -t -v "//coverage/@line-rate" reports/coverage.xml > coverage/.current_coverage
......
......@@ -6,5 +6,5 @@ dir_out=/home/satellites13/aberchet/cif_paper/simulations_`date +'%Y%m%d'`
dir_out=/homel/aberchet/test_article/simulations_`date +'%Y%m%d'`
mkdir -p $dir_out
cd ..
pytest -s -m "(dummy and article and inversion and not adjtltest) or (fwd and ref_config)" \
pytest -s -m "(dummy and article and inversion and not adjtltest and uncertainties) or (fwd and ref_config)" \
--html reports/pytest.html --self-contained-html --basetemp=$dir_out/ ./
......@@ -43,6 +43,9 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
if hasattr(self, "xb"):
tosave["xb"] = self.xb
if hasattr(self, "std"):
tosave["std"] = self.std
if hasattr(self, "pa"):
tosave["pa"] = self.pa
......@@ -89,8 +92,22 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
(tracer.ndates, tracer.nlev, -1),
)
std = scale2map(std, tracer, tracer.dates, tracer.domain)
ds = xr.Dataset({"x": x, "xb": xb, "std": std})
# Adding the diagonal of posterior uncertainties if available
if hasattr(self, "pa"):
pa_std = np.reshape(np.sqrt(np.diag(self.pa)[
tracer.xpointer: tracer.xpointer + tracer.dim
]),
(tracer.ndates, tracer.nlev, -1),
)
pa_std = scale2map(pa_std, tracer, tracer.dates, tracer.domain)
ds = xr.Dataset({"x": x, "xb": xb,
"b_std": std, "pa_std": pa_std})
else:
ds = xr.Dataset({"x": x, "xb": xb,
"b_std": std})
# If tracer is scalar, also include the "physical" projection
if getattr(tracer, "type", "scalar") == "scalar":
......@@ -121,11 +138,28 @@ def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
xb, levels={"time": inputs.time,
"lev": inputs.lev},
)
ds_phys = xr.Dataset({"x_phys": x_phys, "xb_phys": xb_phys})
ds_phys = ds_phys.rename({"time": "time_phys"})
b_phys = \
inputs * reindex(
std, levels={"time": inputs.time,
"lev": inputs.lev},
)
if hasattr(self, "pa"):
pa_phys = \
inputs * reindex(
pa_std, levels={"time": inputs.time,
"lev": inputs.lev},
)
ds_phys = xr.Dataset({
"x_phys": x_phys, "xb_phys": xb_phys,
"b_phys": b_phys, "pa_phys": pa_phys})
else:
ds_phys = xr.Dataset({
"x_phys": x_phys, "xb_phys": xb_phys, "b_phys": b_phys})
ds_phys = ds_phys.rename({"time": "time_phys"})
ds = ds.merge(ds_phys)
# Adding longitudes and latitudes
if not getattr(tracer, "is_lbc", False):
ds = ds.assign(
......@@ -161,4 +195,10 @@ def load(self, cntrl_file, **kwargs):
if hasattr(out, "xb"):
self.xb = out.xb
if hasattr(out, "std"):
self.std = out.std
if hasattr(out, "pa"):
self.pa = out.pa
return out
......@@ -22,6 +22,16 @@ requirements = {
},
}
input_arguments = {
"save_uncertainties": {
"doc": "Save the estimated eigenvectors of the inverse of the hessian. "
"They allow the reconstruction of the uncertainty reduction "
"matrix",
"default": False,
"accepted": bool
}
}
def ini_data(plugin, **kwargs):
"""Initializes congrad.
......
......@@ -421,7 +421,7 @@ def congrad(self, px, pgrad, planc1, **kwargs):
pevecs[jk, :] = zvcglev[jk, :] * np.sqrt(
1.0 - 1.0 / zrcglev[jk]
)
# Transform control variable and gradient back to space with scalar
# product SCAAS
pgrad *= 2.0
......
......@@ -33,6 +33,10 @@ def minimize(self, finit, gradinit, chi0, **kwargs):
xopt, gradopt, preduc, pevecs, iiter = self.congrad(
chi0, gradinit, lanczvect0, **kwargs
)
# Save pevecs as uncertainty reductions
if self.save_uncertainties:
self.pevecs = pevecs
# Final verbose and output
towrite = """
......
......@@ -92,5 +92,9 @@ def execute(self, **kwargs):
dir_dump = "{}/obsvect_posterior/".format(workdir)
obsvect.dump(dir_dump)
print(__file__)
import code
code.interact(local=dict(locals(), **globals()))
# Return optimized control vector and observation vector
return controlvect, obsvect
......@@ -7,7 +7,7 @@ import numpy as np
import pickle
from logging import info
from pycif.utils.datastores.dump import read_datastore
from .basefunctions import compute_sample
from .sampling import compute_sample
from .build_H import build_Hx
......@@ -42,12 +42,11 @@ def execute(self, **kwargs):
)
info(towrite)
# Load Hx
# Load Hx or build it from members if not already computed
ensemble_dir = "{}/ensemble/".format(workdir)
sample_file = os.path.join(ensemble_dir, "x_sample.pickle")
hx_file = os.path.join(ensemble_dir, "Hx.pickle")
nsample = self.nsample
bfull = controlvect.build_b(controlvect)
try:
with open(sample_file, "rb") as f:
x_sample = pickle.load(f)
......@@ -57,17 +56,21 @@ def execute(self, **kwargs):
except IOError:
# Build ensemble
x_sample = np.random.multivariate_normal(
controlvect.xb, bfull,
size=nsample)
x_sample = np.random.normal(size=(nsample, controlvect.dim))
for k, x in enumerate(x_sample):
x_sample[k] = controlvect.sqrtbprod(x)
# Compute simulation for reference control vector as well
x_sample = np.append(x_sample, np.ones((1, controlvect.dim)), axis=0)
# Compute Hx for each member of the ensemble
compute_sample(self, controlvect, x_sample)
# Save corresponding samples
with open(sample_file, "wb") as f:
pickle.dump(x_sample, f, pickle.HIGHEST_PROTOCOL)
# Build Hx matrix
# Build Hx matrix from output simulations of members
hx_array = build_Hx(obsvect, ensemble_dir, x_sample)
# Solve operations
......@@ -87,9 +90,7 @@ def execute(self, **kwargs):
xa = controlvect.xb + kmatrix.dot(dy)
# Compute uncertainties
# xa_sample = x_sample.T \
# + kmatrix.dot(2 * hxb[:, np.newaxis] - hx_array)
# pa = xa_sample.dot(xa_sample.T) / (nsample - 1)
bfull = controlvect.build_b(controlvect)
pa = bfull - kmatrix.dot(bh.T)
# Dump control vector
......
......@@ -21,7 +21,7 @@ def compute_sample(self, controlvect, list_sample):
for ijob, x in enumerate(list_sample):
controlvect.xb[:] = x
# Dumps Dirac controlvect
# Dumps controlvect value from sample
base_dir = "{}/ensemble/sample_{:04d}/".format(workdir, ijob)
path.init_dir(base_dir)
......
......@@ -33,6 +33,10 @@ requirements = {
"name": "gausscost",
"version": "std",
},
"platform": {
"any": True,
"empty": True
},
}
input_arguments = {
......@@ -40,6 +44,12 @@ input_arguments = {
"doc": "Save final posterior vector as NetCDF",
"default": False,
"accepted": bool
},
"montecarlo": {
"doc": "Number of perturbed inversions "
"to compute posterior inversions.",
"default": 0,
"accepted": int
}
}
......
import numpy as np
from logging import info
from .sampling import compute_sample
def execute(self, **kwargs):
"""Performs a variational inversion given a minimizer method and a
......@@ -39,42 +39,92 @@ def execute(self, **kwargs):
if len(obsvect.yobs) == 0:
raise Exception("WARNING: trying to run a test an inversion with no "
"observation points! Please check your monitor file")
# Some verbose
towrite = """
Running a variational inversion with the following modules:
Minimizer: {}
Simulator: {}
""".format(
minimizer.plugin.name, simulator.plugin.name
)
info(towrite)
# Initial run of the simulator as a starting point for M1QN3
costinit, gradinit = simulator.simul(controlvect.chi, run_id=-1, **kwargs)
zgnorm = np.sqrt(np.sum(gradinit ** 2))
info("Nb of observations: " + str(len(obsoper.obsvect.yobs)))
info("Initial cost: " + str(costinit))
info("Initial gradient norm: " + str(zgnorm))
# Runs the minimizer
chiopt = minimizer.minimize(costinit, gradinit, controlvect.chi, **kwargs)
# Last call to the simulator for final diagnostics
costend, gradend = simulator.simul(chiopt, run_id=-2, **kwargs)
zgnorm = np.sqrt(np.dot(gradend, gradend)) / zgnorm
info("Final cost: " + str(costend))
info("Ratio final/initial gradient norm: " + str(zgnorm))
# If no Monte Carlo estimation of posterior uncertainties,
# Do simple inversion
if self.montecarlo == 0:
# Some verbose
towrite = """
Running a variational inversion with the following modules:
Minimizer: {}
Simulator: {}
""".format(
minimizer.plugin.name, simulator.plugin.name
)
info(towrite)
# Initial run of the simulator as a starting point for the minimizer
costinit, gradinit = simulator.simul(controlvect.chi, run_id=-1, **kwargs)
zgnorm = np.sqrt(np.sum(gradinit ** 2))
info("Nb of observations: " + str(len(obsoper.obsvect.yobs)))
info("Initial cost: " + str(costinit))
info("Initial gradient norm: " + str(zgnorm))
# Runs the minimizer
chiopt = minimizer.minimize(costinit, gradinit,
controlvect.chi, **kwargs)
# Save uncertainty reduction to the controlvect
if getattr(minimizer, "save_uncertainties", False):
# Convert the eigen vectors back to the control space
eigenvects = []
for pevec in minimizer.pevecs:
eigenvects.append(controlvect.sqrtbprod(pevec, **kwargs)
- controlvect.xb)
eigenvects = np.array(eigenvects)
# Computes Pa from eigen vectors
controlvect.pa = (controlvect.build_b(controlvect)
- (eigenvects.T.dot(eigenvects)))
# Last call to the simulator for final diagnostics
costend, gradend = simulator.simul(chiopt, run_id=-2, **kwargs)
zgnorm = np.sqrt(np.dot(gradend, gradend)) / zgnorm
info("Final cost: " + str(costend))
info("Ratio final/initial gradient norm: " + str(zgnorm))
# Save results
controlvect.x = controlvect.sqrtbprod(chiopt, **kwargs)
controlvect.dump(
"{}/controlvect_final.pickle".format(workdir),
to_netcdf=self.save_out_netcdf,
dir_netcdf="{}/controlvect/".format(workdir),
)
# Return values
return controlvect, obsvect
# If Monte Carlo, call individual inversions
ensemble_dir = "{}/ensemble/".format(workdir)
nsample = self.montecarlo
# Build ensemble
x_sample = np.random.normal(size=(nsample, controlvect.dim))
for k, x in enumerate(x_sample):
x_sample[k] = controlvect.sqrtbprod(x)
# Compute simulation for reference control vector as well
x_sample = np.append(x_sample, np.ones((1, controlvect.dim)), axis=0)
# Compute Hx for each member of the ensemble
compute_sample(self, controlvect, x_sample)
# Load posterior control vectors
h_dir = "{workdir}/ensemble/xa_emsemble/".format(workdir=workdir)
xa_sample = np.zeros((nsample, controlvect.dim))
for k in range(nsample):
xa_file = "{}/controlvect_final_{:04d}.pickle".format(h_dir, k)
controlvect.load(xa_file)
xa_sample[k] = controlvect.x
xa_mean = xa_sample.mean(axis=0)
dx = xa_sample - xa_mean
pa = dx.T.dot(dx) / (nsample - 1)
controlvect.x = xa_mean
controlvect.pa = pa
# Save results
controlvect.x = controlvect.sqrtbprod(chiopt, **kwargs)
controlvect.dump(
"{}/controlvect_final.pickle".format(workdir),
to_netcdf=self.save_out_netcdf,
dir_netcdf="{}/controlvect/".format(workdir),
)
# Return values
return controlvect, obsvect
import copy
import glob
import os
import pickle
import re
import shutil
import time
from logging import info

from pycif.utils import path
from pycif.utils.yml import ordered_dump
def compute_sample(self, controlvect, list_sample):
    """Run one independent pycif inversion per Monte Carlo member.

    For each control vector in `list_sample`, the control vector is dumped
    to a dedicated sample directory, a yaml configuration pointing to that
    dump is generated, and a full pycif run is submitted as a separate job
    on the platform. Once all jobs are over, the final posterior control
    vectors are gathered into a common ensemble directory.

    Args:
        controlvect: control vector object; its ``xb`` is overwritten in
            turn with each sample (the original is kept in ``xb_ref``)
        list_sample: iterable of state vectors, one per ensemble member

    Returns:
        None; results are written to ``{workdir}/ensemble/xa_emsemble/``
    """
    workdir = controlvect.workdir
    platform = self.platform

    # Save Xb for later
    # NOTE(review): xb_ref is stored but never restored in this function;
    # presumably the caller restores controlvect.xb afterwards -- confirm
    controlvect.xb_ref = copy.deepcopy(controlvect.xb)

    # Loop over members of the ensemble
    list_jobs = []
    for ijob, x in enumerate(list_sample):
        controlvect.xb[:] = x

        # Dumps controlvect value from sample
        base_dir = "{}/ensemble/sample_{:04d}/".format(workdir, ijob)
        path.init_dir(base_dir)
        controlvect.dump(
            "{}/controlvect.pickle".format(base_dir),
        )

        # Updating configuration dictionary so the member inversion runs
        # in its own directory, starting from the perturbed control vector
        yml_dict = \
            self.from_yaml(self.reference_instances["reference_setup"].def_file)
        yml_dict.update(
            {"workdir": base_dir,
             "controlvect": {
                 "plugin": {"name": "standard", "version": "std"},
                 "reload_xb": True,
                 "reload_file": "{}/controlvect.pickle".format(base_dir)
             }
             }
        )
        # Prevent the member inversion from recursively spawning its own
        # Monte Carlo ensemble
        yml_dict["mode"]["montecarlo"] = 0

        # Dumps new yml file
        yml_file = "{}/config_base_{:04d}.yml".format(base_dir, ijob)
        with open(yml_file, "w") as outfile:
            ordered_dump(outfile, yml_dict)

        # Run the member inversion as an independent process on the platform
        job_file = os.path.join(base_dir, "job_pycif_base_{:04d}".format(ijob))
        info("Submitting Monte Carlo sample inversion {} from {}"
             .format(ijob + 1, len(list_sample)))
        job_id = platform.submit_job(
            "{} -m pycif {}".format(platform.python, yml_file),
            job_file
        )
        list_jobs.append(job_id)

    # Check that jobs are over
    while not platform.check_jobs(list_jobs):
        time.sleep(platform.sleep_time)

    # Move controlvects to a common ensemble directory
    # NOTE(review): "xa_emsemble" is a typo for "ensemble", but the reader
    # side uses the same spelling; rename both together if fixing it
    h_dir = "{}/ensemble/xa_emsemble/".format(workdir)
    path.init_dir(h_dir)
    for ijob in range(len(list_sample)):
        base_dir = "{}/ensemble/sample_{:04d}/".format(workdir, ijob)
        # shutil.move instead of os.system("mv ..."): portable, no shell
        # quoting issues with special characters in paths, and it raises
        # on failure instead of silently returning a non-zero exit code
        shutil.move(
            "{}/controlvect_final.pickle".format(base_dir),
            "{}/controlvect_final_{:04d}.pickle".format(h_dir, ijob)
        )
......@@ -17,7 +17,8 @@ def plot_inversion(pytestconfig):
yield
# Plot varying cost functions at the very end of pytest
marker = pytestconfig.getoption('-m')
if "article" in marker and "not article" not in marker:
if "article" in marker and "not article" not in marker \
and "not uncertainties" in marker:
ax = plt.subplots(3, 1, figsize=(21, 21))
id_name_correspondance = {
......@@ -60,7 +61,7 @@ def plot_inversion(pytestconfig):
label=label, linewidth=5, linestyle='--', zorder=10)
else:
label = id_name_correspondance.get(mode, mode)
label = id_name_correspondance.get(mode, minimizer)
ax[1][ind_ax].plot(
cost["nsim"], cost["Jo"] + cost["Jb"],
......
......@@ -18,10 +18,12 @@ from pycif.utils.path import init_dir
@pytest.mark.article
@pytest.mark.parametrize(
"settings", [
{"mode": "4dvar", "minimizer": "M1QN3"},
# {"mode": "4dvar", "minimizer": "M1QN3"},
# pytest.param({"mode": "4dvar", "minimizer": "M1QN3", "montecarlo": 10},
# marks=pytest.mark.uncertainties),
{"mode": "4dvar", "minimizer": "congrad"},
{"mode": "analytical"},
{"mode": "ensrf"}
# {"mode": "ensrf"},
# {"mode": "analytical"}
]
)
def test_integration_inversion(dummy_config_inversion, settings, pytestconfig):
......@@ -62,6 +64,12 @@ def test_integration_inversion(dummy_config_inversion, settings, pytestconfig):
},
"save_out_netcdf": True
}
if settings["minimizer"] == "congrad":
mode["minimizer"]["save_uncertainties"] = True
if "montecarlo" in settings:
mode["montecarlo"] = settings["montecarlo"]
prior_dir = os.path.join(tmpdir, "obsoperator/fwd_-001/obsvect")
posterior_dir = os.path.join(tmpdir, "obsoperator/fwd_-002/obsvect")
......@@ -311,3 +319,60 @@ def test_integration_inversion(dummy_config_inversion, settings, pytestconfig):
Path(figure_dir).mkdir(parents=True, exist_ok=True)
plt.savefig("{}/map_dx_{}_{}.pdf".format(figure_dir, title, resol))
plt.close()
# Plot uncertainty reduction
if "pa_std" in ds:
dstd = ds["b_std"].mean(axis=(0, 1)) \
- ds["pa_std"].mean(axis=(0, 1))
# Plot the figure
plt.figure(figsize=(21, 11))
ax0 = plt.axes([0.05, 0.05, 0.73, 0.87])
im = plt.imshow(dstd, extent=(xmin, xmax, ymin, ymax),
cmap="YlOrRd", vmin=0, vmax=0.5)
sc = plt.scatter(coords["lon"], coords["lat"], c=coords["alt"],
cmap="Blues", linewidths=1, edgecolors="k", s=600)
plt.yticks(fontsize=25)
plt.xticks(fontsize=25)
ax1 = plt.axes([0.74, 0.05, 0.05, 0.87])
cb1 = plt.colorbar(im, cax=ax1)
plt.yticks(fontsize=25)
plt.ylabel("Uncertainty reduction (a.u.)", fontsize=30)
ax2 = plt.axes([0.86, 0.05, 0.05, 0.87])
cb2 = plt.colorbar(sc, cax=ax2)
plt.yticks(fontsize=25)
plt.ylabel("Station altitude (m a.g.l)", fontsize=30)
ax0.set_title(title, fontsize=45)
current_dir = os.path.abspath(
os.path.dirname(os.path.realpath(__file__)))
figure_dir = \
os.path.abspath(os.path.join(current_dir,
"../../figures_artifact/"))
Path(figure_dir).mkdir(parents=True, exist_ok=True)
plt.savefig("{}/map_dstd_{}_{}.pdf"
.format(figure_dir, title, resol))
plt.close()
# Plot matrix of uncertainty reduction
if hasattr(controlvect, "pa"):
bfull = controlvect.build_b(controlvect)
pa = controlvect.pa
plt.imshow((bfull - pa)[:int(controlvect.dim / 2),
:int(controlvect.dim / 2)])
current_dir = os.path.abspath(
os.path.dirname(os.path.realpath(__file__)))
figure_dir = \
os.path.abspath(os.path.join(current_dir,
"../../figures_artifact/"))
Path(figure_dir).mkdir(parents=True, exist_ok=True)
plt.savefig("{}/uncertaintyreduc_matrix_{}_{}.pdf"
.format(figure_dir, title, resol))
plt.close()
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment