Commit 137c22a1 authored by Antoine Berchet's avatar Antoine Berchet
Browse files

Fixing isotopes

parent 865c7b2b
......@@ -52,7 +52,17 @@ input_arguments = {
"accepted": bool
},
"save_debug": {
"doc": "Force transforms to save debugging informations",
"doc": """
Force transforms to save debugging informations.
.. warning ::
This option saves every intermediate states of the transformation
            pipeline. It slows drastically the computation of the observation
operator and can take a lot of disk space. Should be used only for
debugging or understanding what happens along the way.
""",
"default": False,
"accepted": bool
}
......
......@@ -7,6 +7,8 @@ import os
import shutil
from .....utils import path
from .....utils.datastores.dump import dump_datastore
from .utils.dump_debug import dump_debug
from .utils.check_adjtltest import check_adjtltest
def do_transforms(
......@@ -214,22 +216,7 @@ def do_transforms(
# Save outputs if save_debug activated
if save_debug and not transform_onlyinit:
for trid in tmp_datastore["outputs"]:
is_sparse = transf_mapper["outputs"][trid].get("sparse_data", False)
if is_sparse:
debug_dir = "{}/../transform_debug/{}/{}/{}/{}".format(
runsubdir, transform, ddi.strftime("%Y-%m-%d_%H:%M"),
trid[0], trid[1])
_, created = path.init_dir(debug_dir)
for d in tmp_datastore["outputs"][trid]:
debug_file = d.strftime(
"{}/monitor_debug_%Y%m%d%H%M".format(debug_dir))
pathlib.Path(debug_file).unlink(missing_ok=True)
dump_datastore(tmp_datastore["outputs"][trid][d], debug_file,
col2dump=tmp_datastore["outputs"][trid][d].columns,
dump_default=False
)
dump_debug(transform, transf_mapper, tmp_datastore, runsubdir, ddi)
# Redistribute the datastore accounting for successor/precursors
# and inputs/outputs sub-simulations
......@@ -304,73 +291,4 @@ def do_transforms(
if not (mode == "adj" and check_transforms):
return
# Check the adjoint for each transformations
adjtl_log = "{}/check_transforms.log".format(workdir)
f = open(adjtl_log, "w")
# First write header
towrite = \
"- {:>20},\t{:>30},\t{:>19}:\t{:>10}\t/\t{:>10}".format(
"transform_ID",
"transform_name",
"sub-simulation",
"<H(dx)|H(dx)>",
"<dx|H*(H(dx))>"
)
info(towrite)
f.write(towrite + "\n")
for ddi, transform, direction in period_order[:int(len(period_order) / 2)]:
if transform not in self.data_tl:
continue
data_tl = self.data_tl[transform]
data_adj = self.data_adj[transform]
dx_in = 0
dx_out = 0
if ddi not in data_tl:
continue
for trid in mapper[transform]["inputs"]:
if trid not in data_tl[ddi]["inputs"] \
or trid not in data_adj[ddi]["inputs"]:
continue
for di in mapper[transform]["subsimus"][ddi]["inputs"][trid]:
for precursor in mapper[transform]["precursors"][trid]:
tl = data_tl[ddi]["inputs"][trid][di][precursor]
adj = data_adj[ddi]["inputs"][trid][di][precursor]
if "incr" not in tl or "adj_out" not in adj:
continue
data_tl_in = tl["incr"]
data_adj_in = adj["adj_out"]
dx_in += np.float((data_tl_in * data_adj_in).sum())
for trid in mapper[transform]["outputs"]:
if trid not in data_tl[ddi]["outputs"] \
or trid not in data_adj[ddi]["outputs"]:
continue
for di in mapper[transform]["subsimus"][ddi]["outputs"][trid]:
for successor in mapper[transform]["successors"][trid]:
tl = data_tl[ddi]["outputs"][trid][di][successor]
adj = data_adj[ddi]["outputs"][trid][di][successor]
if "incr" not in tl or "adj_out" not in adj:
continue
data_tl_out = tl["incr"]
data_adj_out = adj["adj_out"]
dx_out += np.float((data_tl_out * data_adj_out).sum())
towrite = \
"- {:>20},\t{:>30},\t{}:\t{:>10}\t/\t{:>10}".format(
transform,
getattr(transform_pipe, transform).plugin.name,
ddi,
dx_in, dx_out
)
info(towrite)
f.write(towrite + "\n")
f.close()
check_adjtltest(self, period_order, mapper, transform_pipe)
from logging import info
import numpy as np
def _accumulate_dot_product(transf_mapper, tl_ddi, adj_ddi, ddi, io_key,
                            link_key):
    """Accumulate the scalar product <incr|adj_out> for one transform side.

    Sums, over every data ID, sub-simulation and linked transform
    (precursor for inputs, successor for outputs), the dot product between
    the tangent-linear increment ("incr") and the adjoint sensitivity
    ("adj_out"). Sparse-data entries store their columns under a
    "maindata" group, which is unwrapped before the product.

    Args:
        transf_mapper: the mapper entry for this transform
        tl_ddi: tangent-linear data for the current sub-period ddi
        adj_ddi: adjoint data for the current sub-period ddi
        ddi: current sub-period (used to index "subsimus")
        io_key: "inputs" or "outputs"
        link_key: "precursors" (with inputs) or "successors" (with outputs)

    Returns:
        float: the accumulated scalar product
    """
    total = 0.0
    for trid in transf_mapper[io_key]:
        # Skip data IDs missing on either the TL or the adjoint side
        if trid not in tl_ddi[io_key] or trid not in adj_ddi[io_key]:
            continue
        for di in transf_mapper["subsimus"][ddi][io_key][trid]:
            for linked in transf_mapper[link_key][trid]:
                tl = tl_ddi[io_key][trid][di][linked]
                adj = adj_ddi[io_key][trid][di][linked]

                # Sparse data nests its columns below "maindata"
                if transf_mapper[io_key][trid].get("sparse_data", False):
                    tl = tl["maindata"]
                    adj = adj["maindata"]

                if "incr" not in tl or "adj_out" not in adj:
                    continue

                # np.float was removed in NumPy 1.24; the builtin float
                # is the documented replacement and behaves identically
                total += float((tl["incr"] * adj["adj_out"]).sum())
    return total


def check_adjtltest(self, period_order, mapper, transform_pipe):
    """Check adjoint/tangent-linear consistency for each transform.

    For every transform of the forward sweep (first half of
    ``period_order``), accumulates <H(dx)|H(dx)> from the tangent-linear
    data and <dx|H*(H(dx))> from the adjoint data, then logs both values
    side by side to ``{workdir}/check_transforms.log`` and to the logger.
    For a correct adjoint the two numbers should agree to numerical
    precision.

    Args:
        self: plugin carrying ``workdir``, ``data_tl`` and ``data_adj``
        period_order: list of (ddi, transform, direction) tuples; the
            second half mirrors the first for the adjoint sweep and is
            therefore skipped
        mapper: per-transform description of inputs/outputs, precursors,
            successors and sub-simulations
        transform_pipe: object exposing each transform as an attribute,
            used to fetch the transform's display name
    """
    workdir = self.workdir

    # One log line per (transform, sub-period); the with-block guarantees
    # the file is closed even if a transform entry is malformed
    adjtl_log = "{}/check_transforms.log".format(workdir)
    with open(adjtl_log, "w") as f:
        # First write header
        towrite = \
            "- {:>20},\t{:>30},\t{:>19}:\t{:>10}\t/\t{:>10}".format(
                "transform_ID",
                "transform_name",
                "sub-simulation",
                "<H(dx)|H(dx)>",
                "<dx|H*(H(dx))>"
            )
        info(towrite)
        f.write(towrite + "\n")

        # Only the first half of period_order is the forward sweep
        for ddi, transform, direction in \
                period_order[:int(len(period_order) / 2)]:
            if transform not in self.data_tl:
                continue

            data_tl = self.data_tl[transform]
            data_adj = self.data_adj[transform]
            if ddi not in data_tl:
                continue

            dx_in = _accumulate_dot_product(
                mapper[transform], data_tl[ddi], data_adj[ddi],
                ddi, "inputs", "precursors")
            dx_out = _accumulate_dot_product(
                mapper[transform], data_tl[ddi], data_adj[ddi],
                ddi, "outputs", "successors")

            towrite = \
                "- {:>20},\t{:>30},\t{}:\t{:>10}\t/\t{:>10}".format(
                    transform,
                    getattr(transform_pipe, transform).plugin.name,
                    ddi,
                    dx_in, dx_out
                )
            info(towrite)
            f.write(towrite + "\n")
from ......utils import path
from ......utils.datastores.dump import dump_datastore
import pathlib
import xarray as xr
def dump_debug(transform, transf_mapper, tmp_datastore, runsubdir, ddi):
    """Dump every output of a transform to disk for debugging.

    For each output data ID, a directory is created under
    ``{runsubdir}/../transform_debug/`` keyed by transform, sub-period and
    data ID. Sparse outputs (per-date datastores) are dumped with
    ``dump_datastore``, one file per date; dense outputs holding a "spec"
    entry are written as a single NetCDF dataset. Existing debug files are
    removed before being rewritten.

    Args:
        transform: ID of the transform being debugged
        transf_mapper: mapper entry describing the transform's outputs
        tmp_datastore: dictionary holding the "outputs" to dump
        runsubdir: current run sub-directory
        ddi: current sub-period (datetime-like, used in the folder name)
    """
    outputs = tmp_datastore["outputs"]
    for trid in outputs:
        debug_dir = "{}/../transform_debug/{}/{}/{}/{}".format(
            runsubdir, transform, ddi.strftime("%Y-%m-%d_%H:%M"),
            trid[0], trid[1])
        path.init_dir(debug_dir)

        if transf_mapper["outputs"][trid].get("sparse_data", False):
            # Sparse data: one datastore per date, dumped individually
            for date in outputs[trid]:
                store = outputs[trid][date]
                debug_file = date.strftime(
                    "{}/monitor_debug_%Y%m%d%H%M.nc".format(debug_dir))
                pathlib.Path(debug_file).unlink(missing_ok=True)
                dump_datastore(store, debug_file,
                               col2dump=store.columns,
                               dump_default=False)

        elif "spec" in outputs[trid]:
            debug_file = "{}/dataarray_debug.nc".format(debug_dir)
            pathlib.Path(debug_file).unlink(missing_ok=True)
            # Turn data to dataset in case it is a dictionary of dataarrays
            xr.Dataset(outputs[trid]).to_netcdf(debug_file, mode="w")
......@@ -31,14 +31,20 @@ def adjoint(
# Concatenate output datastores
dfout = pd.concat(
[datastore_out[(comp, trcr)][di].assign(parameter_ref=trcr.lower())
[datastore_out[(comp, trcr)][di].assign(
parameter_ref=trcr.lower())
for comp, trcr in zip(components_out, outputs)])
dfout.columns = pd.MultiIndex.from_tuples(
[("metadata", "parameter_ref") if col[0] == "parameter_ref"
else col for col in dfout.columns]
)
# Initializes input dataframe from available total concentrations
# Implicitly includes the adjoint of the sum of isotopologues
for comp, trcr in zip(components_in, inputs):
datastore_in[(comp, trcr)][di] = \
copy.deepcopy(dfout.assign(parameter=trcr.lower()))
datastore_in[(comp, trcr)][di] = copy.deepcopy(dfout)
datastore_in[(comp, trcr)][di].loc[:, ("metadata", "parameter")] = trcr.lower()
# Stop here if do not need to compute the full adjoint
if onlyinit:
......@@ -50,13 +56,20 @@ def adjoint(
ddi.strftime(
"{}/chain/isotopes/{}_fwd_obsvect_{}_{}_%Y%m%d%H%M.nc".format(
transform.model.adj_refdir, transform.orig_name, comp, spec))
ds = read_datastore(file_fwd_data, reorder=False,
col2dump=["parameter", "parameter_ref",
"spec", "incr"])
datastore_in[(comp, spec)][di]\
.loc[:, ["parameter", "parameter_ref", "spec", "incr"]] = \
ds.values
ds = read_datastore(
file_fwd_data, reorder=False,
col2dump=[("metadata", "parameter"), ("metadata", "parameter_ref"),
("maindata", "spec"), ("maindata", "incr")])
datastore_in[(comp, spec)][di].loc[:, ("metadata", "parameter")] = \
ds[("metadata", "parameter")].values
datastore_in[(comp, spec)][di].loc[:, ("metadata", "parameter_ref")] = \
ds[("metadata", "parameter_ref")].values
datastore_in[(comp, spec)][di].loc[:, ("maindata", "spec")] = \
ds[("maindata", "spec")].values
datastore_in[(comp, spec)][di].loc[:, ("maindata", "incr")] = \
ds[("maindata", "incr")].values
# Compute
df_all_isos_fwd = {trid[1]: datastore_in[trid][di]
for trid in zip(components_in, inputs)}
......@@ -73,12 +86,12 @@ def adjoint(
isotopologues = sign_attr.isotopologues
# Retrieve specific forward data
ref_sims_fwd = sum([df_all_isos_fwd[ref]["spec"]
ref_sims_fwd = sum([df_all_isos_fwd[ref]["maindata"]["spec"]
for ref in refs])
isotopologue_sims_fwd = sum([df_all_isos_fwd[iso]["spec"]
isotopologue_sims_fwd = sum([df_all_isos_fwd[iso]["maindata"]["spec"]
for iso in isotopologues])
mask_sign = df0_fwd["parameter_ref"] == sign_id.lower()
mask_sign = df0_fwd["metadata"]["parameter_ref"] == sign_id.lower()
ref_fwd = ref_sims_fwd.loc[mask_sign]
isotopologue_fwd = isotopologue_sims_fwd.loc[mask_sign]
......@@ -89,16 +102,16 @@ def adjoint(
for ref in refs:
comp_ref = components_in[inputs.index(ref)]
df_ref = datastore_in[(comp_ref, ref)][di]
mask_sign_ref = df_ref["parameter_ref"] == sign_id.lower()
incr_ref = df_ref.loc[mask_sign_ref, "adj_out"]
df_ref.loc[mask_sign_ref, "adj_out"] = \
mask_sign_ref = df_ref["metadata"]["parameter_ref"] == sign_id.lower()
incr_ref = df_ref["maindata"].loc[mask_sign_ref, "adj_out"]
df_ref.loc[mask_sign_ref, ("maindata", "adj_out")] = \
- isotopologue_fwd / ref_fwd ** 2 * incr_ref * 1000 / r_std
for iso in isotopologues:
comp_iso = components_in[inputs.index(iso)]
df_iso = datastore_in[(comp_iso, iso)][di]
mask_sign_iso = df_iso["parameter_ref"] == sign_id.lower()
incr_iso = df_iso.loc[mask_sign_iso, "adj_out"]
df_iso.loc[mask_sign_iso, "adj_out"] = \
mask_sign_iso = df_iso["metadata"]["parameter_ref"] == sign_id.lower()
incr_iso = df_iso["maindata"].loc[mask_sign_iso, "adj_out"]
df_iso.loc[mask_sign_iso, ("maindata", "adj_out")] = \
1 / ref_fwd * incr_iso * 1000 / r_std
......@@ -47,7 +47,8 @@ def forward(
datastore_in[(comp, spec)][di],
file_monit=file_fwd_data,
dump_default=False,
col2dump=["parameter", "parameter_ref", "spec", "incr"],
col2dump=[("metadata", "parameter"), ("metadata", "parameter_ref"),
("maindata", "spec"), ("maindata", "incr")],
mode="w",
)
......@@ -67,15 +68,15 @@ def forward(
refs = sign_attr.refs
isotopologues = sign_attr.isotopologues
ref_sims = sum([df_all_isos[ref][["spec", "incr"]]
ref_sims = sum([df_all_isos[ref]["maindata"][["spec", "incr"]]
for ref in refs])
isotopologue_sims = sum([df_all_isos[iso][["spec", "incr"]]
isotopologue_sims = sum([df_all_isos[iso]["maindata"][["spec", "incr"]]
for iso in isotopologues])
mask_sign = dfspec["parameter_ref"] == sign_id.lower()
mask_sign = dfspec["metadata"]["parameter_ref"] == sign_id.lower()
ref_sim = ref_sims.loc[mask_sign, "spec"]
isotopologue_sim = isotopologue_sims.loc[mask_sign, "spec"]
ref_sim = ref_sims["maindata"].loc[mask_sign, "spec"]
isotopologue_sim = isotopologue_sims["maindata"].loc[mask_sign, "spec"]
sign_sim = isotopologue_sim / ref_sim
sign_sim /= r_std
......@@ -83,28 +84,29 @@ def forward(
comp_out = components_out[outputs.index(sign_id)]
dfout_iso = datastore_out[(comp_out, sign_id)][di]
dfout_iso.loc[:, "spec"] = sign_sim
dfout_iso.loc[:, ("maindata", "spec")] = sign_sim
# Applying tangent-linear
if mode == "tl":
ref_sim_tl = ref_sims.loc[mask_sign, "incr"]
isotopologue_sim_tl = isotopologue_sims.loc[mask_sign, "incr"]
ref_sim_tl = ref_sims["maindata"].loc[mask_sign, "incr"]
isotopologue_sim_tl = isotopologue_sims["maindata"].loc[mask_sign, "incr"]
sign_sim_tl = (isotopologue_sim_tl / ref_sim -
isotopologue_sim / ref_sim ** 2 * ref_sim_tl)
sign_sim_tl /= r_std / 1000
dfout_iso.loc[:, "incr"] = sign_sim_tl
dfout_iso.loc[:, ("maindata", "incr")] = sign_sim_tl
# Compute sum of isotopologues
spec_sims = sum([df_all_isos[s][["spec", "incr"]] for s in inputs])
spec_sims = sum([df_all_isos[s]["maindata"][["spec", "incr"]] for s in inputs])
spec_id = outputs[-1]
comp = components_out[-1]
mask_spec = dfspec["parameter_ref"] == spec_id.lower()
mask_spec = dfspec["metadata"]["parameter_ref"] == spec_id.lower()
dfout_ref = datastore_out[(comp, spec_id)][di]
dfout_ref.loc[:, "spec"] = spec_sims.loc[mask_spec, "spec"]
dfout_ref.loc[:, ("maindata", "spec")] = spec_sims["maindata"].loc[mask_spec, "spec"]
if mode == "tl":
dfout_ref.loc[:, "incr"] = spec_sims.loc[mask_spec, "incr"]
dfout_ref.loc[:, ("maindata", "incr")] = \
spec_sims["maindata"].loc[mask_spec, "incr"]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment