Commit 1358e804 authored by Friedemann Reum's avatar Friedemann Reum
Browse files

Modify satellite transformation to handle OCO-2 data

Two major changes:
1. New formula (6) in apply_AK.py
2. Handle profiles defined on pressure boundaries
parent 6830e9d5
Pipeline #155 failed with stages
in 0 seconds
......@@ -207,7 +207,7 @@ def adjoint(
info("Fetching satellite infos from files: {}".format(files_aks))
try:
colsat = ["qa0", "ak", "pavg0", "date", "index"]
colsat = ["qa0", "ak", "pavg0", "pwf", "date", "index"]
if transf.formula == 5:
colsat = colsat + ["dryair"]
coord2dump = ["index"]
......@@ -238,26 +238,54 @@ def adjoint(
# / groups['dp'].sum().values[idx]
continue
# Defining ak info
# aks = sat_aks['ak'][nblloc, ::-1][:,1:].T
aks = sat_aks["ak"][:, ::-1].T
if transf.pressure == "Pa":
pavgs = sat_aks["pavg0"][:, ::-1].T
else:
pavgs = 100 * sat_aks["pavg0"][:, ::-1].T
coords0 = {"index": np.arange(nobs),
"level": np.arange(aks.level.size + 1)}
coords1 = {"index": np.arange(nobs),
"level": np.arange(aks.level.size)}
# Coordinates of the layer boundaries
coords_pb = {"index": np.arange(nobs),
"level": np.arange(sat_aks.level_pressure.size)}
# Coordinates of the retrieval. Can be pressure layer centers
# ("level") or layer boundaries ("level_pressure", e.g. OCO-2).
# Read which it is from dimensions of averaging kernel.
ret_level_dim = sat_aks["ak"].dims[1]
coords_ret = {"index": np.arange(nobs),
"level": np.arange(sat_aks[ret_level_dim].size)}
dims = ("level", "index")
pavgs = xr.DataArray(pavgs, coords0, dims).bfill("level")
pavgs_mid = xr.DataArray(
np.log(0.5 * (pavgs[:-1].values + pavgs[1:].values)),
coords1, dims)
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords1, dims)
qa0 = sat_aks["qa0"][:, ::-1].T
# Define pressure axis of the retrieval
pavgs_pb = xr.DataArray(pavgs, coords_pb, dims).bfill("level")
# Case: retrieval on layer midpoints
if ret_level_dim=="level":
pavgs_ret = xr.DataArray(
np.log(0.5 * (pavgs_pb[:-1].values + pavgs_pb[1:].values)),
coords_ret, dims)
# Case: retrieval on layer boundaries (e.g. OCO-2)
elif ret_level_dim=="level_pressure":
pavgs_ret = np.log(pavgs_pb)
else:
raise ValueError("Unknown dimension: '{}'".format(ret_level_dim))
# If present, read pressure weights from file
if "pwf" in sat_aks.keys():
dpavgs = xr.DataArray(sat_aks["pwf"][:, ::-1].T, coords_ret, dims)
# Else, construct pressure weights
else:
# Case: retrieval on layer midpoints
if ret_level_dim=="level":
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords_ret, dims)
# Case: retrieval on layer boundaries
elif ret_level_dim=="level_pressure":
raise NotImplementedError(
"Constructing pressure weights is not " + \
"for retrieval on layer boundaries. " + \
"Provide 'pwf' in observation file.")
# Defining ak info
# aks = sat_aks['ak'][nblloc, ::-1][:,1:].T
aks = xr.DataArray(sat_aks["ak"][:, ::-1].T, coords_ret, dims)
qa0 = xr.DataArray(sat_aks["qa0"][:, ::-1].T, coords_ret, dims)
# Exclude observations where there are all zeros in the simulation
exclude_zeros = np.where(
......@@ -265,7 +293,7 @@ def adjoint(
datasim = datasim.isel(index=exclude_zeros)
sat_aks = sat_aks.isel(index=exclude_zeros)
aks = aks.isel(index=exclude_zeros)
pavgs_mid = pavgs_mid.isel(index=exclude_zeros)
pavgs_ret = pavgs_ret.isel(index=exclude_zeros)
dpavgs = dpavgs.isel(index=exclude_zeros)
qa0 = qa0.isel(index=exclude_zeros)
......@@ -343,7 +371,7 @@ def adjoint(
# Vertical interpolation
xlow, xhigh, alphalow, alphahigh = vertical_interp(
sim_pressure,
pavgs_mid[:, k1:k2].values,
pavgs_ret[:, k1:k2].values,
cropstrato,
)
......@@ -351,10 +379,10 @@ def adjoint(
# WARNING: There might be repeated indexes in a given column
# To deal with repeated index, np.add.at is recommended
levmeshout = np.array(
1 * [list(range(pavgs_mid.shape[0]))]
1 * [list(range(pavgs_ret.shape[0]))]
).T
meshout = np.array(pavgs_mid.shape[0] * [list(range(k2 - k1))])
meshin = np.array(pavgs_mid.shape[0] * [list(range(k1, k2))])
meshout = np.array(pavgs_ret.shape[0] * [list(range(k2 - k1))])
meshin = np.array(pavgs_ret.shape[0] * [list(range(k1, k2))])
tmp_obs_incr = 0.0 * sim_pressure
np.add.at(
......
......@@ -31,6 +31,11 @@ def apply_ak(sim_ak, dpavgs, aks, nbformula, qa0, chosenlevel, drycols):
+ (aks * (sim_ak * drycols - qa0)).sum(axis=0)
) / drycols.sum(axis=0)
# OCO-2
elif nbformula == 6:
y0 = (dpavgs*qa0).sum(axis=0) \
+ (dpavgs*aks*(sim_ak - qa0)).sum(axis=0)
else:
raise Exception("Don't know formula: {}".format(nbformula))
......@@ -56,6 +61,9 @@ def apply_ak_tl(
elif nbformula == 5:
y0 = (aks * sim_ak_tl * drycols).sum(axis=0) / drycols.sum(axis=0)
elif nbformula == 6:
y0 = (dpavgs*aks*sim_ak_tl).sum(axis=0)
else:
raise Exception("Don't know formula: {}".format(nbformula))
......@@ -89,6 +97,9 @@ def apply_ak_ad(sim_ak, dpavgs, aks, nbformula, qa0, chosenlevel, obs_incr,
elif nbformula == 5:
obs_incr = aks * obs_incr * drycols / drycols.sum(axis=0)
elif nbformula == 6:
obs_incr = dpavgs*aks*obs_incr
else:
raise Exception("Don't know formula: {}".format(nbformula))
......
......@@ -172,7 +172,7 @@ def forward(
info("Fetching satellite infos from files: {}".format(files_aks))
try:
colsat = ["qa0", "ak", "pavg0", "date", "index", "station"]
colsat = ["qa0", "ak", "pavg0", "pwf", "date", "index", "station"]
if transf.formula == 5:
colsat = colsat + ["dryair"]
coord2dump = ["index"]
......@@ -220,31 +220,59 @@ def forward(
# groups['qdp'].sum().values / groups['dp'].sum().values
# continue
aks = sat_aks["ak"][:, ::-1].T
if transf.pressure == "Pa":
pavgs = sat_aks["pavg0"][:, ::-1].T
else:
pavgs = 100 * sat_aks["pavg0"][:, ::-1].T
coords0 = {"index": np.arange(nobs),
"level": np.arange(aks.level.size + 1)}
coords1 = {"index": np.arange(nobs),
"level": np.arange(aks.level.size)}
# Coordinates of the layer boundaries
coords_pb = {"index": np.arange(nobs),
"level": np.arange(sat_aks.level_pressure.size)}
# Coordinates of the retrieval. Can be pressure layer centers
# ("level") or layer boundaries ("level_pressure", e.g. OCO-2).
# Read which it is from dimensions of averaging kernel.
ret_level_dim = sat_aks["ak"].dims[1]
coords_ret = {"index": np.arange(nobs),
"level": np.arange(sat_aks[ret_level_dim].size)}
dims = ("level", "index")
pavgs = xr.DataArray(pavgs, coords0, dims).bfill("level")
pavgs_mid = xr.DataArray(
np.log(0.5 * (pavgs[:-1].values + pavgs[1:].values)),
coords1, dims)
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords1, dims)
qa0 = sat_aks["qa0"][:, ::-1].T
# Define pressure axis of the retrieval
pavgs_pb = xr.DataArray(pavgs, coords_pb, dims).bfill("level")
# Case: retrieval on layer midpoints
if ret_level_dim=="level":
pavgs_ret = xr.DataArray(
np.log(0.5 * (pavgs_pb[:-1].values + pavgs_pb[1:].values)),
coords_ret, dims)
# Case: retrieval on layer boundaries (e.g. OCO-2)
elif ret_level_dim=="level_pressure":
pavgs_ret = np.log(pavgs_pb)
else:
raise ValueError("Unknown dimension: '{}'".format(ret_level_dim))
# If present, read pressure weights from file
if "pwf" in sat_aks.keys():
dpavgs = xr.DataArray(sat_aks["pwf"][:, ::-1].T, coords_ret, dims)
# Else, construct pressure weights
else:
# Case: retrieval on layer midpoints
if ret_level_dim=="level":
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords_ret, dims)
# Case: retrieval on layer boundaries
elif ret_level_dim=="level_pressure":
raise NotImplementedError(
"Constructing pressure weights is not " + \
"for retrieval on layer boundaries. " + \
"Provide 'pwf' in observation file.")
aks = xr.DataArray(sat_aks["ak"][:, ::-1].T, coords_ret, dims)
qa0 = xr.DataArray(sat_aks["qa0"][:, ::-1].T, coords_ret, dims)
# Exclude observations where there are all zeros in the simulation
exclude_zeros = np.where(sim.sum(axis=0) != 0)[0]
datasim = datasim.isel(index=exclude_zeros)
sat_aks = sat_aks.isel(index=exclude_zeros)
aks = aks.isel(index=exclude_zeros)
pavgs_mid = pavgs_mid.isel(index=exclude_zeros)
pavgs_ret = pavgs_ret.isel(index=exclude_zeros)
dpavgs = dpavgs.isel(index=exclude_zeros)
qa0 = qa0.isel(index=exclude_zeros)
......@@ -280,14 +308,14 @@ def forward(
# while too many chunks do not take advantage
# of scipy automatic parallelisation
# 50 chunks seems to be fairly efficient
sim_ak = 0.0 * pavgs_mid
sim_ak_tl = 0.0 * pavgs_mid
sim_ak = 0.0 * pavgs_ret
sim_ak_tl = 0.0 * pavgs_ret
if transf.split_tropo_strato:
sim_ak_tropo = 0.0 * pavgs_mid
sim_ak_tl_tropo = 0.0 * pavgs_mid
sim_ak_strato = 0.0 * pavgs_mid
sim_ak_tl_strato = 0.0 * pavgs_mid
sim_ak_tropo = 0.0 * pavgs_ret
sim_ak_tl_tropo = 0.0 * pavgs_ret
sim_ak_strato = 0.0 * pavgs_ret
sim_ak_tl_strato = 0.0 * pavgs_ret
nchunks = transf.nchunks
chunks = np.linspace(0, len(datasim.index), num=nchunks + 1, dtype=int)
......@@ -367,12 +395,12 @@ def forward(
# Vertical interpolation
xlow, xhigh, alphalow, alphahigh = vertical_interp(
sim_pressure,
pavgs_mid[:, k1:k2].values,
pavgs_ret[:, k1:k2].values,
cropstrato,
)
# Applying coefficients
meshout = np.array(pavgs_mid.shape[0] * [list(range(k2 - k1))])
meshout = np.array(pavgs_ret.shape[0] * [list(range(k2 - k1))])
sim_ak[:, k1:k2] = (
alphalow * sim[xlow, meshout] + alphahigh * sim[xhigh, meshout]
)
......@@ -411,6 +439,7 @@ def forward(
/ (dpavgs * sim_ak).sum(axis=0).values \
* dpavgs.sum(axis=0).values / datasim['dp'].sum(
axis=0).values
raise Exception("debug here")
sim_ak *= scale_pthick
if 'sim_tl' in datasim:
sim_ak_tl *= scale_pthick
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment