Commit 1358e804 authored by Friedemann Reum's avatar Friedemann Reum
Browse files

Modify satellite transformation to handle OCO-2 data

Two major changes:
1. New formula (6) in apply_AK.py
2. Handle profiles defined on pressure boundaries
parent 6830e9d5
Pipeline #155 failed with stages
in 0 seconds
...@@ -207,7 +207,7 @@ def adjoint( ...@@ -207,7 +207,7 @@ def adjoint(
info("Fetching satellite infos from files: {}".format(files_aks)) info("Fetching satellite infos from files: {}".format(files_aks))
try: try:
colsat = ["qa0", "ak", "pavg0", "date", "index"] colsat = ["qa0", "ak", "pavg0", "pwf", "date", "index"]
if transf.formula == 5: if transf.formula == 5:
colsat = colsat + ["dryair"] colsat = colsat + ["dryair"]
coord2dump = ["index"] coord2dump = ["index"]
...@@ -238,26 +238,54 @@ def adjoint( ...@@ -238,26 +238,54 @@ def adjoint(
# / groups['dp'].sum().values[idx] # / groups['dp'].sum().values[idx]
continue continue
# Defining ak info
# aks = sat_aks['ak'][nblloc, ::-1][:,1:].T
aks = sat_aks["ak"][:, ::-1].T
if transf.pressure == "Pa": if transf.pressure == "Pa":
pavgs = sat_aks["pavg0"][:, ::-1].T pavgs = sat_aks["pavg0"][:, ::-1].T
else: else:
pavgs = 100 * sat_aks["pavg0"][:, ::-1].T pavgs = 100 * sat_aks["pavg0"][:, ::-1].T
coords0 = {"index": np.arange(nobs), # Coordinates of the layer boundaries
"level": np.arange(aks.level.size + 1)} coords_pb = {"index": np.arange(nobs),
coords1 = {"index": np.arange(nobs), "level": np.arange(sat_aks.level_pressure.size)}
"level": np.arange(aks.level.size)} # Coordinates of the retrieval. Can be pressure layer centers
# ("level") or layer boundaries ("level_pressure", e.g. OCO-2).
# Read which it is from dimensions of averaging kernel.
ret_level_dim = sat_aks["ak"].dims[1]
coords_ret = {"index": np.arange(nobs),
"level": np.arange(sat_aks[ret_level_dim].size)}
dims = ("level", "index") dims = ("level", "index")
pavgs = xr.DataArray(pavgs, coords0, dims).bfill("level")
pavgs_mid = xr.DataArray( # Define pressure axis of the retrieval
np.log(0.5 * (pavgs[:-1].values + pavgs[1:].values)), pavgs_pb = xr.DataArray(pavgs, coords_pb, dims).bfill("level")
coords1, dims) # Case: retrieval on layer midpoints
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords1, dims) if ret_level_dim=="level":
qa0 = sat_aks["qa0"][:, ::-1].T pavgs_ret = xr.DataArray(
np.log(0.5 * (pavgs_pb[:-1].values + pavgs_pb[1:].values)),
coords_ret, dims)
# Case: retrieval on layer boundaries (e.g. OCO-2)
elif ret_level_dim=="level_pressure":
pavgs_ret = np.log(pavgs_pb)
else:
raise ValueError("Unknown dimension: '{}'".format(ret_level_dim))
# If present, read pressure weights from file
if "pwf" in sat_aks.keys():
dpavgs = xr.DataArray(sat_aks["pwf"][:, ::-1].T, coords_ret, dims)
# Else, construct pressure weights
else:
# Case: retrieval on layer midpoints
if ret_level_dim=="level":
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords_ret, dims)
# Case: retrieval on layer boundaries
elif ret_level_dim=="level_pressure":
raise NotImplementedError(
"Constructing pressure weights is not " + \
"for retrieval on layer boundaries. " + \
"Provide 'pwf' in observation file.")
# Defining ak info
# aks = sat_aks['ak'][nblloc, ::-1][:,1:].T
aks = xr.DataArray(sat_aks["ak"][:, ::-1].T, coords_ret, dims)
qa0 = xr.DataArray(sat_aks["qa0"][:, ::-1].T, coords_ret, dims)
# Exclude observations where there are all zeros in the simulation # Exclude observations where there are all zeros in the simulation
exclude_zeros = np.where( exclude_zeros = np.where(
...@@ -265,7 +293,7 @@ def adjoint( ...@@ -265,7 +293,7 @@ def adjoint(
datasim = datasim.isel(index=exclude_zeros) datasim = datasim.isel(index=exclude_zeros)
sat_aks = sat_aks.isel(index=exclude_zeros) sat_aks = sat_aks.isel(index=exclude_zeros)
aks = aks.isel(index=exclude_zeros) aks = aks.isel(index=exclude_zeros)
pavgs_mid = pavgs_mid.isel(index=exclude_zeros) pavgs_ret = pavgs_ret.isel(index=exclude_zeros)
dpavgs = dpavgs.isel(index=exclude_zeros) dpavgs = dpavgs.isel(index=exclude_zeros)
qa0 = qa0.isel(index=exclude_zeros) qa0 = qa0.isel(index=exclude_zeros)
...@@ -343,7 +371,7 @@ def adjoint( ...@@ -343,7 +371,7 @@ def adjoint(
# Vertical interpolation # Vertical interpolation
xlow, xhigh, alphalow, alphahigh = vertical_interp( xlow, xhigh, alphalow, alphahigh = vertical_interp(
sim_pressure, sim_pressure,
pavgs_mid[:, k1:k2].values, pavgs_ret[:, k1:k2].values,
cropstrato, cropstrato,
) )
...@@ -351,10 +379,10 @@ def adjoint( ...@@ -351,10 +379,10 @@ def adjoint(
# WARNING: There might be repeated indexes in a given column # WARNING: There might be repeated indexes in a given column
# To deal with repeated index, np.add.at is recommended # To deal with repeated index, np.add.at is recommended
levmeshout = np.array( levmeshout = np.array(
1 * [list(range(pavgs_mid.shape[0]))] 1 * [list(range(pavgs_ret.shape[0]))]
).T ).T
meshout = np.array(pavgs_mid.shape[0] * [list(range(k2 - k1))]) meshout = np.array(pavgs_ret.shape[0] * [list(range(k2 - k1))])
meshin = np.array(pavgs_mid.shape[0] * [list(range(k1, k2))]) meshin = np.array(pavgs_ret.shape[0] * [list(range(k1, k2))])
tmp_obs_incr = 0.0 * sim_pressure tmp_obs_incr = 0.0 * sim_pressure
np.add.at( np.add.at(
......
...@@ -31,6 +31,11 @@ def apply_ak(sim_ak, dpavgs, aks, nbformula, qa0, chosenlevel, drycols): ...@@ -31,6 +31,11 @@ def apply_ak(sim_ak, dpavgs, aks, nbformula, qa0, chosenlevel, drycols):
+ (aks * (sim_ak * drycols - qa0)).sum(axis=0) + (aks * (sim_ak * drycols - qa0)).sum(axis=0)
) / drycols.sum(axis=0) ) / drycols.sum(axis=0)
# OCO-2
elif nbformula == 6:
y0 = (dpavgs*qa0).sum(axis=0) \
+ (dpavgs*aks*(sim_ak - qa0)).sum(axis=0)
else: else:
raise Exception("Don't know formula: {}".format(nbformula)) raise Exception("Don't know formula: {}".format(nbformula))
...@@ -56,6 +61,9 @@ def apply_ak_tl( ...@@ -56,6 +61,9 @@ def apply_ak_tl(
elif nbformula == 5: elif nbformula == 5:
y0 = (aks * sim_ak_tl * drycols).sum(axis=0) / drycols.sum(axis=0) y0 = (aks * sim_ak_tl * drycols).sum(axis=0) / drycols.sum(axis=0)
elif nbformula == 6:
y0 = (dpavgs*aks*sim_ak_tl).sum(axis=0)
else: else:
raise Exception("Don't know formula: {}".format(nbformula)) raise Exception("Don't know formula: {}".format(nbformula))
...@@ -89,6 +97,9 @@ def apply_ak_ad(sim_ak, dpavgs, aks, nbformula, qa0, chosenlevel, obs_incr, ...@@ -89,6 +97,9 @@ def apply_ak_ad(sim_ak, dpavgs, aks, nbformula, qa0, chosenlevel, obs_incr,
elif nbformula == 5: elif nbformula == 5:
obs_incr = aks * obs_incr * drycols / drycols.sum(axis=0) obs_incr = aks * obs_incr * drycols / drycols.sum(axis=0)
elif nbformula == 6:
obs_incr = dpavgs*aks*obs_incr
else: else:
raise Exception("Don't know formula: {}".format(nbformula)) raise Exception("Don't know formula: {}".format(nbformula))
......
...@@ -172,7 +172,7 @@ def forward( ...@@ -172,7 +172,7 @@ def forward(
info("Fetching satellite infos from files: {}".format(files_aks)) info("Fetching satellite infos from files: {}".format(files_aks))
try: try:
colsat = ["qa0", "ak", "pavg0", "date", "index", "station"] colsat = ["qa0", "ak", "pavg0", "pwf", "date", "index", "station"]
if transf.formula == 5: if transf.formula == 5:
colsat = colsat + ["dryair"] colsat = colsat + ["dryair"]
coord2dump = ["index"] coord2dump = ["index"]
...@@ -220,31 +220,59 @@ def forward( ...@@ -220,31 +220,59 @@ def forward(
# groups['qdp'].sum().values / groups['dp'].sum().values # groups['qdp'].sum().values / groups['dp'].sum().values
# continue # continue
aks = sat_aks["ak"][:, ::-1].T
if transf.pressure == "Pa": if transf.pressure == "Pa":
pavgs = sat_aks["pavg0"][:, ::-1].T pavgs = sat_aks["pavg0"][:, ::-1].T
else: else:
pavgs = 100 * sat_aks["pavg0"][:, ::-1].T pavgs = 100 * sat_aks["pavg0"][:, ::-1].T
coords0 = {"index": np.arange(nobs), # Coordinates of the layer boundaries
"level": np.arange(aks.level.size + 1)} coords_pb = {"index": np.arange(nobs),
coords1 = {"index": np.arange(nobs), "level": np.arange(sat_aks.level_pressure.size)}
"level": np.arange(aks.level.size)} # Coordinates of the retrieval. Can be pressure layer centers
# ("level") or layer boundaries ("level_pressure", e.g. OCO-2).
# Read which it is from dimensions of averaging kernel.
ret_level_dim = sat_aks["ak"].dims[1]
coords_ret = {"index": np.arange(nobs),
"level": np.arange(sat_aks[ret_level_dim].size)}
dims = ("level", "index") dims = ("level", "index")
pavgs = xr.DataArray(pavgs, coords0, dims).bfill("level")
pavgs_mid = xr.DataArray( # Define pressure axis of the retrieval
np.log(0.5 * (pavgs[:-1].values + pavgs[1:].values)), pavgs_pb = xr.DataArray(pavgs, coords_pb, dims).bfill("level")
coords1, dims) # Case: retrieval on layer midpoints
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords1, dims) if ret_level_dim=="level":
qa0 = sat_aks["qa0"][:, ::-1].T pavgs_ret = xr.DataArray(
np.log(0.5 * (pavgs_pb[:-1].values + pavgs_pb[1:].values)),
coords_ret, dims)
# Case: retrieval on layer boundaries (e.g. OCO-2)
elif ret_level_dim=="level_pressure":
pavgs_ret = np.log(pavgs_pb)
else:
raise ValueError("Unknown dimension: '{}'".format(ret_level_dim))
# If present, read pressure weights from file
if "pwf" in sat_aks.keys():
dpavgs = xr.DataArray(sat_aks["pwf"][:, ::-1].T, coords_ret, dims)
# Else, construct pressure weights
else:
# Case: retrieval on layer midpoints
if ret_level_dim=="level":
dpavgs = xr.DataArray(np.diff(-pavgs, axis=0), coords_ret, dims)
# Case: retrieval on layer boundaries
elif ret_level_dim=="level_pressure":
raise NotImplementedError(
"Constructing pressure weights is not " + \
"for retrieval on layer boundaries. " + \
"Provide 'pwf' in observation file.")
aks = xr.DataArray(sat_aks["ak"][:, ::-1].T, coords_ret, dims)
qa0 = xr.DataArray(sat_aks["qa0"][:, ::-1].T, coords_ret, dims)
# Exclude observations where there are all zeros in the simulation # Exclude observations where there are all zeros in the simulation
exclude_zeros = np.where(sim.sum(axis=0) != 0)[0] exclude_zeros = np.where(sim.sum(axis=0) != 0)[0]
datasim = datasim.isel(index=exclude_zeros) datasim = datasim.isel(index=exclude_zeros)
sat_aks = sat_aks.isel(index=exclude_zeros) sat_aks = sat_aks.isel(index=exclude_zeros)
aks = aks.isel(index=exclude_zeros) aks = aks.isel(index=exclude_zeros)
pavgs_mid = pavgs_mid.isel(index=exclude_zeros) pavgs_ret = pavgs_ret.isel(index=exclude_zeros)
dpavgs = dpavgs.isel(index=exclude_zeros) dpavgs = dpavgs.isel(index=exclude_zeros)
qa0 = qa0.isel(index=exclude_zeros) qa0 = qa0.isel(index=exclude_zeros)
...@@ -280,14 +308,14 @@ def forward( ...@@ -280,14 +308,14 @@ def forward(
# while too many chunks do not take advantage # while too many chunks do not take advantage
# of scipy automatic parallelisation # of scipy automatic parallelisation
# 50 chunks seems to be fairly efficient # 50 chunks seems to be fairly efficient
sim_ak = 0.0 * pavgs_mid sim_ak = 0.0 * pavgs_ret
sim_ak_tl = 0.0 * pavgs_mid sim_ak_tl = 0.0 * pavgs_ret
if transf.split_tropo_strato: if transf.split_tropo_strato:
sim_ak_tropo = 0.0 * pavgs_mid sim_ak_tropo = 0.0 * pavgs_ret
sim_ak_tl_tropo = 0.0 * pavgs_mid sim_ak_tl_tropo = 0.0 * pavgs_ret
sim_ak_strato = 0.0 * pavgs_mid sim_ak_strato = 0.0 * pavgs_ret
sim_ak_tl_strato = 0.0 * pavgs_mid sim_ak_tl_strato = 0.0 * pavgs_ret
nchunks = transf.nchunks nchunks = transf.nchunks
chunks = np.linspace(0, len(datasim.index), num=nchunks + 1, dtype=int) chunks = np.linspace(0, len(datasim.index), num=nchunks + 1, dtype=int)
...@@ -367,12 +395,12 @@ def forward( ...@@ -367,12 +395,12 @@ def forward(
# Vertical interpolation # Vertical interpolation
xlow, xhigh, alphalow, alphahigh = vertical_interp( xlow, xhigh, alphalow, alphahigh = vertical_interp(
sim_pressure, sim_pressure,
pavgs_mid[:, k1:k2].values, pavgs_ret[:, k1:k2].values,
cropstrato, cropstrato,
) )
# Applying coefficients # Applying coefficients
meshout = np.array(pavgs_mid.shape[0] * [list(range(k2 - k1))]) meshout = np.array(pavgs_ret.shape[0] * [list(range(k2 - k1))])
sim_ak[:, k1:k2] = ( sim_ak[:, k1:k2] = (
alphalow * sim[xlow, meshout] + alphahigh * sim[xhigh, meshout] alphalow * sim[xlow, meshout] + alphahigh * sim[xhigh, meshout]
) )
...@@ -411,6 +439,7 @@ def forward( ...@@ -411,6 +439,7 @@ def forward(
/ (dpavgs * sim_ak).sum(axis=0).values \ / (dpavgs * sim_ak).sum(axis=0).values \
* dpavgs.sum(axis=0).values / datasim['dp'].sum( * dpavgs.sum(axis=0).values / datasim['dp'].sum(
axis=0).values axis=0).values
raise Exception("debug here")
sim_ak *= scale_pthick sim_ak *= scale_pthick
if 'sim_tl' in datasim: if 'sim_tl' in datasim:
sim_ak_tl *= scale_pthick sim_ak_tl *= scale_pthick
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment