Commit 52728345 authored by Antoine Berchet

Becker ocean fluxes in CIF

parent 67fec599
@@ -2,7 +2,7 @@ import datetime
import glob
import os
import pandas as pd
import xarray as xr
import numpy as np
from .....utils import path
@@ -37,7 +37,7 @@ def fetch(ref_dir, ref_file, date_interval, target_dir,
"""
# Reshape input interval to include full years
datei, datef = date_interval
datei = datetime.datetime(year=datei.year, month=1, day=1)
@@ -49,31 +49,20 @@
list_files = {}
for dd in list_period_dates:
file = dd.strftime("{}/{}".format(ref_dir, ref_file))
# closed="left" avoids an extra interval spilling into the next period
file_hours = pd.date_range(
    dd, dd + pd.to_timedelta(tracer.file_freq), freq="1H", closed="left")
list_dates[dd] = [[hh, hh + datetime.timedelta(hours=1)]
for hh in file_hours]
list_files[dd] = (len(file_hours) * [file])
if not os.path.isfile(file):
continue
# Load times
times = xr.open_dataset(file)["time"].values[:, np.newaxis]
freq = np.unique(np.diff(times.flatten()))
out_dates = np.concatenate([times, times + freq[0]],
axis=1)
list_dates[dd] = [list(d) for d in out_dates]
list_files[dd] = len(times) * [file]
# Fetching
if os.path.isfile(file):
target_file = "{}/{}".format(target_dir, os.path.basename(file))
path.link(file, target_file)
debug(
"Fetched files and dates as follows:\n"
"Dates: {\n" +
"\n".join(["\n".join([" {}:".format(ddi)]
+ [" {}".format(dd)
for dd in list_dates[ddi]])
for ddi in list_dates])
+ "\n}\n\n" +
"Files: {\n" +
"\n".join(["\n".join([" {}:".format(ddi)]
+ [" {}".format(dd)
for dd in list_files[ddi]])
for ddi in list_files])
+ "\n}"
)
return list_files, list_dates
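# Hedged illustration (added, not from the commit): for one period start dd
# covered by an hourly file, the returned mappings are expected to look like
#   list_dates[dd] = [[hh, hh + 1 hour], ...]   # one [start, end] per slot
#   list_files[dd] = [file] * len(list_dates[dd])  # the same file repeated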
import numpy as np
from logging import debug
import os
import xarray as xr
import itertools
from .....utils.classes.setup import Setup
from .....utils.classes.domains import Domain
def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
"""Read information to define the data horizontal and, if relevant, vertical domain.
There are several possible approaches:
- read a reference file that is necessary in :bash:`ref_dir`
- read a file among the available data files
- read a file specified in the yaml,
by using the corresponding variable name; for instance, tracer.my_file
From the chosen file, obtain the coordinates of the centers and/or the corners
of the grid cells. If corners or centers are not available, deduce them from
the available information.
Warning:
the grid must not be overlapping: e.g for a global grid,
the last grid cell must not be the same as the first
Warning:
Order the centers and corners latitudes and longitudes increasing order
"""Read information from the reference file
to define the data horizontal and, if relevant, vertical domain.
Args:
ref_dir (str): the path to the input files
@@ -37,67 +23,47 @@ def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
given by the user
Return:
domain (Domain): a domain class object, with the definition of the center
    grid cells' coordinates, as well as the corners'
"""
# Print some explanation
debug(__doc__)
# For the purpose of demonstration, the domain dimensions are specified by default
# in input_arguments.
# Individual arguments can be modified manually in the yaml
lon_min = tracer.lon_min
lon_max = tracer.lon_max
lat_min = tracer.lat_min
lat_max = tracer.lat_max
nlon = tracer.nlon
nlat = tracer.nlat
debug("if lon and lat are flat vectors in the definition, "
"convert into a grid with:\n"
"zlon, zlat = np.meshgrid(lon, lat)\n")
# Some explanations
debug(
    'Here, read the vertical information, from the same file as the '
    'horizontal information or from another one.\n'
    'Get the number of vertical levels.\n'
    'Get or deduce the sigma_a/sigma_b coefficients, from bottom to top, '
    'if the vertical extension is in pressure.\n'
    'It is possible to specify the vertical extension in m a.g.l. as well, '
    'by defining the variable "heights".'
)
nlev = tracer.nlev
punit = "Pa"
sigma_a = np.linspace(0, 1, nlev)
sigma_b = np.linspace(1, 0, nlev)
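# Hedged sketch (added, illustration only): one common convention combines
# hybrid sigma coefficients with the surface pressure to recover the
# pressure of each level, from bottom (sigma_b = 1) to top (sigma_b = 0):
#   p_surf = 101325.  # assumed standard surface pressure, in Pa
#   p_levels = sigma_a + sigma_b * p_surf  # with sigma_a expressed in Pa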
# Initializes domain
setup = Setup.from_dict(
{
"domain": {
"plugin": {
"name": "dummy",
"version": "std",
"type": "domain",
},
"xmin": lon_min, # minimum longitude for centers
"xmax": lon_max, # maximum longitude for centers
"ymin": lat_min, # minimum latitude for centers
"ymax": lat_max, # maximum latitude for centers
"nlon": nlon, # number of longitudinal cells
"nlat": nlat, # number of latitudinal cells
"nlev": nlev, # number of vertical levels
"sigma_a": sigma_a,
"sigma_b": sigma_b,
"pressure_unit": "Pa" # adapted to sigmas
}
}
)
Setup.load_setup(setup, level=1)
return setup.domain
ref_file = list(itertools.chain.from_iterable(tracer.input_files.values()))[0]
if not os.path.isfile(ref_file):
raise Exception(
"Could not initialize the domain as no reference file is available. "
"Expecting the following file: {}".format(ref_file))
# Grid cell centers
coords = xr.open_dataset(ref_file)[["latitude", "longitude"]]
lat = coords["latitude"].values
lon = coords["longitude"].values
nlat = len(lat)
nlon = len(lon)
zlon, zlat = np.meshgrid(lon, lat)
# Grid cell corners
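# (assumes a regular grid: np.diff then yields a single spacing value
# along each axis)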
dlat = np.unique(np.diff(lat))[0]
dlon = np.unique(np.diff(lon))[0]
latc = np.append(lat - dlat / 2, lat[-1] + dlat / 2)
lonc = np.append(lon - dlon / 2, lon[-1] + dlon / 2)
zlonc, zlatc = np.meshgrid(lonc, latc)
# Vertical definition (surface level)
pressure_unit = "Pa"
nlev = 1
sigma_a = np.array([0])
sigma_b = np.array([1])
# Put it into a domain Plugin
domain = Domain(nlon=nlon, nlat=nlat,
zlon=zlon, zlat=zlat,
zlonc=zlonc, zlatc=zlatc,
nlev=nlev, pressure_unit=pressure_unit,
sigma_b=sigma_b, sigma_a=sigma_a)
return domain
import numpy as np
import xarray as xr
from logging import debug
def read(
@@ -18,14 +19,6 @@ def read(
"""Get fluxes from raw files and load them into a pyCIF
variables.
The list of date intervals and corresponding files is directly provided,
coming from what is returned by the :bash:`fetch` function.
One should loop on dates and files and extract the corresponding temporal
slice of data.

Warning:
    Make sure to optimize the opening of files. There is a high chance that
    the same file has to be opened and closed over and over again while
    looping on the dates. If this is the case, make sure not to close it
    between dates; a minimal caching sketch is given right after this
    docstring.
Args:
name (str): name of the component
@@ -33,19 +26,15 @@ def read(
if `varnames` is empty
dates (list): list of the date intervals to extract
files (list): list of the files matching dates
Return:
    xr.DataArray: the actual data with dimensions:
        time, levels, latitudes, longitudes
"""
# Get domain dimensions for random generation
domain = tracer.domain
nlon = domain.nlon
nlat = domain.nlat
nlev = domain.nlev
var2extract = varnames if varnames != "" else name
# Loop over dates/files and import data
data = []
out_dates = []
@@ -55,19 +44,19 @@
ff, dd
)
)
# Generate random values instead of reading
data.append(
np.random.normal(
tracer.average_value, tracer.average_value / 2,
(nlev, nlat, nlon)))
# Open the file once, then read both the times and the data from it
ds = xr.open_dataset(ff)
times = ds["time"].values
ind_time = np.where(times == np.datetime64(dd[0]))[0][0]
data.append(ds[var2extract][ind_time].values)
out_dates.append(dd[0])
# if only one level for emissions, create the level axis
dataout = np.array(data)[:, np.newaxis]
dataout[np.isnan(dataout)] = 0
xmod = xr.DataArray(
    dataout,
coords={"time": out_dates},
dims=("time", "lev", "lat", "lon"),
)
return xmod
@@ -5,6 +5,7 @@ import pandas as pd
import xarray as xr
import numpy as np
import glob
from netCDF4 import Dataset, num2date
from .....utils import path
from logging import info
@@ -50,10 +51,19 @@ def fetch(ref_dir, ref_file, date_interval, target_dir,
indout = 0
file = list_files_avail[indout]
# Check if "time" is among the variables and read the time stamps if so.
# Do not do it with open_dataset, which is slow with big files...
with Dataset(file, "r") as f:
    available_time = "time" in f.variables
    if available_time:
        times = f.variables["time"]
        dates = num2date(times[:], times.units,
                         only_use_python_datetimes=True,
                         only_use_cftime_datetimes=False)

if available_time:
    # Deduce one interval per time stamp, each stamp closing its interval
    dt = np.unique(np.diff(dates))[0]
    tmp_dates[dd] = [[d - dt, d] for d in dates]
    tmp_files[dd] = len(tmp_dates[dd]) * [file]
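# Hedged illustration (added): for regularly spaced stamps d0, d1, ... with
# spacing dt, each stamp closes its own interval:
#   tmp_dates[dd] = [[d0 - dt, d0], [d1 - dt, d1], ...]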
......
@@ -3,12 +3,13 @@ import glob
import os
import numpy as np
import xarray as xr
from netCDF4 import Dataset
from .....utils.classes.setup import Setup
def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
# Looking for a reference file to read lon/lat in
list_file = glob.glob("{}/*nc".format(ref_dir))
@@ -35,11 +36,10 @@ def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
)
# Read lon/lat in
with Dataset(domain_file, "r") as f:
lon = f.variables["lon"][:]
lat = f.variables["lat"][:]
lon_min = lon.min() - (lon[1] - lon[0]) / 2
lon_max = lon.max() + (lon[-1] - lon[-2]) / 2
lat_min = lat.min() - (lat[1] - lat[0]) / 2
@@ -74,5 +74,4 @@ def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
setup.domain.nlev = 1
setup.domain.sigma_a = np.array([0])
setup.domain.sigma_b = np.array([1])
return setup.domain
@@ -2,6 +2,7 @@
import datetime
import glob
import os
from netCDF4 import Dataset, num2date
import numpy as np
import xarray as xr
@@ -25,14 +26,27 @@ def read(
trcr_flx = []
trcr_dates = []
for dd, file_flx in zip(dates, files):
# Check if "time" is among the variables.
# Do not do it with open_dataset, which is slow with big files...
with Dataset(file_flx, "r") as f:
    available_time = "time" in f.variables
if available_time:
    with Dataset(file_flx, "r") as f:
        times = f.variables["time"]
        times = num2date(times[:], times.units,
                         only_use_python_datetimes=True,
                         only_use_cftime_datetimes=False)
        ind_data = np.where(times == dd[1])[0][0]
        # Read the data at the matching index within the same open of the file
        trcr_flx.append(f.variables[varnames][ind_data].data)
else:
nc = xr.open_dataset(file_flx)
trcr_flx.append(nc[varnames].values)
trcr_dates.append(dd[0])
......