Skip to content
Snippets Groups Projects
Commit 1fd9b278 authored by Elise Potier's avatar Elise Potier
Browse files

TNO read in datastreams

parent 7daf662f
No related branches found
No related tags found
1 merge request!44Grouping flux/meteo/field into datatream; cleaning plurals in class names;...
Read TNO yearly fluxes and apply time profiles.
Time profiles are considered to be in UTC; time zones are not taken into account.
All the profile files are mandatory, even the vertical one.
If point_sources: True, the vertical profile is also applied.
WARNING: Currently point sources (PS) are put on the TNO grid.
from .fetch import fetch
from .get_domain import get_domain
from .read import read
from .write import write
# Name and version identifiers of this TNO reader.
_name = "TNO"
_version = "netcdf"
# NOTE(review): the lines below are the entries of an options mapping whose
# opening line and closing braces are not visible in this listing.
# Option "point_sources": boolean switch, False by default.
# NOTE(review): the three adjacent string literals of "doc" are concatenated
# without separating spaces, and the first one contains the typo "Soucre".
"point_sources": {
"doc": "Point Soucre type"
"If True, enable to have vertical projection"
"Default: False",
"default": False,
"accepted": bool
# Option "dir_profils": directory holding the four profile CSV files named in
# the "doc" text.
# NOTE(review): the accepted type is str but the default is False; code that
# concatenates this value with a file name would fail on the default.
"dir_profils": {
"doc": ""
"Directory where the time and vertical profils are"
" files should be TNO_height-distribution_GNFR.csv, "
" timeprofiles-month-in-year_GNFR.csv, "
" timeprofiles-day-in-week_GNFR.csv, "
" timeprofiles-hour-in-day_GNFR.csv"
# NOTE(review): no comma is visible between the "doc" string above and the
# "accepted" key below - confirm against the real file.
"accepted": str,
"default": False
import datetime
import glob
import os
import pandas as pd
import numpy as np
from pycif.utils import path
from .utils import find_valid_file
def fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None, **kwargs):
    """List the TNO files and hourly intervals available for each day.

    Args:
        ref_dir: directory where the original files are found; it is passed
            through ``strftime`` so it may contain date directives.
        ref_file: (template) name of the original files, also expanded with
            ``strftime``.
        input_dates: sequence whose first two items bound the period to
            process; one entry is produced per day between them.
        target_dir: directory where the links to the original files are
            created.
        tracer: unused here.
        **kwargs: unused here.

    Returns:
        A tuple ``(list_files, list_dates)`` of dictionaries keyed by each
        day of the period for which a file exists:

        * ``list_files[day]`` is a one-item list holding the pair of files
          returned by ``find_valid_file``;
        * ``list_dates[day]`` holds the 24 ``[start, end]`` hourly intervals
          of that day.
    """
    list_dates = {}
    list_files = {}
    one_hour = datetime.timedelta(hours=1)

    for dd in pd.date_range(input_dates[0], input_dates[1], freq="1D"):
        # The directory template may change with the date, so the directories
        # one hour before and after are searched as well.
        dir_dd = dd.strftime(ref_dir)
        dir_dd_next = (dd + one_hour).strftime(ref_dir)
        dir_dd_previous = (dd - one_hour).strftime(ref_dir)

        files_3d, _ = find_valid_file(
            dir_dd, ref_file, dd, dir_dd_next,
            ref_dir_previous=dir_dd_previous,
        )

        # Days without an existing file are left out of both dictionaries
        if not os.path.isfile(files_3d[0]):
            continue

        # Built by hand rather than with pd.date_range(..., freq="1H"):
        # the "H" alias is deprecated in recent pandas releases.
        day_hours = [dd + step * one_hour for step in range(24)]
        list_dates[dd] = [[hh, hh + one_hour] for hh in day_hours]
        list_files[dd] = [files_3d]

        # NOTE(review): this statement was fused into one invalid line in the
        # source; it is reconstructed as a link of the original file into
        # target_dir - confirm against the repository.
        target_file = "{}/{}".format(target_dir, dd.strftime(ref_file))
        path.link(files_3d[0], target_file)

    return list_files, list_dates
import numpy as np
import xarray as xr
import glob
import datetime
import os
from pycif.utils.classes.setup import Setup
from logging import info
def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
# Build the domain (horizontal grid and vertical levels) of the TNO fluxes
# from the coordinates stored in one of the TNO NetCDF files.
# NOTE(review): this listing has lost its indentation and several short lines
# (block openers/closers such as "try:", "else:", braces and parentheses);
# the comments below describe only what the remaining lines show.
# Inputs:
# ref_dir: directory where the original files are found
# ref_file: (template) name of the original files
# input_dates: list of the periods to simulate, each item is the list of the dates of the period
# target_dir: directory where the links to the original files are created
# tracer: object read for its point_sources and dir_profils attributes
# NOTE(review): with the default tracer=None, tracer.point_sources would fail.
# Outputs:
# setup of the domain in section "Initializes domain"
# Looking for a reference file to read lon/lat in
# (every entry of ref_dir whose name ends with "nc" is a candidate)
list_file = glob.glob("{}/*nc".format(ref_dir))
domain_file = None
# Either a file is specified in the Yaml
# NOTE(review): list_file holds paths prefixed with ref_dir, so this test
# succeeds only when ref_file is written exactly like one of those paths.
if ref_file in list_file:
domain_file = "{}/{}".format(ref_dir, ref_file)
# Or loop over available file regarding file pattern
# A candidate is kept when its base name can be parsed with ref_file used as
# a strptime format; the parsed date itself is not used afterwards.
for flx_file in list_file:
date = datetime.datetime.strptime(
os.path.basename(flx_file), ref_file
domain_file = flx_file
except ValueError:
if domain_file is None:
raise Exception(
"TNO domain could not be initialized as no file was found"
# Read lon/lat in
nc = xr.open_dataset(domain_file, decode_times=False)
llon = nc['longitude'].values
llat = nc['latitude'].values
# The bounds are read but not used in the visible lines.
llonb = nc['longitude_bounds'].values
llatb = nc['latitude_bounds'].values
# compute the corner matrix
# Corners are the centers shifted by half a cell, plus one extra corner after
# the last center; the cell sizes are hard-coded (presumably degrees - TODO
# confirm) instead of being derived from the file.
resol_lon =1./10
resol_lat =1./20
llonc = np.append(llon-resol_lon*0.5,llon[-1]+resol_lon*0.5)
llatc = np.append(llat-resol_lat*0.5,llat[-1]+resol_lat*0.5)
# 2D grids of centers and corners; the first axis follows latitude
lon, lat = np.meshgrid(llon,llat)
lonc, latc = np.meshgrid(llonc,llatc)
nlat, nlon = lat.shape[0],lat.shape[1]
#print('Get the min and max latitude and longitude of centers + the number of longitudes and latitudes')
lon_min = lon.min() #- (lon[1] - lon[0]) / 2
lon_max = lon.max() #+ (lon[-1] - lon[-2]) / 2
lat_min = lat.min() #- (lat[1] - lat[0]) / 2
lat_max = lat.max() #+ (lat[-1] - lat[-2]) / 2
info('lon min {}, lon max {}'.format(lon_min,lon_max))
info('lat min {}, lat max {}'.format(lat_min,lat_max))
if tracer.point_sources == False :
# If no vertical dimension for emissions, provide dummy vertical
# punit is assigned but the literal "Pa" is what is passed below.
punit = "Pa"
nlevs = 1
sigma_a = np.array([0])
sigma_b = np.array([1])
# Initializes domain
setup = Setup.from_dict(
"domain": {
"plugin": {
"name": "dummy",
"version": "std",
"type": "domain",
"xmin": lon_min, # minimum longitude for centers
"xmax": lon_max, # maximum longitude for centers
"ymin": lat_min, # minimum latitude for centers
"ymax": lat_max, # maximum latitude for centers
"nlon": nlon, # number of longitudinal cells
"nlat": nlat, # number of latitudinal cells
"nlev": nlevs, # number of vertical levels
"sigma_a": sigma_a,
"sigma_b": sigma_b,
"pressure_unit": "Pa" # adapted to sigmas
# if lon and lat are vectors, convert into a grid with
setup.domain.zlon = lon # longitudes of centers
setup.domain.zlat = lat # latitudes of centers
setup.domain.zlonc = lonc # longitudes of corners
setup.domain.zlatc = latc # latitudes of corners
else :
# Point sources: the vertical levels come from the height-distribution CSV
punit = "m"
VerticalP_file = tracer.dir_profils + '/TNO_height-distribution_GNFR.csv'
# Index of the line holding the layer labels in the CSV file
lheader = 17
# NOTE(review): the file is opened without a matching close in the visible lines.
fi = open(VerticalP_file,'r')
ln = fi.readlines()
# Drop the last two characters of the header line, split on ';' and skip the
# first two fields; each remaining label is parsed as "<bottom>-<top>m".
height = ln[lheader][:-2].split(';')[2:]
# height_down is computed but not used in the visible lines.
height_down =np.array([float(x.split('-')[0].replace(" ", "")) for x in height])
height_top = np.array([float(x.split('-')[1].replace("m","").replace(" ", "")) for x in height])
# NOTE(review): nlevs is not assigned in this branch among the visible lines,
# although it is used for "nlev" below.
# Initializes domain
setup = Setup.from_dict(
"domain": {
"plugin": {
"name": "dummy",
"version": "std",
"type": "domain",
"xmin": lon_min, # minimum longitude for centers
"xmax": lon_max, # maximum longitude for centers
"ymin": lat_min, # minimum latitude for centers
"ymax": lat_max, # maximum latitude for centers
"nlon": nlon, # number of longitudinal cells
"nlat": nlat, # number of latitudinal cells
"nlev": nlevs, # number of vertical levels
"height": height_top,
"height_unit": "m"
# if lon and lat are vectors, convert into a grid with
setup.domain.zlon = lon # longitudes of centers
setup.domain.zlat = lat # latitudes of centers
setup.domain.zlonc = lonc # longitudes of corners
setup.domain.zlatc = latc # latitudes of corners
setup.domain.height = height_top
return setup.domain
import datetime
import os
import numpy as np
import xarray as xr
import pandas as pd
import string
import calendar
import pytz
from pycif.utils.netcdf import readnc
from logging import info
def read(
# Turn the yearly TNO emissions of each file into hourly fields by applying
# the month, week-day, hour and (for point sources) height profiles.
# NOTE(review): the parameter list, the end of the docstring and several short
# lines are missing from this listing; the body reads the names tracer, dates,
# files and varnames, which presumably come from the signature.
"""Get fluxes from raw files and load them into a pyCIF
# Time profiles
# NOTE(review): the directory and the file names are concatenated without a
# separator, so dir_profils has to end with one.
Pdir = tracer.dir_profils
TPmonth_file = Pdir + 'timeprofiles-month-in-year_GNFR.csv'
TPday_file = Pdir + 'timeprofiles-day-in-week_GNFR.csv'
TPhour_file = Pdir + 'timeprofiles-hour-in-day_GNFR.csv'
VerticalP_file = Pdir + 'TNO_height-distribution_GNFR.csv'
# coef_dict[key] is a list of rows, each row being a list of float coefficients
coef_dict = {}
lheader = 12
for key, f in zip(['month', 'day', 'hour','height'], [TPmonth_file, TPday_file, TPhour_file,VerticalP_file]):
# NOTE(review): as listed, the second assignment always overrides the first;
# presumably an "else:" line was lost between them.
if key == 'height':
lheader = 18
lheader = 12
# NOTE(review): the file is opened without a matching close in the visible lines.
fi = open(f,'r')
ln = fi.readlines()
# For every line after the header: drop the last two characters, split on ';'
# and skip the first two fields.
coef_dict[key] = [x[:-2].split(';')[2:] for x in ln[lheader:]]
# Empty cells become 0 for the height profile and 1 for the time profiles
if key == 'height' :
coef_dict[key] = [[0. if xx=='' else float(xx) for xx in x] for x in coef_dict[key]]
else :
coef_dict[key] = [[1. if xx=='' else float(xx) for xx in x] for x in coef_dict[key]]
# One level per column of the height profile, or a single level without point sources
nlevel = len(coef_dict['height'][0])
if tracer.point_sources == False:
nlevel = 1
# list of the various fields read:
data = []
outdate = []
# Only every 24th entry of dates/files is processed; each one yields 24 time steps
dates_step = dates[::24]
files_step = files[::24]
ntime = 24
for ddi, ff in zip(dates_step, files_step):
#dd_UTC = ddi.tz_localize('UTC')
#dd_CET = dd_UTC.tz_convert('Europe/Berlin')
# Despite its name, dd_CET is the first date of the interval taken as is:
# the time-zone conversion above is commented out.
dd_CET = ddi[0]#pd.date_range(ddi[0], ddi[0] + datetime.timedelta(hours=23), freq="1H")
yr = dd_CET.year
mm = dd_CET.month
# dd is the week-day index (Monday is 0); hh is not used in the visible lines
dd = dd_CET.weekday()
hh = dd_CET.hour
info('Reading of {} in {} for {}'.format([varnames],ff,ddi))
# One DataFrame row per emission record of the file
ds = pd.DataFrame({})
nc = xr.open_dataset(ff[0], decode_times=False)
ds['cat_index'] = nc['emission_category_index'].values
ds['source_type'] = nc['source_type_index'].values
ds['ilon'] = nc['longitude_index'].values
ds['ilat'] = nc['latitude_index'].values
areas = nc['area'].values
ds['emis'] = nc[varnames].values
min_cat = ds['cat_index'].min()
max_cat = ds['cat_index'].max()
# The grid size is taken from the largest indices found in the records;
# indices are used with a -1 offset below, i.e. they are treated as 1-based.
nlon = ds['ilon'].max()
nlat = ds['ilat'].max()
TNO_array = np.zeros((ntime,nlevel,nlat, nlon))
for cat in range(min_cat,max_cat+1):
# Profile rows are addressed with cat-1
local_month_coef = coef_dict['month'][cat-1][mm-1]
# Number of days of the month, with one more for February of a leap year
local_nbdays = calendar.mdays[mm] + (mm == 2 and calendar.isleap(yr))
local_wday_coef = coef_dict['day'][cat-1][dd]
local_hour_coef = np.array(coef_dict['hour'][cat-1])
local_height_coef = np.array(coef_dict['height'][cat-1])
# Records with source_type 1 are selected by default, source_type 2 when
# point sources are requested.
ds_cat = ds[(ds.cat_index == cat) & (ds.source_type == 1)]
if tracer.point_sources:
ds_cat = ds[(ds.cat_index == cat) & (ds.source_type == 2)]
ilons = ds_cat['ilon']
ilats = ds_cat['ilat']
emis = ds_cat['emis']
for lon, lat, e in zip(ilons, ilats, emis):
# yearly value -> month -> day -> per unit of cell area -> hour; the result
# has one value per hourly coefficient (assumes 24 of them - TODO confirm).
emis_hour = (((e/12. * local_month_coef)/local_nbdays * local_wday_coef)/areas[lat-1,lon-1])/24. * local_hour_coef
if tracer.point_sources:
# Spread each hourly value over the levels with the height coefficients
TNO_array[:,:,lat-1,lon-1] += emis_hour[:, np.newaxis] * local_height_coef.reshape((1,nlevel))
else :
TNO_array[:,0,lat-1,lon-1] += emis_hour
outdate.extend([ddi[0]+datetime.timedelta(hours=x) for x in range(ntime)])
# NOTE(review): no visible line stores TNO_array into data, which would leave
# data empty here, and the target shape only matches a single processed day.
data = np.array(data).reshape((ntime,nlevel,nlat, nlon))
# NOTE(review): the first argument of the DataArray call (the values) is not
# visible in this listing.
xmod = xr.DataArray(
coords={"time": outdate},
dims=("time", "lev", "lat", "lon"),
return xmod
import datetime
import glob
import os
import calendar
import numpy as np
def find_valid_file(ref_dir, file_format, dd, ref_dir_next, ref_dir_previous=False):
    """Find the files whose dates bracket a reference date.

    File names are parsed with ``file_format`` used as a ``strptime``
    pattern; names containing ``idx`` or not matching the pattern are
    ignored.

    Args:
        ref_dir: directory searched for candidate files.
        file_format: ``strftime``/``strptime`` pattern of the file names.
        dd: reference date; any object providing ``strftime`` and supporting
            the addition of a ``datetime.timedelta``.
        ref_dir_next: directory searched in addition when the date three
            hours after the reference falls in another month.
        ref_dir_previous: directory searched in addition when the date three
            hours before the reference falls in another month; ignored when
            falsy.

    Returns:
        ``([file_before, file_after], [date_before, date_after])``: the paths
        of the nearest files dated at or before and at or after the
        reference, and the matching dates expressed as offsets from ``dd``.

    Raises:
        Exception: if no file matches ``file_format``.
        ValueError: if no file is dated at or before, or at or after, the
            reference date.
        OSError: if ``ref_dir`` cannot be listed.
    """
    from logging import info

    # Round the requested date to the resolution carried by the file names
    ref_date = datetime.datetime.strptime(dd.strftime(file_format), file_format)
    ref_month = (ref_date.year, ref_date.month)

    # Neighbouring directories are needed only next to a month change; the
    # year is compared too, so that December/January changes are detected.
    extra_dirs = []
    previous_date = ref_date - datetime.timedelta(hours=3)
    if ref_dir_previous and (previous_date.year, previous_date.month) != ref_month:
        extra_dirs.append(ref_dir_previous)
    next_date = ref_date + datetime.timedelta(hours=3)
    if ref_dir_next and (next_date.year, next_date.month) != ref_month:
        extra_dirs.append(ref_dir_next)

    # Each name is remembered with the directory it was found in
    found = [(ref_dir, name) for name in os.listdir(ref_dir)]
    scanned = {ref_dir}
    for directory in extra_dirs:
        if directory in scanned:
            continue
        scanned.add(directory)
        try:
            found.extend((directory, name) for name in os.listdir(directory))
        except OSError:
            # A missing neighbouring directory is not an error by itself
            info("Did not find any valid files in {} "
                 "with format {}"
                 .format(directory, file_format))

    # Keep the files whose name encodes a date, skipping index files
    candidates = []
    for directory, name in found:
        if "idx" in name:
            continue
        try:
            file_date = datetime.datetime.strptime(name, file_format)
        except ValueError:
            continue
        candidates.append((file_date, os.path.join(directory, name)))

    if not candidates:
        raise Exception("Did not find any valid files in {} "
                        "with format {}. Please check your yml file"
                        .format(ref_dir, file_format))

    # Sorting makes the choice deterministic when several files share a date
    candidates.sort()
    at_or_before = [item for item in candidates if item[0] <= ref_date]
    at_or_after = [item for item in candidates if item[0] >= ref_date]
    if not at_or_before or not at_or_after:
        raise ValueError("No file brackets {} in {} with format {}"
                         .format(ref_date, ref_dir, file_format))

    date_ref1, file_ref1 = at_or_before[-1]  # nearest previous date
    date_ref2, file_ref2 = at_or_after[0]  # nearest next date

    # Reconvert to original date
    dd1 = dd + (date_ref1 - ref_date)
    dd2 = dd + (date_ref2 - ref_date)

    return [file_ref1, file_ref2], [dd1, dd2]
def write(self, name, flx_file, flx, mode="a"):
    """Refuse to write fluxes: the TNO reader is read-only.

    Args:
        self: the object this function is attached to; unused.
        name: unused.
        flx_file: unused.
        flx: unused.
        mode: unused.

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers catching ``Exception`` keep working.
    """
    raise NotImplementedError("Can't write TNO fluxes so far")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment