TNO read in datastreams

1fd9b278 · Elise Potier · 7daf662f · 1fd9b278 · 1fd9b278 · 1fd9b278
Commit 1fd9b278 authored 3 years ago by Elise Potier
--- a/pycif/plugins/datastreams/fluxes/TNO_nc/__init__.py
+++ b/pycif/plugins/datastreams/fluxes/TNO_nc/__init__.py
+"""
+Read TNO yearly fluxes and apply time profiles.
+Time profiles are applied in UTC; time zones are not taken into account.
+All the profile files are mandatory, including the vertical one.
+If point_sources is True, the vertical profile is also applied.
+WARNING: currently, point sources (PS) are put on the TNO grid.
+
+"""
+
+from .fetch import fetch
+from .get_domain import get_domain
+from .read import read
+from .write import write
+
+# Name/version pair declared by this plugin
+_name = "TNO"
+_version = "netcdf"
+
+
+# Arguments declared by this plugin: for each one, a description ("doc"),
+# a default value ("default") and the accepted type ("accepted").
+input_arguments = {
+    "point_sources": {
+        # Adjacent literals are joined by implicit string concatenation, so
+        # every fragment has to carry its own separator.
+        "doc": "Point Source type. "
+               "If True, enable to have vertical projection. "
+               "Default: False",
+        "default": False,
+        "accepted": bool,
+    },
+    "dir_profils": {
+        "doc": ""
+               "Directory where the time and vertical profils are"
+               " files should be TNO_height-distribution_GNFR.csv, "
+               "                 timeprofiles-month-in-year_GNFR.csv, "
+               "                 timeprofiles-day-in-week_GNFR.csv, "
+               "                 timeprofiles-hour-in-day_GNFR.csv"
+               "",
+        "accepted": str,
+        # NOTE(review): the default is False although the accepted type is
+        # str - presumably "no default directory"; confirm.
+        "default": False,
+    },
+}
+
+
--- a/pycif/plugins/datastreams/fluxes/TNO_nc/fetch.py
+++ b/pycif/plugins/datastreams/fluxes/TNO_nc/fetch.py
+import datetime
+import glob
+import os
+import pandas as pd
+
+import numpy as np
+
+from pycif.utils import path
+from .utils import find_valid_file
+
+def fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None, **kwargs):
+    """Link the TNO file valid for each day and list hourly sub-periods.
+
+    Args:
+        ref_dir: directory where the original files are found; it is
+            expanded with ``strftime`` for every day
+        ref_file: (template) name of the original files
+        input_dates: sequence whose first two items bound the daily range
+            of dates that is processed
+        target_dir: directory where the links to the original files are
+            created
+        tracer: not used by this function
+        **kwargs: not used by this function
+
+    Returns:
+        tuple ``(list_files, list_dates)`` of dictionaries keyed by day:
+        ``list_files[day]`` holds a single item, the pair of files returned
+        by ``find_valid_file``; ``list_dates[day]`` holds 24 ``[start, end]``
+        one-hour intervals. Days whose file does not exist are absent.
+    """
+    one_hour = datetime.timedelta(hours=1)
+    list_files = {}
+    list_dates = {}
+
+    for day in pd.date_range(input_dates[0], input_dates[1], freq="1D"):
+        # The directory template is expanded for the day itself and for the
+        # hour right after / right before it
+        day_dir = day.strftime(ref_dir)
+        next_dir = (day + one_hour).strftime(ref_dir)
+        previous_dir = (day - one_hour).strftime(ref_dir)
+        day_files, _ = find_valid_file(
+            day_dir, ref_file, day, next_dir, ref_dir_previous=previous_dir
+        )
+
+        # Days without an existing file are skipped
+        if not os.path.isfile(day_files[0]):
+            continue
+
+        hours = pd.date_range(
+            day, day + datetime.timedelta(hours=23), freq="1H"
+        )
+        list_dates[day] = [[hour, hour + one_hour] for hour in hours]
+        list_files[day] = [day_files]
+
+        # Make the original file available in the target directory
+        target_file = "{}/{}".format(target_dir, day.strftime(ref_file))
+        path.link(day_files[0], target_file)
+
+    return list_files, list_dates
--- a/pycif/plugins/datastreams/fluxes/TNO_nc/get_domain.py
+++ b/pycif/plugins/datastreams/fluxes/TNO_nc/get_domain.py
+import numpy as np
+import xarray as xr
+import glob
+import datetime
+import os
+
+from pycif.utils.classes.setup import Setup
+from logging import info
+
+def get_domain(ref_dir, ref_file, input_dates, target_dir, tracer=None):
+    """Build the domain description from the grid of a TNO netCDF file.
+
+    Args:
+        ref_dir: directory where the original files are found
+        ref_file: (template) name of the original files
+        input_dates: not used by this function
+        target_dir: not used by this function
+        tracer: object providing ``point_sources`` and, when point sources
+            are enabled, ``dir_profils``
+
+    Returns:
+        the ``domain`` attribute of the Setup initialized here, completed
+        with the 2D arrays of cell centers (``zlon``, ``zlat``) and corners
+        (``zlonc``, ``zlatc``)
+
+    Raises:
+        Exception: if no ``*nc`` file of ``ref_dir`` matches ``ref_file``
+    """
+    # Looking for a reference file to read lon/lat in
+    list_file = glob.glob("{}/*nc".format(ref_dir))
+    domain_file = None
+
+    # Either a file is specified in the Yaml: glob returns full paths, so
+    # the plain file name is compared against base names
+    if ref_file in [os.path.basename(flx_file) for flx_file in list_file]:
+        domain_file = "{}/{}".format(ref_dir, ref_file)
+
+    # Or loop over available files regarding the file pattern
+    else:
+        for flx_file in list_file:
+            try:
+                datetime.datetime.strptime(
+                    os.path.basename(flx_file), ref_file
+                )
+            except ValueError:
+                continue
+            domain_file = flx_file
+            break
+
+    if domain_file is None:
+        raise Exception(
+            "TNO domain could not be initialized as no file was found"
+        )
+
+    # Read lon/lat in; the dataset is closed as soon as values are loaded
+    with xr.open_dataset(domain_file, decode_times=False) as nc:
+        llon = nc['longitude'].values
+        llat = nc['latitude'].values
+
+    # Compute the corner vectors: centers shifted by half a cell, plus one
+    # extra corner after the last center
+    # NOTE(review): the cell size is hard-coded, presumably to the TNO
+    # grid resolution - confirm against the input files
+    resol_lon = 1. / 10
+    resol_lat = 1. / 20
+    llonc = np.append(llon - resol_lon * 0.5, llon[-1] + resol_lon * 0.5)
+    llatc = np.append(llat - resol_lat * 0.5, llat[-1] + resol_lat * 0.5)
+
+    # Lon and lat are vectors: convert them into 2D grids
+    lon, lat = np.meshgrid(llon, llat)
+    lonc, latc = np.meshgrid(llonc, llatc)
+    nlat, nlon = lat.shape[0], lat.shape[1]
+
+    # Min and max latitude and longitude of the cell centers
+    lon_min = lon.min()
+    lon_max = lon.max()
+    lat_min = lat.min()
+    lat_max = lat.max()
+    info('lon min {}, lon max {}'.format(lon_min,lon_max))
+    info('lat min {}, lat max {}'.format(lat_min,lat_max))
+
+    # Part of the domain description shared by both vertical set-ups
+    domain_dict = {
+        "plugin": {
+            "name": "dummy",
+            "version": "std",
+            "type": "domain",
+        },
+        "xmin": lon_min,    # minimum longitude for centers
+        "xmax": lon_max,    # maximum longitude for centers
+        "ymin": lat_min,    # minimum latitude for centers
+        "ymax": lat_max,    # maximum latitude for centers
+        "nlon": nlon,       # number of longitudinal cells
+        "nlat": nlat,       # number of latitudinal cells
+    }
+
+    height_top = None
+    if not tracer.point_sources:
+        # If no vertical dimension for emissions, provide a dummy vertical
+        domain_dict.update({
+            "nlev": 1,      # number of vertical levels
+            "sigma_a": np.array([0]),
+            "sigma_b": np.array([1]),
+            "pressure_unit": "Pa"  # adapted to sigmas
+        })
+    else:
+        # Levels are read from the header row of the vertical profile file:
+        # from the third ';'-separated field on, each field is a
+        # "<bottom> - <top>m" range of which only the top is kept
+        VerticalP_file = tracer.dir_profils + '/TNO_height-distribution_GNFR.csv'
+        lheader = 17
+        with open(VerticalP_file, 'r') as fi:
+            ln = fi.readlines()
+        # The last two characters of the row are dropped before splitting
+        height = ln[lheader][:-2].split(';')[2:]
+        height_top = np.array([
+            float(x.split('-')[1].replace("m", "").replace(" ", ""))
+            for x in height
+        ])
+        domain_dict.update({
+            "nlev": len(height_top),    # number of vertical levels
+            "height": height_top,
+            "height_unit": "m"
+        })
+
+    # Initializes domain
+    setup = Setup.from_dict({"domain": domain_dict})
+    setup.domain.zlon = lon   # longitudes of centers
+    setup.domain.zlat = lat   # latitudes of centers
+    setup.domain.zlonc = lonc # longitudes of corners
+    setup.domain.zlatc = latc # latitudes of corners
+    if height_top is not None:
+        setup.domain.height = height_top
+
+    return setup.domain
--- a/pycif/plugins/datastreams/fluxes/TNO_nc/read.py
+++ b/pycif/plugins/datastreams/fluxes/TNO_nc/read.py
+import datetime
+import os
+
+import numpy as np
+import xarray as xr
+import pandas as pd
+import string
+import calendar
+import pytz
+
+from pycif.utils.netcdf import readnc
+from logging import info
+
+def read(
+        self,
+        name,
+        varnames,
+        dates,
+        files,
+        interpol_flx=False,
+        comp_type=None,
+        tracer=None,
+        **kwargs
+):
+    """Get fluxes from raw files and load them into a pyCIF
+    variables
+
+    The emission of every source is scaled by the month, week-day and hour
+    coefficients of its category, divided by the area of its grid cell, and
+    accumulated on the grid for each of the 24 hours of the day. With point
+    sources, emissions are also spread over levels with the height
+    coefficients of the category.
+
+    Args:
+        self: not used by this function
+        name: not used by this function
+        varnames: name of the emission variable to read in the files
+        dates: sequence of ``[start, end]`` intervals; every 24th item is
+            used and its start is taken as the first hour of a day
+        files: sequence of file lists, sub-sampled like ``dates``; the
+            first file of each selected item is read
+        interpol_flx: not used by this function
+        comp_type: not used by this function
+        tracer: object providing ``dir_profils`` and ``point_sources``
+        **kwargs: not used by this function
+
+    Returns:
+        xarray.DataArray with dimensions ("time", "lev", "lat", "lon") and
+        24 time steps per day read
+
+    Raises:
+        ValueError: if ``dates``/``files`` select no day to read
+    """
+    # Time and vertical profiles
+    Pdir = tracer.dir_profils
+    profile_files = {
+        'month': 'timeprofiles-month-in-year_GNFR.csv',
+        'day': 'timeprofiles-day-in-week_GNFR.csv',
+        'hour': 'timeprofiles-hour-in-day_GNFR.csv',
+        'height': 'TNO_height-distribution_GNFR.csv',
+    }
+    coef_dict = {}
+    for key, file_name in profile_files.items():
+        # Coefficient rows start at line index 18 in the height file and 12
+        # in the time profile files; empty cells count as 0 for heights
+        # and as 1 for time profiles
+        lheader = 18 if key == 'height' else 12
+        fill = 0. if key == 'height' else 1.
+        # os.path.join works whether or not the directory ends with a
+        # separator
+        with open(os.path.join(Pdir, file_name), 'r') as fi:
+            ln = fi.readlines()
+        # The last two characters of each row are dropped and coefficients
+        # start at the third ';'-separated field
+        coef_dict[key] = [
+            [fill if xx == '' else float(xx) for xx in x[:-2].split(';')[2:]]
+            for x in ln[lheader:]
+        ]
+
+    nlevel = len(coef_dict['height'][0])
+    if not tracer.point_sources:
+        nlevel = 1
+    # Area sources have source type 1, point sources source type 2
+    source_type = 2 if tracer.point_sources else 1
+
+    # list of the various fields read:
+    data = []
+    outdate = []
+    ntime = 24
+    for ddi, ff in zip(dates[::24], files[::24]):
+        day_start = ddi[0]
+        yr = day_start.year
+        mm = day_start.month
+        wday = day_start.weekday()
+        info('Reading of {} in {} for {}'.format([varnames],ff,ddi))
+
+        # Everything needed is loaded in memory, then the file is closed
+        with xr.open_dataset(ff[0], decode_times=False) as nc:
+            ds = pd.DataFrame({
+                'cat_index': nc['emission_category_index'].values,
+                'source_type': nc['source_type_index'].values,
+                'ilon': nc['longitude_index'].values,
+                'ilat': nc['latitude_index'].values,
+                'emis': nc[varnames].values,
+            })
+            areas = nc['area'].values
+
+        # The grid size comes from the area field, indexed as [lat, lon],
+        # so that the output covers the whole grid even when no source
+        # sits in the last row or column
+        nlat, nlon = areas.shape
+        nbdays = calendar.monthrange(yr, mm)[1]
+
+        TNO_array = np.zeros((ntime, nlevel, nlat, nlon))
+        for cat in range(ds['cat_index'].min(), ds['cat_index'].max() + 1):
+            ds_cat = ds[
+                (ds.cat_index == cat) & (ds.source_type == source_type)
+            ]
+            if ds_cat.empty:
+                continue
+
+            # Yearly total -> month -> day -> mean hour of that day
+            scale = (
+                coef_dict['month'][cat - 1][mm - 1] / 12. / nbdays
+                * coef_dict['day'][cat - 1][wday] / 24.
+            )
+
+            # Indices in the file are 1-based
+            ilat = ds_cat['ilat'].values - 1
+            ilon = ds_cat['ilon'].values - 1
+            flux = ds_cat['emis'].values * scale / areas[ilat, ilon]
+
+            # np.add.at accumulates sources sharing the same grid cell
+            cat_grid = np.zeros((nlat, nlon))
+            np.add.at(cat_grid, (ilat, ilon), flux)
+
+            hour_coef = np.array(coef_dict['hour'][cat - 1])
+            if tracer.point_sources:
+                level_coef = np.array(coef_dict['height'][cat - 1])
+            else:
+                level_coef = np.ones(1)
+
+            # Slice-wise accumulation avoids a temporary as big as the
+            # full 4D array
+            for it in range(ntime):
+                for il in range(nlevel):
+                    TNO_array[it, il] += (
+                        hour_coef[it] * level_coef[il] * cat_grid
+                    )
+
+        data.append(TNO_array)
+        outdate.extend(
+            [ddi[0] + datetime.timedelta(hours=x) for x in range(ntime)]
+        )
+
+    if not data:
+        raise ValueError("No date was provided to read TNO fluxes")
+
+    # Days are stacked along the time axis, in line with outdate
+    data = np.concatenate(data, axis=0)
+    xmod = xr.DataArray(
+              data,
+              coords={"time": outdate},
+              dims=("time", "lev", "lat", "lon"),
+          )
+    return xmod
--- a/pycif/plugins/datastreams/fluxes/TNO_nc/utils.py
+++ b/pycif/plugins/datastreams/fluxes/TNO_nc/utils.py
+import datetime
+import glob
+import os
+import calendar 
+import numpy as np
+
+def find_valid_file(ref_dir, file_format, dd, ref_dir_next,ref_dir_previous=False):
+    """Find the files whose dates bracket a reference date.
+
+    File names are converted to dates with ``file_format``; names that
+    contain 'idx' or do not match the format are ignored.
+
+    Args:
+        ref_dir: directory listed for candidate files
+        file_format: ``strftime``/``strptime`` template of the file names
+        dd: date for which files are looked for
+        ref_dir_next: directory also listed when the date three hours after
+            the reference date falls in a later month
+        ref_dir_previous: directory also listed when the date three hours
+            before the reference date falls in an earlier month; ignored
+            if false
+
+    Returns:
+        tuple ``([file1, file2], [date1, date2])``: paths of the files with
+        the nearest date before (or at) and after (or at) the reference
+        date, and the corresponding dates expressed relative to ``dd``
+
+    Raises:
+        Exception: if no file matches ``file_format``
+        ValueError: if no file is found on one side of the reference date
+    """
+    # Imported here as the logging function is not imported at module level
+    from logging import info
+
+    # Convert ref date: dd is truncated to what the file format can express
+    ref_date = datetime.datetime.strptime(dd.strftime(file_format), file_format)
+    ref_month = (ref_date.year, ref_date.month)
+
+    # Neighbouring directories are needed when the reference date is close
+    # to a month boundary; (year, month) pairs are compared so that the
+    # December/January transition is handled as well
+    extra_dirs = []
+    previous_date = ref_date - datetime.timedelta(hours=3)
+    if ref_dir_previous \
+            and (previous_date.year, previous_date.month) < ref_month:
+        extra_dirs.append(ref_dir_previous)
+    next_date = ref_date + datetime.timedelta(hours=3)
+    if ref_dir_next and (next_date.year, next_date.month) > ref_month:
+        extra_dirs.append(ref_dir_next)
+
+    # Get all candidate files, remembering the directory each one is in
+    candidates = [(ref_dir, f) for f in os.listdir(ref_dir)]
+    for extra_dir in extra_dirs:
+        try:
+            candidates += [(extra_dir, f) for f in os.listdir(extra_dir)]
+        except OSError:
+            info("Did not find any valid files in {} "
+                 "with format {}"
+                 .format(extra_dir, file_format))
+
+    # Keep files matching the format, with their dates
+    dated_files = []
+    for directory, f in candidates:
+        if 'idx' in f:
+            continue
+        try:
+            file_date = datetime.datetime.strptime(f, file_format)
+        except ValueError:
+            continue
+        dated_files.append((file_date, os.path.join(directory, f)))
+
+    if not dated_files:
+        raise Exception("Did not find any valid files in {} "
+                        "with format {}. Please check your yml file"
+                        .format(ref_dir, file_format))
+
+    def by_date(item):
+        return item[0]
+
+    # Find nearest previous and nearest next dates; on ties, the first
+    # listed file wins, i.e. the one from ref_dir
+    previous = [item for item in dated_files if item[0] <= ref_date]
+    following = [item for item in dated_files if item[0] >= ref_date]
+    if not previous or not following:
+        raise ValueError(
+            "Did not find files on both sides of {} in {} with format {}"
+            .format(ref_date, ref_dir, file_format)
+        )
+    date_ref1, file_ref1 = max(previous, key=by_date)
+    date_ref2, file_ref2 = min(following, key=by_date)
+
+    # Reconvert to original date
+    dd1 = dd + (date_ref1 - ref_date)
+    dd2 = dd + (date_ref2 - ref_date)
+
+    return [file_ref1, file_ref2], [dd1, dd2]
+
+
--- a/pycif/plugins/datastreams/fluxes/TNO_nc/write.py
+++ b/pycif/plugins/datastreams/fluxes/TNO_nc/write.py
+def write(self, name, flx_file, flx, mode="a"):
+    """Writing fluxes is not supported by this plugin.
+
+    Args:
+        self: not used by this function
+        name: not used by this function
+        flx_file: not used by this function
+        flx: not used by this function
+        mode: not used by this function
+
+    Raises:
+        NotImplementedError: always. It is a subclass of Exception, so
+            callers catching Exception are not affected.
+    """
+    raise NotImplementedError("Can't write template fluxes so far")