Commit 071c32e5 authored by Espen Sollum's avatar Espen Sollum

Bug introduced by the previous commit resolved

parent ab34842a
@@ -52,6 +52,15 @@ def hresol2dim(tracer, dom, **kwargs):
tracer.regions = f.variables['regions'][:]
tracer.nregions = len(np.unique(tracer.regions))
+ # Default behaviour: optimize ocean boxes
+ tracer.inc_ocean = getattr(tracer, 'inc_ocean', True)
+ # Set ocean boxes to positive values
+ tr_tmp = tracer.regions
+ tr_tmp[tr_tmp >= 1] = tr_tmp[tr_tmp >= 1] - 1
+ tr_tmp = tr_tmp + np.abs(tr_tmp.min()) + 1
+ tracer.regions = tr_tmp.astype(int)
# Check that regions have the correct dimensions
if tracer.regions.shape != (dom.nlat, dom.nlon):
raise Exception("Regions were not correctly defined in {}"
......
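A remark on the new block above: tr_tmp = tracer.regions does not copy, so the masked in-place assignment also modifies tracer.regions; that is harmless here only because tracer.regions is reassigned on the last line. The renumbering itself shifts all region IDs to strictly positive integers. A minimal sketch of the effect, assuming (my reading of the comments, not confirmed elsewhere) that ocean boxes carry strictly negative IDs and land regions IDs >= 1:

    import numpy as np

    # Hypothetical region map: negative IDs mark ocean boxes,
    # IDs >= 1 mark land regions
    regions = np.array([[-2, -1],
                        [1, 2]])

    tr_tmp = regions.copy()
    tr_tmp[tr_tmp >= 1] = tr_tmp[tr_tmp >= 1] - 1  # close the gap between ocean and land IDs
    tr_tmp = tr_tmp + np.abs(tr_tmp.min()) + 1     # shift the smallest ID up to 1
    regions = tr_tmp.astype(int)

    print(regions)
    # [[1 2]
    #  [3 4]]

If ocean boxes can also carry the ID 0, the decrement makes land region 1 collide with ocean box 0 after the shift, which may or may not be intended.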
- from builtins import str
from pycif.utils.check import verbose
from pycif.utils.datastores import dump
from pycif.utils.datastores.empty import init_empty
@@ -13,7 +12,7 @@ import shutil
def parse_tracers(self,
datei,
datef,
- file_monitor='',
+ fic_monitor='',
workdir='',
**kwargs):
"""Parses all observation files related to the tracers specified as
@@ -24,7 +23,7 @@ def parse_tracers(self,
file
datei (datetime.datetime): initial date for the inversion window
datef (datetime.datetime): end date for the inversion window
- file_monitor (str): file with pre-compile observations if exists
+ fic_monitor (str): file with pre-compile observations if exists
workdir (str): working directory
logfile (str): path to the log file for verbose instances
**kwargs (dictionary) : any additional argument that might be useful
@@ -42,14 +41,14 @@ def parse_tracers(self,
# Dump type: default is nc
self.dump_type = getattr(self, 'dump_type', 'nc')
- # If file_monitor is defined, tries reading it
- if hasattr(self, 'file_monitor'):
- file_monitor = self.file_monitor
+ # If fic_monitor is defined, tries reading it
+ if hasattr(self, 'fic_monitor'):
+ fic_monitor = self.fic_monitor
try:
verbose("Extracting measurements from {}".format(file_monitor))
verbose("Extracting measurements from {}".format(fic_monitor))
return \
dump.read_datastore(file_monitor,
dump.read_datastore(fic_monitor,
dump_type=self.dump_type,
**kwargs)
@@ -60,14 +59,14 @@ def parse_tracers(self,
except Exception as e:
verbose(e)
verbose("Could not read the specified monitor file: {}",
- file_monitor)
+ fic_monitor)
raise e
# Otherwise, create the monitor from observations
if hasattr(self, 'workdir'):
workdir = self.workdir
- file_monitor = workdir + '/obs/monit_standard.nc'
+ fic_monitor = workdir + '/obs/monit_standard.nc'
# If the measurement definition is empty in the Yaml,
# return an empty datastore
@@ -76,7 +75,7 @@ def parse_tracers(self,
# Loops through tracers if monitor not found
path.init_dir(workdir + '/obs/')
- shutil.rmtree(file_monitor, ignore_errors=True)
+ shutil.rmtree(fic_monitor, ignore_errors=True)
datastore = {}
@@ -114,14 +113,14 @@ def parse_tracers(self,
dataspec[provider] = \
dataspec[provider].loc[str(datei):str(datef)]
- datastore[spec] = pd.concat(list(dataspec.values()))
+ datastore[spec] = pd.concat(dataspec.values())
# Grouping species into a single datastore
- datastore = pd.concat(list(datastore.values()))
+ datastore = pd.concat(datastore.values())
# Dumping
dump.dump_datastore(datastore,
- file_monitor, workdir,
+ fic_monitor, workdir,
dump_type=self.dump_type,
**kwargs)
......
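A remark on the pd.concat edits in this hunk: the list(...) wrappers around dataspec.values() and datastore.values() were presumably added by an automated Python 2-to-3 conversion (compare the from builtins imports above). On Python 2, dict.values() already returns a plain list, so dropping the wrappers changes nothing there; on Python 3 it returns a lazy view object, and keeping list() is the defensive, version-portable spelling. A minimal sketch with illustrative data:

    import pandas as pd

    dataspec = {'provider_a': pd.DataFrame({'obs': [1.0, 2.0]}),
                'provider_b': pd.DataFrame({'obs': [3.0]})}

    # Explicit list() behaves the same on Python 2 and 3
    datastore = pd.concat(list(dataspec.values()))

So this change effectively pins the module to Python 2 unless the wrappers are restored later.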
# -*- coding: utf-8 -*-
- from builtins import map
- from builtins import zip
- from builtins import range
import pandas as pd
import os
import numpy as np
import pycif.utils.check as check
- from .headers import get_header, parse_header
- from .utils import parse_file, shiftdate, rescale, convert_unit
+ from . import headers, utils
def do_parse(self,
- obs_file,
+ fic,
maxlen=300,
default_unit='ppm',
default_tz='utc',
@@ -26,7 +22,7 @@ def do_parse(self,
"""Parse function for a file from WDCGG
Args:
- obs_file (str) :
+ fic (str) :
Path to input file
maxlen (int):
Maximum possible length for a WDCGG header. Default is `300`
@@ -55,7 +51,7 @@ def do_parse(self,
"""
check.verbose("Reading observation file: {}".format(os.path.basename(obs_file)))
check.verbose("Reading observation file: {}".format(os.path.basename(fic)))
# Get default unit from species description if available
if hasattr(self, 'default_unit'):
@@ -74,37 +70,37 @@ def do_parse(self,
err_na_values = self.err_na_values
# Scans file to get the header
- header = get_header(obs_file, maxlen)
+ header = headers.get_header(fic, maxlen)
# Does not read empty files
if len(header) == 0:
check.verbose("{} is empty. Not reading it".format(obs_file))
check.verbose("{} is empty. Not reading it".format(fic))
return pd.DataFrame({})
else:
# Get spec info either from the function argument
# or from the file name
- file_infos = parse_file(obs_file)
+ fic_infos = utils.parse_fic(fic)
if spec is None:
- spec = file_infos['parameter']
+ spec = fic_infos['parameter']
list_extract = [spec] + extract
# Get the content of columns and extra information from the header
names, columns, date_ids, extra = \
- parse_header(header,
+ headers.parse_header(header,
spec,
list_extract,
default_unit,
default_tz)
# Reads the file with Pandas
- df = pd.read_csv(obs_file,
+ df = pd.read_csv(fic,
delim_whitespace=True,
skiprows=len(header),
usecols=date_ids + columns,
- parse_dates=[list(range(len(date_ids)))],
+ parse_dates=[range(len(date_ids))],
infer_datetime_format=True,
quoting=3,
header=None,
@@ -113,7 +109,7 @@ def do_parse(self,
# Rename columns according to standard names
- df.rename(columns=dict(list(zip(columns, names))), inplace=True)
+ df.rename(columns=dict(zip(columns, names)), inplace=True)
df.rename(columns={'_'.join(map(str, date_ids)): 'time'}, inplace=True)
# Set the data frame index as time
@@ -128,13 +124,15 @@ def do_parse(self,
# Removes hours > 24 and minutes > 60
hours = \
np.array(
- [ln.split(':')[0].split(' ')[-1] for ln in index]).astype(int)
+ map(lambda ln: ln.split(':')[0].split(' ')[-1],
+ index)).astype(int)
df = df[hours <= 24]
index = index[hours <= 24]
minutes = \
np.array(
- [ln.split(':')[1] for ln in index]).astype(int)
+ map(lambda ln: ln.split(':')[1],
+ index)).astype(int)
df = df[minutes <= 60]
index = index[minutes <= 60]
@@ -154,7 +152,7 @@ def do_parse(self,
df.index = index
# Shifting dates depending on time zone, then removing corresponding key
- df.index = shiftdate(df.index, extra['tz'])
+ df.index = utils.shiftdate(df.index, extra['tz'])
del extra['tz']
# Fill extra columns with the same value everywhere
@@ -162,8 +160,8 @@ def do_parse(self,
for e in extra:
df[e] = extra[e]
- df['station'] = file_infos['stat']
- df['network'] = file_infos['provider']
+ df['station'] = fic_infos['stat']
+ df['network'] = fic_infos['provider']
df['parameter'] = spec.lower()
df['duration'] = default_duration
df.rename(columns={spec.lower(): 'obs'}, inplace=True)
@@ -180,7 +178,7 @@ def do_parse(self,
# Rescales if needed
if kwargs.get('rescale', False):
- coeffscale = rescale(obs_file, header)
+ coeffscale = utils.rescale(fic, header)
if np.isnan(coeffscale):
check.verbose("Unknown scale, please check with provider")
@@ -188,7 +186,7 @@ def do_parse(self,
df['obserror'] *= coeffscale
# Converts unit
- df = convert_unit(df, ['obs', 'obserror'],
+ df = utils.convert_unit(df, ['obs', 'obserror'],
default_unit=default_unit)
return df
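A remark on the parse_dates edit above: pd.read_csv of this era merges several columns into one datetime column when given a list of lists of column indices. Assuming the from builtins imports at the top of the file are indeed dropped (as the hunk arithmetic suggests), range() again returns a plain list on Python 2, so [range(len(date_ids))] is such a list of lists; on Python 3, range() is lazy, which is why the previous spelling wrapped it in list(). The same caveat applies to the np.array(map(...)) edits above: a lazy map object would be stored by np.array as a 0-dimensional object array instead of being expanded. A small, self-contained sketch of the column merging, with illustrative data:

    import pandas as pd
    from io import StringIO

    raw = StringIO(u"2007-01-15 12:30 1853.2\n"
                   u"2007-01-15 13:30 1854.0\n")
    date_ids = [0, 1]  # date column and time column

    df = pd.read_csv(raw, delim_whitespace=True, header=None,
                     parse_dates=[list(range(len(date_ids)))])

    # The merged column is named '0_1', i.e. '_'.join(map(str, date_ids)),
    # which is exactly what the rename to 'time' in do_parse expects
    print(df.columns[0])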
# -*- coding: utf-8 -*-
- from __future__ import print_function
- from __future__ import absolute_import
- from .utils import remap_extract, find_header
+ import utils
import string
import pycif.utils.check as check
- def get_header(obs_file, maxlen):
+ def get_header(fic, maxlen):
"""Extract the header from a WDCGG File
Args:
- obs_file (str): path to input file
+ fic (str): path to input file
maxlen (int): abort after this amount of lines when reading header.
Default 300
@@ -20,7 +18,7 @@ def get_header(obs_file, maxlen):
List[str]: List with all Lines of the Header
"""
- with open(obs_file, "r") as input_file:
+ with open(fic, "r") as input_file:
lines = []
nheader = 0
@@ -85,8 +83,8 @@ def parse_header(header, spec, list_extract,
date_ids = [head.index('date')]
except:
- print(header)
- print(head)
+ print header
+ print head
raise ValueError("Cant find a date in this WDCGG file. " \
"Please check format")
@@ -102,7 +100,7 @@ def parse_header(header, spec, list_extract,
for id_extract in list_extract:
try:
# First look into columns names
- columns.append(head.index(remap_extract(id_extract)))
+ columns.append(head.index(utils.remap_extract(id_extract)))
names.append(id_extract.lower())
except:
@@ -110,7 +108,7 @@ def parse_header(header, spec, list_extract,
# Some files have a name with CH4_Air instead of CH4
columns.append(
head.index(
- remap_extract(id_extract) + '_air'))
+ utils.remap_extract(id_extract) + '_air'))
names.append(id_extract.lower())
except:
@@ -119,7 +117,7 @@ def parse_header(header, spec, list_extract,
id_value = find_header(id_extract, header)
extra[id_extract.lower()] = id_value
- except Exception as e:
+ except Exception, e:
# If cannot find,
# assume default values for unit and timezone
check.verbose("Cant extract " + id_extract)
@@ -133,5 +131,46 @@ def parse_header(header, spec, list_extract,
else:
extra[id_extract] = None
return names, columns, date_ids, extra
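A remark on the syntax edits above: print header / print head and except Exception, e: parse only under Python 2, and only once from __future__ import print_function is removed; likewise import utils is a Python 2 implicit relative import, which is presumably why absolute_import goes away as well. This pins the module to Python 2. For reference, a minimal sketch of the forms that parse on both interpreter generations (from . import utils would be the cross-version relative import, inside a package):

    from __future__ import print_function

    try:
        raise ValueError("example")
    except Exception as e:  # the 'as' spelling parses on Python 2 and 3
        print(e)            # function-call print parses on both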
+ def remap_head(s):
+ """Adapts names to extract values in WDCGG header
+ """
+ if s.lower() == 'lat':
+ return 'latitude:'
+ elif s.lower() == 'lon':
+ return 'longitude:'
+ elif s.lower() == 'alt':
+ return 'altitude:'
+ elif s.lower() == 'unit':
+ return 'measurement unit'
+ elif s.lower() == 'tz':
+ return 'time zone'
+ else:
+ return s.lower()
+ def find_header(id_extract, header):
+ """Finds the value of a constant parameter (e.g., latitude, altitude, etc.)
+ in the header of a file
+ """
+ for ln in header:
+ if remap_head(id_extract) in ln.lower():
+ value = string.split(ln.lower(), ':')[1].strip()
+ try:
+ return float(value)
+ except ValueError:
+ return value
+ raise ValueError("Couldn't extract {}".format(id_extract))
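For context, the remap_head/find_header pair (added to headers.py above, removed from utils.py below) scans the file header for constant site metadata. A small usage sketch with made-up, WDCGG-style header lines (real headers differ in detail):

    header = ["C24 LATITUDE: 48.71",
              "C25 LONGITUDE: 2.21",
              "C30 TIME ZONE: UTC"]

    print(find_header('lat', header))  # 48.71 (converted to float)
    print(find_header('tz', header))   # 'utc' (kept as a string)

Note that string.split(ln.lower(), ':')[1] keeps only the text between the first and second colon and relies on the Python-2-only string module; ln.lower().split(':')[1] would behave identically without that dependency.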
# -*- coding: utf-8 -*-
- from __future__ import absolute_import
import string
import os
from dateutil.tz import tzoffset
import numpy as np
+ import headers
- def remap_head(s):
- """Adapts names to extract values in WDCGG header
- """
- if s.lower() == 'lat':
- return 'latitude:'
- elif s.lower() == 'lon':
- return 'longitude:'
- elif s.lower() == 'alt':
- return 'altitude:'
- elif s.lower() == 'unit':
- return 'measurement unit'
- elif s.lower() == 'tz':
- return 'time zone'
- else:
- return s.lower()
- def find_header(id_extract, header):
- """Finds the value of a constant parameter (e.g., latitude, altitude, etc.)
- in the header of a file
- """
- for ln in header:
- if remap_head(id_extract) in ln.lower():
- value = string.split(ln.lower(), ':')[1].strip()
- try:
- return float(value)
- except ValueError:
- return value
- raise ValueError("Couldn't extract {}".format(id_extract))
- def rescale(obs_file, header):
+ def rescale(fic, header):
"""Finds out on what scale the measurement was reported and returns the
corresponding scaling factor.
@@ -59,10 +19,10 @@ def rescale(obs_file, header):
"""
try:
- scale = find_header('scale', header)
+ scale = headers.find_header('scale', header)
except:
- scale = parse_file(obs_file)['provider']
+ scale = parse_fic(fic)['provider']
if '04' in scale or 'wmo' in scale.lower():
return 1.
@@ -89,22 +49,22 @@ def rescale(obs_file, header):
return np.nan
- def parse_file(obs_file, **kwargs):
+ def parse_fic(fic):
"""Parses WDCGG file name and extract corresponding information.
This is based on WDCGG standard naming format as detailed in:
http://ds.data.jma.go.jp/gmd/wdcgg/pub/data/WDCGG_filename_format.pdf
"""
- filesplit = string.split(os.path.basename(obs_file), '.')
+ ficsplit = string.split(os.path.basename(fic), '.')
infos = {}
- infos['stat'] = filesplit[0][:3]
- infos['provider'] = filesplit[1].replace('_', '-')
- infos['site category'] = filesplit[2]
- infos['obs type'] = filesplit[-5]
- infos['parameter'] = filesplit[-4]
- infos['freq'] = filesplit[-3]
+ infos['stat'] = ficsplit[0][:3]
+ infos['provider'] = ficsplit[1].replace('_', '-')
+ infos['site category'] = ficsplit[2]
+ infos['obs type'] = ficsplit[-5]
+ infos['parameter'] = ficsplit[-4]
+ infos['freq'] = ficsplit[-3]
return infos
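parse_fic leans entirely on the dot-separated WDCGG naming convention documented in the linked PDF. Tracing the code with a made-up but format-conforming file name (all field values hypothetical):

    infos = parse_fic('mhd541n00.lsce.as.cn.ch4.nl.hr2007.dat')

    # infos['stat']          -> 'mhd'   (first three characters)
    # infos['provider']      -> 'lsce'
    # infos['site category'] -> 'as'
    # infos['obs type']      -> 'cn'    (fifth field from the end)
    # infos['parameter']     -> 'ch4'
    # infos['freq']          -> 'nl'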
......
@@ -80,11 +80,11 @@ class ObsParser(Plugin):
.register_plugin(provider, file_format_id, parse_module,
plugin_type='obsparser')
- def parse_file(self, obs_file, **kwargs):
+ def parse_file(self, fic, **kwargs):
"""This function does the parsing (and post processing if necessary).
Args:
- obs_file (str): path to input file
+ fic (str): path to input file
Keyword Args:
encoding (str): Encoding of input files
@@ -99,7 +99,7 @@ class ObsParser(Plugin):
Dataframe df[obssite_id, parameter] with t as index
"""
- df = self.do_parse(obs_file, **kwargs)
+ df = self.do_parse(fic, **kwargs)
# Removing rows with only NaNs
df = df.dropna(axis=1, how="all")
@@ -130,7 +130,7 @@ class ObsParser(Plugin):
See the respective documentation
Returns:
- dict: {obs_file} = df[obssite_id, parameter]
+ dict: {fic} = df[obssite_id, parameter]
"""
# parser = cls.get_parser(provider_name, file_format_id)
@@ -139,18 +139,18 @@ class ObsParser(Plugin):
verbose("Reading files in " + self.dir_obs)
- for obs_file in sorted(glob.glob(self.dir_obs + '*')):
+ for fic in sorted(glob.glob(self.dir_obs + '*')):
try:
- dfs[os.path.basename(obs_file)] = \
- self.parse_file(obs_file, **kwargs)
+ dfs[os.path.basename(fic)] = \
+ self.parse_file(fic, **kwargs)
except error.PluginError as e:
verbose("{} was not loaded for the following reason"
- .format(obs_file))
+ .format(fic))
verbose(e.message)
if dfs != {}:
- return pd.concat(list(dfs.values()))
+ return pd.concat(dfs.values())
else:
return pd.DataFrame({})
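One last remark on the reading loop above: glob.glob(self.dir_obs + '*') only matches the directory contents if dir_obs ends with a path separator. A sketch of a spelling without that assumption (hypothetical path):

    import glob
    import os

    dir_obs = '/tmp/obs'  # hypothetical observation directory

    # os.path.join inserts the separator whether or not dir_obs ends with one
    for fic in sorted(glob.glob(os.path.join(dir_obs, '*'))):
        print(os.path.basename(fic))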
......