VERIFY / CIF · Commits

Commit 071c32e5
authored Nov 20, 2019 by Espen Sollum
parent ab34842a

    Bug from previous bug resolved

6 changed files
pycif/plugins/controlvects/standard/utils/dimensions.py
@@ -52,6 +52,15 @@ def hresol2dim(tracer, dom, **kwargs):
             tracer.regions = f.variables['regions'][:]
             tracer.nregions = len(np.unique(tracer.regions))
 
+        # Default behaviour: optimize ocean boxes
+        tracer.inc_ocean = getattr(tracer, 'inc_ocean', True)
+
+        # Set ocean boxes to positive values
+        tr_tmp = tracer.regions
+        tr_tmp[tr_tmp >= 1] = tr_tmp[tr_tmp >= 1] - 1
+        tr_tmp = tr_tmp + np.abs(tr_tmp.min()) + 1
+        tracer.regions = tr_tmp.astype(int)
+
         # Check that regions have the correct dimensions
         if tracer.regions.shape != (dom.nlat, dom.nlon):
             raise Exception("Regions were not correctly defined in {}"
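A minimal standalone sketch (plain numpy, hypothetical region values, not taken from actual CIF inputs) of what the inserted re-indexing does: values >= 1 are first shifted down by one, then the whole map is offset by abs(min) + 1, so every box, including ocean boxes originally coded with non-positive values, ends up with a strictly positive integer index:

    import numpy as np

    # Hypothetical region map: non-positive values stand for ocean boxes,
    # values >= 1 for land regions
    regions = np.array([[-2., -1., 1.],
                        [ 1.,  2., 3.]])

    tr_tmp = regions.copy()                          # the diff mutates tracer.regions in place
    tr_tmp[tr_tmp >= 1] = tr_tmp[tr_tmp >= 1] - 1    # shift land indices down by one
    tr_tmp = tr_tmp + np.abs(tr_tmp.min()) + 1       # offset so the minimum becomes 1
    regions = tr_tmp.astype(int)

    print(regions)
    # [[1 2 3]
    #  [3 4 5]]

How ocean and land boxes end up interleaved depends on the coding convention of the `regions` NetCDF variable; the sketch only illustrates the arithmetic.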
pycif/plugins/measurements/standard/__init__.py
-from builtins import str
 from pycif.utils.check import verbose
 from pycif.utils.datastores import dump
 from pycif.utils.datastores.empty import init_empty
@@ -13,7 +12,7 @@ import shutil
 def parse_tracers(self,
                   datei, datef,
-                  file_monitor='',
+                  fic_monitor='',
                   workdir='',
                   **kwargs):
     """Parses all observation files related to the tracers specified as
@@ -24,7 +23,7 @@ def parse_tracers(self,
         datei (datetime.datetime): initial date for the inversion window
         datef (datetime.datetime): end date for the inversion window
-        file_monitor (str): file with pre-compile observations if exists
+        fic_monitor (str): file with pre-compile observations if exists
         workdir (str): working directory
         logfile (str): path to the log file for verbose instances
         **kwargs (dictionary) : any additional argument that might be useful
@@ -42,14 +41,14 @@ def parse_tracers(self,
     # Dump type: default is nc
     self.dump_type = getattr(self, 'dump_type', 'nc')
 
-    # If file_monitor is defined, tries reading it
-    if hasattr(self, 'file_monitor'):
-        file_monitor = self.file_monitor
+    # If fic_monitor is defined, tries reading it
+    if hasattr(self, 'fic_monitor'):
+        fic_monitor = self.fic_monitor
 
     try:
-        verbose("Extracting measurements from {}".format(file_monitor))
+        verbose("Extracting measurements from {}".format(fic_monitor))
         return \
-            dump.read_datastore(file_monitor,
+            dump.read_datastore(fic_monitor,
                                 dump_type=self.dump_type,
                                 **kwargs)
@@ -60,14 +59,14 @@ def parse_tracers(self,
     except Exception as e:
         verbose(e)
         verbose("Could not read the specified monitor file: {}",
-                file_monitor)
+                fic_monitor)
         raise e
 
     # Otherwise, create the monitor from observations
     if hasattr(self, 'workdir'):
         workdir = self.workdir
 
-    file_monitor = workdir + '/obs/monit_standard.nc'
+    fic_monitor = workdir + '/obs/monit_standard.nc'
 
     # If the measurement definition is empty in the Yaml,
     # return an empty datastore
@@ -76,7 +75,7 @@ def parse_tracers(self,
     # Loops through tracers if monitor not found
     path.init_dir(workdir + '/obs/')
-    shutil.rmtree(file_monitor, ignore_errors=True)
+    shutil.rmtree(fic_monitor, ignore_errors=True)
 
     datastore = {}
@@ -114,14 +113,14 @@ def parse_tracers(self,
             dataspec[provider] = \
                 dataspec[provider].loc[str(datei):str(datef)]
 
-        datastore[spec] = pd.concat(list(dataspec.values()))
+        datastore[spec] = pd.concat(dataspec.values())
 
     # Grouping species into a single datastore
-    datastore = pd.concat(list(datastore.values()))
+    datastore = pd.concat(datastore.values())
 
     # Dumping
-    dump.dump_datastore(datastore, file_monitor, workdir,
+    dump.dump_datastore(datastore, fic_monitor, workdir,
                         dump_type=self.dump_type,
                         **kwargs)
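Besides the `file_monitor` to `fic_monitor` rename, this hunk drops the `list(...)` wrappers that an earlier Python 3 conversion had added around `dict.values()`. The wrapper is in fact redundant for `pd.concat`, which accepts any iterable of frames; a quick self-contained check with hypothetical data:

    import pandas as pd

    dataspec = {
        'providerA': pd.DataFrame({'obs': [1.0, 2.0]}),
        'providerB': pd.DataFrame({'obs': [3.0]}),
    }

    # pd.concat accepts the dict view directly, on Python 2 and 3 alike,
    # so pd.concat(list(dataspec.values())) and this call are equivalent
    datastore = pd.concat(dataspec.values())
    print(len(datastore))  # 3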
pycif/plugins/obsparsers/wdcgg/__init__.py
 # -*- coding: utf-8 -*-
-from builtins import map
-from builtins import zip
-from builtins import range
 import pandas as pd
 import os
 import numpy as np
 import pycif.utils.check as check
-from .headers import get_header, parse_header
-from .utils import parse_file, shiftdate, rescale, convert_unit
+from . import headers, utils
 
 
-def do_parse(self, obs_file,
+def do_parse(self, fic,
              maxlen=300,
              default_unit='ppm',
              default_tz='utc',
@@ -26,7 +22,7 @@ def do_parse(self,
     """Parse function for a file from WDCGG
 
     Args:
-        obs_file (str) :
+        fic (str) :
             Path to input file
         maxlen (int):
             Maximum possible length for a WDCGG header. Default is `300`
@@ -55,7 +51,7 @@ def do_parse(self,
     """
 
     check.verbose("Reading observation file: {}"
-                  .format(os.path.basename(obs_file)))
+                  .format(os.path.basename(fic)))
 
     # Get default unit from species description if available
     if hasattr(self, 'default_unit'):
@@ -74,37 +70,37 @@ def do_parse(self,
         err_na_values = self.err_na_values
 
     # Scans file to get the header
-    header = get_header(obs_file, maxlen)
+    header = headers.get_header(fic, maxlen)
 
     # Does not read empty files
     if len(header) == 0:
-        check.verbose("{} is empty. Not reading it".format(obs_file))
+        check.verbose("{} is empty. Not reading it".format(fic))
         return pd.DataFrame({})
 
     else:
         # Get spec info either from the function argument
         # or from the file name
-        file_infos = parse_file(obs_file)
+        fic_infos = utils.parse_fic(fic)
 
         if spec is None:
-            spec = file_infos['parameter']
+            spec = fic_infos['parameter']
 
         list_extract = [spec] + extract
 
         # Get the content of columns and extra information from the header
         names, columns, date_ids, extra = \
-            parse_header(header,
+            headers.parse_header(header,
                          spec,
                          list_extract,
                          default_unit,
                          default_tz)
 
         # Reads the file with Pandas
-        df = pd.read_csv(obs_file,
+        df = pd.read_csv(fic,
                          delim_whitespace=True,
                          skiprows=len(header),
                          usecols=date_ids + columns,
-                         parse_dates=[list(range(len(date_ids)))],
+                         parse_dates=[range(len(date_ids))],
                          infer_datetime_format=True,
                          quoting=3,
                          header=None,
@@ -113,7 +109,7 @@ def do_parse(self,
         # Rename columns according to standard names
-        df.rename(columns=dict(list(zip(columns, names))), inplace=True)
+        df.rename(columns=dict(zip(columns, names)), inplace=True)
         df.rename(columns={'_'.join(map(str, date_ids)): 'time'},
                   inplace=True)
 
         # Set the data frame index as time
@@ -128,13 +124,15 @@ def do_parse(self,
         # Removes hours > 24 and minutes > 60
         hours = \
-            np.array([ln.split(':')[0].split(' ')[-1]
-                      for ln in index]).astype(int)
+            np.array(map(
+                lambda ln: ln.split(':')[0].split(' ')[-1],
+                index)).astype(int)
         df = df[hours <= 24]
         index = index[hours <= 24]
 
         minutes = \
-            np.array([ln.split(':')[1]
-                      for ln in index]).astype(int)
+            np.array(map(
+                lambda ln: ln.split(':')[1],
+                index)).astype(int)
         df = df[minutes <= 60]
         index = index[minutes <= 60]
@@ -154,7 +152,7 @@ def do_parse(self,
         df.index = index
 
         # Shifting dates depending on time zone, then removing corresponding key
-        df.index = shiftdate(df.index, extra['tz'])
+        df.index = utils.shiftdate(df.index, extra['tz'])
         del extra['tz']
 
         # Fill extra columns with the same value everywhere
@@ -162,8 +160,8 @@ def do_parse(self,
         for e in extra:
             df[e] = extra[e]
 
-        df['station'] = file_infos['stat']
-        df['network'] = file_infos['provider']
+        df['station'] = fic_infos['stat']
+        df['network'] = fic_infos['provider']
         df['parameter'] = spec.lower()
         df['duration'] = default_duration
         df.rename(columns={spec.lower(): 'obs'}, inplace=True)
@@ -180,7 +178,7 @@ def do_parse(self,
         # Rescales if needed
         if kwargs.get('rescale', False):
-            coeffscale = rescale(obs_file, header)
+            coeffscale = utils.rescale(fic, header)
             if np.isnan(coeffscale):
                 check.verbose("Unknown scale, please check with provider")
@@ -188,7 +186,7 @@ def do_parse(self,
             df['obserror'] *= coeffscale
 
         # Converts unit
-        df = convert_unit(df, ['obs', 'obserror'],
+        df = utils.convert_unit(df, ['obs', 'obserror'],
                           default_unit=default_unit)
 
     return df
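Several of these hunks revert Python 3 constructs (`list(zip(...))`, list comprehensions) back to Python 2 `map(...)` calls. Note that under Python 3, `np.array(map(...))` would wrap the map object itself rather than its elements, so the reverted form assumes a Python 2 interpreter. A self-contained sketch of the hour/minute sanitisation with hypothetical timestamps, written as comprehensions so it runs on either version:

    import numpy as np
    import pandas as pd

    # Hypothetical raw WDCGG timestamps, one with an out-of-range hour
    index = pd.Index(['2019-11-20 23:30', '2019-11-20 99:99', '2019-11-20 12:00'])
    df = pd.DataFrame({'obs': [1.0, 2.0, 3.0]})

    # Same logic as the diff, but as comprehensions instead of Python 2 map()
    hours = np.array([ln.split(':')[0].split(' ')[-1] for ln in index]).astype(int)
    df = df[hours <= 24]
    index = index[hours <= 24]

    minutes = np.array([ln.split(':')[1] for ln in index]).astype(int)
    df = df[minutes <= 60]
    index = index[minutes <= 60]

    print(list(index))  # ['2019-11-20 23:30', '2019-11-20 12:00']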
pycif/plugins/obsparsers/wdcgg/headers.py
 # -*- coding: utf-8 -*-
-from __future__ import print_function
-from __future__ import absolute_import
-from .utils import remap_extract, find_header
+import utils
 import string
 import pycif.utils.check as check
 
 
-def get_header(obs_file, maxlen):
+def get_header(fic, maxlen):
     """Extract the header from a WDCGG File
 
     Args:
-        obs_file (str): path to input file
+        fic (str): path to input file
         maxlen (int): abort after this amount of lines when reading header.
             Default 300
@@ -20,7 +18,7 @@ def get_header(obs_file, maxlen):
         List[str]: List with all Lines of the Header
     """
-    with open(obs_file, "r") as input_file:
+    with open(fic, "r") as input_file:
         lines = []
         nheader = 0
@@ -85,8 +83,8 @@ def parse_header(header, spec, list_extract,
         date_ids = [head.index('date')]
     except:
-        print(header)
-        print(head)
+        print header
+        print head
         raise ValueError("Cant find a date in this WDCGG file. " \
                          "Please check format")
@@ -102,7 +100,7 @@ def parse_header(header, spec, list_extract,
     for id_extract in list_extract:
         try:
             # First look into columns names
-            columns.append(head.index(remap_extract(id_extract)))
+            columns.append(head.index(utils.remap_extract(id_extract)))
             names.append(id_extract.lower())
         except:
@@ -110,7 +108,7 @@ def parse_header(header, spec, list_extract,
             # Some files have a name with CH4_Air instead of CH4
             columns.append(head.index(
-                remap_extract(id_extract) + '_air'))
+                utils.remap_extract(id_extract) + '_air'))
             names.append(id_extract.lower())
         except:
@@ -119,7 +117,7 @@ def parse_header(header, spec, list_extract,
                 id_value = find_header(id_extract, header)
                 extra[id_extract.lower()] = id_value
-            except Exception as e:
+            except Exception, e:
                 # If cannot find,
                 # assume default values for unit and timezone
                 check.verbose("Cant extract " + id_extract)
@@ -133,5 +131,46 @@ def parse_header(header, spec, list_extract,
             else:
                 extra[id_extract] = None
 
     return names, columns, date_ids, extra
+
+
+def remap_head(s):
+    """Adapts names to extract values in WDCGG header
+    """
+    if s.lower() == 'lat':
+        return 'latitude:'
+    elif s.lower() == 'lon':
+        return 'longitude:'
+    elif s.lower() == 'alt':
+        return 'altitude:'
+    elif s.lower() == 'unit':
+        return 'measurement unit'
+    elif s.lower() == 'tz':
+        return 'time zone'
+    else:
+        return s.lower()
+
+
+def find_header(id_extract, header):
+    """Finds the value of a constant parameter (e.g., latitude, altitude, etc.)
+    in the header of a file
+    """
+    for ln in header:
+        if remap_head(id_extract) in ln.lower():
+            value = string.split(ln.lower(), ':')[1].strip()
+            try:
+                return float(value)
+            except ValueError:
+                return value
+
+    raise ValueError("Couldn't extract {}".format(id_extract))
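`remap_head` and `find_header` are added here verbatim from utils.py, where the next file in this commit removes them. `string.split(s, sep)` only exists in Python 2; a hypothetical Python 3 rendering of the same lookup, with the if/elif chain folded into a dict and toy header lines in place of actual WDCGG content, might look like this:

    # Hypothetical Python 3 equivalent; the committed code targets Python 2
    REMAP = {'lat': 'latitude:', 'lon': 'longitude:', 'alt': 'altitude:',
             'unit': 'measurement unit', 'tz': 'time zone'}

    def find_header(id_extract, header):
        """Return the value following the matched key on the first matching line."""
        key = REMAP.get(id_extract.lower(), id_extract.lower())
        for ln in header:
            if key in ln.lower():
                value = ln.lower().split(':')[1].strip()
                try:
                    return float(value)
                except ValueError:
                    return value
        raise ValueError("Couldn't extract {}".format(id_extract))

    header = ['C14 LATITUDE: 53.33', 'C15 LONGITUDE: -9.90']
    print(find_header('lat', header))  # 53.33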
pycif/plugins/obsparsers/wdcgg/utils.py
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import
 import string
 import os
 from dateutil.tz import tzoffset
 import numpy as np
+import headers
 
 
-def remap_head(s):
-    """Adapts names to extract values in WDCGG header
-    """
-    if s.lower() == 'lat':
-        return 'latitude:'
-    elif s.lower() == 'lon':
-        return 'longitude:'
-    elif s.lower() == 'alt':
-        return 'altitude:'
-    elif s.lower() == 'unit':
-        return 'measurement unit'
-    elif s.lower() == 'tz':
-        return 'time zone'
-    else:
-        return s.lower()
-
-
-def find_header(id_extract, header):
-    """Finds the value of a constant parameter (e.g., latitude, altitude, etc.)
-    in the header of a file
-    """
-    for ln in header:
-        if remap_head(id_extract) in ln.lower():
-            value = string.split(ln.lower(), ':')[1].strip()
-            try:
-                return float(value)
-            except ValueError:
-                return value
-
-    raise ValueError("Couldn't extract {}".format(id_extract))
-
-
-def rescale(obs_file, header):
+def rescale(fic, header):
     """Finds out on what scale the measurement was reported and returns the
     corresponding scaling factor.
@@ -59,10 +19,10 @@ def rescale(obs_file, header):
     """
     try:
-        scale = find_header('scale', header)
+        scale = headers.find_header('scale', header)
     except:
-        scale = parse_file(obs_file)['provider']
+        scale = parse_fic(fic)['provider']
 
     if '04' in scale or 'wmo' in scale.lower():
         return 1.
@@ -89,22 +49,22 @@ def rescale(obs_file, header):
     return np.nan
 
 
-def parse_file(obs_file, **kwargs):
+def parse_fic(fic):
     """Parses WDCGG file name and extract corresponding information.
     This is based on WDCGG standard naming format as detailed in:
     http://ds.data.jma.go.jp/gmd/wdcgg/pub/data/WDCGG_filename_format.pdf
     """
 
-    filesplit = string.split(os.path.basename(obs_file), '.')
+    ficsplit = string.split(os.path.basename(fic), '.')
 
     infos = {}
-    infos['stat'] = filesplit[0][:3]
-    infos['provider'] = filesplit[1].replace('_', '-')
-    infos['site category'] = filesplit[2]
-    infos['obs type'] = filesplit[-5]
-    infos['parameter'] = filesplit[-4]
-    infos['freq'] = filesplit[-3]
+    infos['stat'] = ficsplit[0][:3]
+    infos['provider'] = ficsplit[1].replace('_', '-')
+    infos['site category'] = ficsplit[2]
+    infos['obs type'] = ficsplit[-5]
+    infos['parameter'] = ficsplit[-4]
+    infos['freq'] = ficsplit[-3]
 
     return infos
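A standalone sketch of what `parse_fic` extracts from a dot-separated WDCGG file name, using a hypothetical name following the convention linked in the docstring (`string.split` is replaced by the `str.split` method so the sketch also runs on Python 3):

    import os

    def parse_fic(fic):
        """Sketch of parse_fic: split a WDCGG file name on dots."""
        ficsplit = os.path.basename(fic).split('.')
        return {
            'stat': ficsplit[0][:3],
            'provider': ficsplit[1].replace('_', '-'),
            'site category': ficsplit[2],
            'obs type': ficsplit[-5],
            'parameter': ficsplit[-4],
            'freq': ficsplit[-3],
        }

    # Hypothetical file name following the WDCGG naming convention
    print(parse_fic('/obs/mhd449n00.agage.as.cn.ch4.nl.ev.dat'))
    # {'stat': 'mhd', 'provider': 'agage', 'site category': 'as',
    #  'obs type': 'cn', 'parameter': 'ch4', 'freq': 'nl'}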
pycif/utils/classes/obsparsers.py
@@ -80,11 +80,11 @@ class ObsParser(Plugin):
             .register_plugin(provider, file_format_id, parse_module,
                              plugin_type='obsparser')
 
-    def parse_file(self, obs_file, **kwargs):
+    def parse_file(self, fic, **kwargs):
         """This function does the parsing (and post processing if necessary).
 
         Args:
-            obs_file (str): path to input file
+            fic (str): path to input file
 
         Keyword Args:
             encoding (str): Encoding of input files
@@ -99,7 +99,7 @@ class ObsParser(Plugin):
             Dataframe df[obssite_id, parameter] with t as index
         """
-        df = self.do_parse(obs_file, **kwargs)
+        df = self.do_parse(fic, **kwargs)
 
         # Removing rows with only NaNs
         df = df.dropna(axis=1, how="all")
@@ -130,7 +130,7 @@ class ObsParser(Plugin):
             See the respective documentation
 
         Returns:
-            dict: {obs_file} = df[obssite_id, parameter]
+            dict: {fic} = df[obssite_id, parameter]
         """
         # parser = cls.get_parser(provider_name, file_format_id)
@@ -139,18 +139,18 @@ class ObsParser(Plugin):
         verbose("Reading files in " + self.dir_obs)
-        for obs_file in sorted(glob.glob(self.dir_obs + '*')):
+        for fic in sorted(glob.glob(self.dir_obs + '*')):
             try:
-                dfs[os.path.basename(obs_file)] = \
-                    self.parse_file(obs_file, **kwargs)
+                dfs[os.path.basename(fic)] = \
+                    self.parse_file(fic, **kwargs)
             except error.PluginError as e:
                 verbose("{} was not loaded for the following reason"
-                        .format(obs_file))
+                        .format(fic))
                 verbose(e.message)
 
         if dfs != {}:
-            return pd.concat(list(dfs.values()))
+            return pd.concat(dfs.values())
         else:
             return pd.DataFrame({})
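The loop around `parse_file` accumulates one DataFrame per input file and concatenates them at the end, falling back to an empty frame when nothing was parsed. A generic, self-contained version of that pattern, with a stub parser and a hypothetical directory standing in for pyCIF's plugin machinery:

    import glob
    import os
    import pandas as pd

    def parse_file(path):
        """Stub standing in for ObsParser.parse_file."""
        return pd.DataFrame({'obs': [1.0]})

    def parse_datastore(dir_obs):
        dfs = {}
        for fic in sorted(glob.glob(dir_obs + '*')):
            try:
                dfs[os.path.basename(fic)] = parse_file(fic)
            except Exception as e:  # pyCIF narrows this to error.PluginError
                print("{} was not loaded for the following reason".format(fic))
                print(e)
        # Same fallback as above: empty DataFrame when no file could be read
        return pd.concat(dfs.values()) if dfs else pd.DataFrame({})

    print(parse_datastore('/hypothetical/obs/'))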