Skip to content
Snippets Groups Projects
Commit d2dcd199 authored by Paul Eckhardt's avatar Paul Eckhardt
Browse files

REFACTOR: ebas_commandline, new custom args group FILE_OUTPUT

          substitutes common output arguments in all main programs dealing with file output
parent 7419e82c
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
"""
$Id: ebas_convert.py 1633 2017-05-23 16:12:18Z pe $
$Id: ebas_convert.py 2721 2021-10-22 23:02:49Z pe $
Example for reading an EBAS_1.1 NasaAmes datafile and writing the same file
in a different file format.
......@@ -11,9 +11,8 @@ import argparse
import textwrap
import re
from nilutility.argparse_helper import ParseStrings, ParseIntegers
from ebas.db_fileindex import IndexDb, OperationalError
from ebas.commandline import EbasCommandline
from ebas.io.file import EbasIOFlagOption
from ebas.io.file import EBAS_IOSTYLE_KEEP
from ebas.io.file.nasa_ames import EbasNasaAmes, EbasNasaAmesReadError
from ebas.io.fileset import EbasIOResultSet
from ebas.io.file.base import EBAS_IOFORMAT_NASA_AMES, EBAS_IOFORMAT_CSV, \
......@@ -35,30 +34,6 @@ class ParseDestDirAction(argparse.Action): # pylint: disable-msg=R0903
setattr(namespace, self.dest, values)
def _parse_format_arg(string):
    """
    Translate the output format name given on the command line into the
    corresponding EBAS_IOFORMAT_* constant.
    The comparison is an exact (case sensitive) string match.
    Parameters:
        string    cmd-line parameter string to be checked, one of
                  'NasaAmes', 'CSV', 'XML', 'NetCDF', 'OPeNDAP'
    Returns:
        format constant (EBAS_IOFORMAT_NASA_AMES, EBAS_IOFORMAT_CSV,
        EBAS_IOFORMAT_XML, EBAS_IOFORMAT_NETCDF or EBAS_IOFORMAT_OPENDAP)
    Raises:
        argparse.ArgumentTypeError
            if the string is none of the supported format names
    """
    if string == 'NasaAmes':
        outformat = EBAS_IOFORMAT_NASA_AMES
    elif string == 'CSV':
        outformat = EBAS_IOFORMAT_CSV
    elif string == 'XML':
        outformat = EBAS_IOFORMAT_XML
    elif string == 'NetCDF':
        outformat = EBAS_IOFORMAT_NETCDF
    elif string == 'OPeNDAP':
        outformat = EBAS_IOFORMAT_OPENDAP
    else:
        # anything else is reported back to argparse as a usage error
        raise argparse.ArgumentTypeError(
            'unsupported output format: {}'.format(string))
    return outformat
def add_private_args(parser, cmdline): # pylint: disable=W0613
# W0613: Unused argument 'cmdline'
"""
......@@ -72,133 +47,10 @@ def add_private_args(parser, cmdline): # pylint: disable=W0613
None
"""
parser_nasa_group = parser.add_argument_group('nasa ames reading options')
parser_nasa_group.add_argument(
'--ignore',
type=ParseStrings(allow_scalar=True, allow_list=True,
allowed_values=['numformat', 'dx0',
'identical-values', 'parameter',
'rescode']),
default=[],
help='ignore certain errors while reading (numformat, dx0, '
'identical-values)\n\n'
'numformat: ignore if number format does not match missing value code'
'\n\n'
'dx0: allow DX=0 with regular data (DX!=0 will always raise an error '
'if not correct)\n\n'
'identical-values: ignore errors related to all values of one variable '
'being equal (e.g. all data missing for one variable, all values 0.0, '
'or all values any other constant value). Those cases usually indicate '
'an ERROR in the data. However if this is investigated and those data '
'should be read, use this flag to downgrade the ERROR severity to '
'WARNING. For the opposite (those cases are definitely identified as '
'ERROR should not be read while the rest of the file should be read, '
'consider the --skip-variables parameter)\n\n'
'parameter: ignore validity of parameters (component names or '
'component/matrix combination.\n\n'
'rescode: ignore wrong resolution codes (not consistent with '
'resolution of data).')
parser_nasa_group.add_argument(
'--skip-variables',
type=ParseIntegers(allow_scalar=True, allow_list=True,
allow_range=False),
default=[],
help='skip the variable(s) referenced by the variable number(s) while '
'reading the file.\n'
'Specify a single variable number or a comma separated list of '
'variable numbers. '
'Variable numbers start with 1 for the variable succeeding '
'end_time (same numbering as in all WARNING/ERROR messages)\n'
'start_time and end_time can not be skipped, numflags may not be '
'skipped')
parser_nasa_group.add_argument('--skip_unitconvert', action='store_true')
parser_nasa_group.add_argument(
'filenames', nargs='*',
help='input file(s), EBAS NASA-Ames format')
parser_output_group = parser.add_argument_group('output options')
parser_output_group.add_argument(
'--format', type=_parse_format_arg, default=EBAS_IOFORMAT_NASA_AMES,
help='file format for output: (NasaAmes, CSV, XML, NetCDF, OPeNDAP), '
'(default: NasaAmes)')
parser_target_group = parser_output_group.add_mutually_exclusive_group()
parser_target_group.add_argument(
'--xmlwrap', action='store_true',
help='wrap output in xml containers')
parser_target_group.add_argument(
'--createfiles', action='store_true',
help='create files instead of output to stdout')
parser_output_group.add_argument(
'--destdir', action=ParseDestDirAction,
help='set output directory for files (only allowed after '
'--createfiles)')
parser_output_group.add_argument(
'--flags',
choices=EbasIOFlagOption.legal_options(), action=None,
default=EbasIOFlagOption('one-or-all'), type=EbasIOFlagOption,
help=textwrap.dedent('''
flag columns style:
* one-or-all (default):
If all variables share the same sequence of flags throughout\
the whole file, use one flag column as last column.
Else, one flag column per variable is used.
This is the default behavior starting from EBAS 3.0.
* compress:
If multiple variables share the same sequence of flags\
throughout the whole file, one flag column after this group\
of variables is used.
This produces files as narrow as possible without losing any flag\
information.
This used to be the default behavior up to EBAS 2.2.
* all
All variables get a dedicated flag column.
* none
No flag columns are exported. Invalid or missing data are both\
reported as MISSING value. This should be used very carfully,\
as information is LOST on export!
Intended for non expert uses, as the easiest approach to process\
only valid data, without bothering about the EBAS flag system.
Note: Detection limit values (flag 781) are exported as\
value/2.0 (only in this case, when no flag information is \
extracted).
As a general rule, a flag column applies ALWAYS to all preceding\
variables after the previous flag column.'''))
parser_output_group.add_argument(
'--fileindex', action=None,
help='file path and name for ebas file index database (sqlite3).'
'Prepend a plus sign (+) to the filename in order to add to an '
'existing database instead of creating a new one.')
def open_indexdb(dbname, logger):
    """
    Open the file index database and create its tables.
    Parameters:
        dbname    db file name; if empty or None, no index db is used
        logger    logger object, used for reporting the handled errors
    Returns:
        the IndexDb object, or None if no dbname was given
    Raises:
        OperationalError
            re-raised for any database error other than the two handled
            below
    The process is terminated with exit status 1 (after logging an error)
    if the database file can not be opened or if the tables exist already.
    """
    # Nothing to do when no index database was requested.
    if not dbname:
        return None

    try:
        indexdb = IndexDb(dbname)
    except OperationalError as err:
        if str(err) == 'unable to open database file':
            logger.error("can not open index db {}".format(dbname))
            sys.exit(1)
        # every other database error is passed on to the caller
        raise

    try:
        indexdb.create_tables()
    except OperationalError as err:
        if not re.match('table .* already exists', str(err)):
            raise
        logger.error("index db {} exists already".format(dbname))
        sys.exit(1)
    return indexdb
def ebas_convert(cmdline):
"""
Main program for ebas_convert
......@@ -211,26 +63,24 @@ def ebas_convert(cmdline):
"""
logger = logging.getLogger('ebas_convert')
args = cmdline.args
nas_opt = cmdline.get_custom_args('NASA_READ')
output_options = cmdline.get_custom_args('FILE_OUTPUT')
exitcode = 0 # number of failed files (max 255)
indexdb = open_indexdb(args.fileindex, logger)
fileset = EbasIOResultSet(
outformat=args.format, createfiles=args.createfiles,
destdir=args.destdir, xmlwrap=args.xmlwrap, flags=args.flags,
indexdb=indexdb)
for filename in args.filenames:
nas = read_file(args, logger, filename)
fileset = EbasIOResultSet(**output_options)
for filename in args.filenames:
nas = read_file(nas_opt, logger, filename)
if nas is None:
exitcode += 1
else:
outputfile = fileset.new_result()
outputfile.from_ebasfile(nas)
fileset.add_ebasfile(nas)
fileset.extract_all()
exitcode = min(255, exitcode)
sys.exit(exitcode)
def read_file(args, logger, filename):
def read_file(nas_opt, logger, filename):
"""
Reads a single file.
Parameters:
......@@ -241,33 +91,11 @@ def read_file(args, logger, filename):
EbasNasaAmes file object or None in case of errors.
"""
logger.info('reading input file {}'.format(filename))
nas = EbasNasaAmes()
ignore_numformat = False
if 'numformat' in args.ignore:
ignore_numformat = True
ignore_dx0 = False
if 'dx0' in args.ignore:
ignore_dx0 = True
ignore_identicalvalues = False
if 'identical-values' in args.ignore:
ignore_identicalvalues = True
ignore_parameter = False
if 'parameter' in args.ignore:
ignore_parameter = True
ignore_rescode = False
if 'rescode' in args.ignore:
ignore_rescode = True
skip_variables = [args.skip_variables] \
if isinstance(args.skip_variables, int) else args.skip_variables
nas = EbasNasaAmes(**nas_opt['nas_init'])
try:
nas.read(filename, skip_unitconvert=args.skip_unitconvert,
ignore_parameter=ignore_parameter,
ignore_rescode=ignore_rescode,
ignore_numformat=ignore_numformat, ignore_dx0=ignore_dx0,
ignore_identicalvalues=ignore_identicalvalues,
skip_variables=skip_variables)
nas.read(filename, **nas_opt['nas_read'])
except (IOError, EbasNasaAmesReadError) as excpt:
logger.error("file {}: {}".format(filename, str(excpt)))
return None
......@@ -275,7 +103,8 @@ def read_file(args, logger, filename):
EbasCommandline(
ebas_convert,
custom_args=['CONFIG', 'LOGGING', 'TIME_CRIT'],
custom_args=['CONFIG', 'LOGGING', 'TIME_CRIT', 'NASA_READ',
'FILE_OUTPUT_KEEPCOLUMNS'],
private_args=add_private_args,
help_description='%(prog)s example for converting a NasaAmes datafile to '
'different dataformats', version=__version__).run()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment