REFACTOR: ebas_commandline, new custom args group FILE_OUTPUT

substitutes common output arguments in all main programs dealing with file output

REFACTOR: ebas_commandline, new custom args group FILE_OUTPUT
d2dcd199 · Paul Eckhardt · 7419e82c · d2dcd199
Commit d2dcd199 authored 3 years ago by Paul Eckhardt
--- a/Examples/ebas_convert.py
+++ b/Examples/ebas_convert.py
 #!/usr/bin/env python
 """
-$Id: ebas_convert.py 1633 2017-05-23 16:12:18Z pe $
+$Id: ebas_convert.py 2721 2021-10-22 23:02:49Z pe $

 Example for reading an EBAS_1.1 NasaAmes datafile and writing the same file
 in a different file format.
@@ -11,9 +11,8 @@ import argparse
 import textwrap
 import re
 from nilutility.argparse_helper import ParseStrings, ParseIntegers
-from ebas.db_fileindex import IndexDb, OperationalError
 from ebas.commandline import EbasCommandline
-from ebas.io.file import EbasIOFlagOption
+from ebas.io.file import EBAS_IOSTYLE_KEEP
 from ebas.io.file.nasa_ames import EbasNasaAmes, EbasNasaAmesReadError
 from ebas.io.fileset import EbasIOResultSet
 from ebas.io.file.base import EBAS_IOFORMAT_NASA_AMES, EBAS_IOFORMAT_CSV, \
@@ -35,30 +34,6 @@ class ParseDestDirAction(argparse.Action):  # pylint: disable-msg=R0903
            setattr(namespace, self.dest, values)


-def _parse_format_arg(string):
-    """
-    Parser for output format.
-    Parameters:
-        string   cmd-line parameter string to be checked
-    Returns:
-        format   constant (FORMAT_NASA, FORMAT_CSV)
-    Raises:
-        argparse.ArgumentTypeError
-                 in case of argument error
-    """
-    if string == 'NasaAmes':
-        return EBAS_IOFORMAT_NASA_AMES
-    if string == 'CSV':
-        return EBAS_IOFORMAT_CSV
-    if string == 'XML':
-        return EBAS_IOFORMAT_XML
-    if string == 'NetCDF':
-        return EBAS_IOFORMAT_NETCDF
-    if string == 'OPeNDAP':
-        return EBAS_IOFORMAT_OPENDAP
-    raise argparse.ArgumentTypeError('unsupported output format: {}'.\
-                                                        format(string))
-
 def add_private_args(parser, cmdline):  # pylint: disable=W0613
    # W0613: Unused argument 'cmdline'
    """
@@ -72,133 +47,10 @@ def add_private_args(parser, cmdline):  # pylint: disable=W0613
        None
    """
    parser_nasa_group = parser.add_argument_group('nasa ames reading options')
-    parser_nasa_group.add_argument(
-        '--ignore',
-        type=ParseStrings(allow_scalar=True, allow_list=True,
-                          allowed_values=['numformat', 'dx0',
-                                          'identical-values', 'parameter',
-                                          'rescode']),
-        default=[],
-        help='ignore certain errors while reading (numformat, dx0, '
-        'identical-values)\n\n'
-        'numformat: ignore if number format does not match missing value code'
-        '\n\n'
-        'dx0: allow DX=0 with regular data (DX!=0 will always raise an error '
-        'if not correct)\n\n'
-        'identical-values: ignore errors related to all values of one variable '
-        'being equal (e.g. all data missing for one variable, all values 0.0, '
-        'or all values any other constant value). Those cases usually indicate '
-        'an ERROR in the data. However if this is investigated and those data '
-        'should be read, use this flag to downgrade the ERROR severity to '
-        'WARNING. For the opposite (those cases are definitely identified as '
-        'ERROR should not be read while the rest of the file should be read, '
-        'consider the --skip-variables parameter)\n\n'
-        'parameter: ignore validity of parameters (component names or '
-        'component/matrix combination.\n\n'
-        'rescode: ignore wrong resolution codes (not consistent with '
-        'resolution of data).')
-    parser_nasa_group.add_argument(
-        '--skip-variables',
-        type=ParseIntegers(allow_scalar=True, allow_list=True,
-                           allow_range=False),
-        default=[],
-        help='skip the variable(s) referenced by the variable number(s) while '
-        'reading the file.\n'
-        'Specify a single variable number or a comma separated list of '
-        'variable numbers. '
-        'Variable numbers start with 1 for the variable succeeding '
-        'end_time (same numbering as in all WARNING/ERROR messages)\n'
-        'start_time and end_time can not be skipped, numflags may not be '
-        'skipped')
-    parser_nasa_group.add_argument('--skip_unitconvert', action='store_true')
    parser_nasa_group.add_argument(
        'filenames', nargs='*',
        help='input file(s), EBAS NASA-Ames format')

-    parser_output_group = parser.add_argument_group('output options')
-    parser_output_group.add_argument(
-        '--format', type=_parse_format_arg, default=EBAS_IOFORMAT_NASA_AMES,
-        help='file format for output: (NasaAmes, CSV, XML, NetCDF, OPeNDAP), '
-             '(default: NasaAmes)')
-    parser_target_group = parser_output_group.add_mutually_exclusive_group()
-    parser_target_group.add_argument(
-        '--xmlwrap', action='store_true',
-        help='wrap output in xml containers')
-    parser_target_group.add_argument(
-        '--createfiles', action='store_true',
-        help='create files instead of output to stdout')
-    parser_output_group.add_argument(
-        '--destdir', action=ParseDestDirAction,
-        help='set output directory for files (only allowed after '
-             '--createfiles)')
-    parser_output_group.add_argument(
-        '--flags',
-        choices=EbasIOFlagOption.legal_options(), action=None,
-        default=EbasIOFlagOption('one-or-all'), type=EbasIOFlagOption,
-        help=textwrap.dedent('''
-            flag columns style:
-            
-            * one-or-all (default):
-            If all variables share the same sequence of flags throughout\
-            the whole file, use one flag column as last column.
-            Else, one flag column per variable is used.
-            This is the default behavior starting from EBAS 3.0. 
-            
-            * compress:
-            If multiple variables share the same sequence of flags\
-            throughout the whole file, one flag column after this group\
-            of variables is used.
-            This produces files as narrow as possible without losing any flag\
-            information.
-            This used to be the default behavior up to EBAS 2.2.
-                
-            * all
-            All variables get a dedicated flag column.
-            
-            * none
-            No flag columns are exported. Invalid or missing data are both\
-            reported as MISSING value. This should be used very carfully,\
-            as information is LOST on export!
-            Intended for non expert uses, as the easiest approach to process\
-            only valid data, without bothering about the EBAS flag system.
-            Note: Detection limit values (flag 781) are exported as\
-            value/2.0 (only in this case, when no flag information is \
-            extracted).
-
-            As a general rule, a flag column applies ALWAYS to all preceding\
-            variables after the previous flag column.'''))
-    parser_output_group.add_argument(
-        '--fileindex', action=None,
-        help='file path and name for ebas file index database (sqlite3).'
-             'Prepend a plus sign (+) to the filename in order to add to an '
-             'existing database instead of creating a new one.')
-
-def open_indexdb(dbname, logger):
-    """
-    Open and create file index db.
-    Parameters:
-        dname    db file name
-        logger   logger object
-    """
-    if dbname:
-        try:
-            indexdb = IndexDb(dbname)
-        except OperationalError as excpt:
-            if str(excpt) == 'unable to open database file':
-                logger.error("can not open index db {}".format(dbname))
-                sys.exit(1)
-            raise
-        try:
-            indexdb.create_tables()
-        except OperationalError as excpt:
-            if re.match('table .* already exists', str(excpt)):
-                logger.error("index db {} exists already".format(dbname))
-                sys.exit(1)
-            else:
-                raise
-        return indexdb
-    return None
-
 def ebas_convert(cmdline):
    """
    Main program for ebas_convert
@@ -211,26 +63,24 @@ def ebas_convert(cmdline):
    """
    logger = logging.getLogger('ebas_convert')
    args = cmdline.args
+    nas_opt = cmdline.get_custom_args('NASA_READ')
+    output_options = cmdline.get_custom_args('FILE_OUTPUT')

    exitcode = 0 # number of failed files (max 255)
-    indexdb = open_indexdb(args.fileindex, logger)
-    fileset = EbasIOResultSet(
-        outformat=args.format, createfiles=args.createfiles,
-        destdir=args.destdir, xmlwrap=args.xmlwrap, flags=args.flags,
-        indexdb=indexdb)
-    for filename in args.filenames:

-        nas = read_file(args, logger, filename)
+    fileset = EbasIOResultSet(**output_options)
+
+    for filename in args.filenames:
+        nas = read_file(nas_opt, logger, filename)
        if nas is None:
            exitcode += 1
        else:
-            outputfile = fileset.new_result()
-            outputfile.from_ebasfile(nas)
+            fileset.add_ebasfile(nas)
    fileset.extract_all()
    exitcode = min(255, exitcode)
    sys.exit(exitcode)

-def read_file(args, logger, filename):
+def read_file(nas_opt, logger, filename):
    """
    Reads a single file.
    Parameters:
@@ -241,33 +91,11 @@ def read_file(args, logger, filename):
        EbasNasaAmes file object or None in case of erros.
    """
    logger.info('reading input file {}'.format(filename))
-    nas = EbasNasaAmes()

-    ignore_numformat = False
-    if 'numformat' in args.ignore:
-        ignore_numformat = True
-    ignore_dx0 = False
-    if 'dx0' in args.ignore:
-        ignore_dx0 = True
-    ignore_identicalvalues = False
-    if 'identical-values' in args.ignore:
-        ignore_identicalvalues = True
-    ignore_parameter = False
-    if 'parameter' in args.ignore:
-        ignore_parameter = True
-    ignore_rescode = False
-    if 'rescode' in args.ignore:
-        ignore_rescode = True
-    skip_variables = [args.skip_variables] \
-        if isinstance(args.skip_variables, int) else args.skip_variables
+    nas = EbasNasaAmes(**nas_opt['nas_init'])

    try:
-        nas.read(filename, skip_unitconvert=args.skip_unitconvert,
-                 ignore_parameter=ignore_parameter,
-                 ignore_rescode=ignore_rescode,
-                 ignore_numformat=ignore_numformat, ignore_dx0=ignore_dx0,
-                 ignore_identicalvalues=ignore_identicalvalues,
-                 skip_variables=skip_variables)
+        nas.read(filename, **nas_opt['nas_read'])
    except (IOError, EbasNasaAmesReadError) as excpt:
        logger.error("file {}: {}".format(filename, str(excpt)))
        return None
@@ -275,7 +103,8 @@ def read_file(args, logger, filename):

 EbasCommandline(
    ebas_convert,
-    custom_args=['CONFIG', 'LOGGING', 'TIME_CRIT'],
+    custom_args=['CONFIG', 'LOGGING', 'TIME_CRIT', 'NASA_READ',
+                 'FILE_OUTPUT_KEEPCOLUMNS'],
    private_args=add_private_args,
    help_description='%(prog)s example for converting a NasaAmes datafile to '
    'different dataformats', version=__version__).run()