Skip to content
Snippets Groups Projects
Commit d3c123cf authored by Aurelien Chauvigne's avatar Aurelien Chauvigne
Browse files

adapt code to process from ACTRIS level 0 file

parent c8e068d9
No related branches found
No related tags found
No related merge requests found
......@@ -2,3 +2,6 @@
*.pyc
src/__pycache__/
src/*/__pycache__/
# test files
tests/
#!/bin/bash
SCRIPTDIR="$( cd "$( /usr/bin/dirname ${BASH_SOURCE} )" && pwd )"
PYTHONPATH=/usr/local//modules/rhel7/Python/3.8.0/lib//site-packages
PATH=/usr/local//modules/rhel7/Python/3.8.0/bin:/home/aurelien/bin:/home/aurelien/bin
LD_LIBRARY_PATH=/usr/local//modules/rhel7/Python/3.8.0/lib::/usr/local//modules/rhel7/gcc/9.2.0/lib:/usr/local//modules/rhel7/gcc/9.2.0/lib64
export PYTHONPATH PATH LD_LIBRARY_PATH
# PYTHONPATH=/usr/local//modules/rhel7/Python/3.8.0/lib//site-packages
# PATH=/usr/local//modules/rhel7/Python/3.8.0/bin:/home/aurelien/bin:/home/aurelien/bin
# LD_LIBRARY_PATH=/usr/local//modules/rhel7/Python/3.8.0/lib::/usr/local//modules/rhel7/gcc/9.2.0/lib:/usr/local//modules/rhel7/gcc/9.2.0/lib64
# export PYTHONPATH PATH LD_LIBRARY_PATH
/usr/local/modules/rhel7/Python/3.8.0/bin/python3 src/rawto012.py
......
......@@ -3,13 +3,20 @@
import datetime as dt
import sys, os
from pathlib import Path
import pandas as pd
import click
pkgdir = os.path.dirname(__file__)
d = os.path.abspath(os.path.join(pkgdir, "ptrms_lib"))
if d not in sys.path:
sys.path.append(os.path.join(d))
from ptrms_lib import calibration as calib
from ptrms_lib import ptrms
from ptrms_lib import utils
from datetime import datetime, timedelta
from optparse import OptionParser
......@@ -18,140 +25,99 @@ DATE_CMD_FMT = ["%Y%m%d", "%Y-%m-%d"]
class ptrms_lib():
def __init__(self):
def __init__(self, output_dir, conf_file):
"""Run SIRTA PTR-MS processing."""
# check input arguments
# ---------------------------------------------------------------------------------
# default value
zip_input_fmt = False
if len(zip_files) == 0 and len(ptrms_files) == 0:
click.echo("ERROR: No input files provided.")
sys.exit(1)
elif len(zip_files) > 0 and len(ptrms_files) > 0:
click.echo(
"ERROR: Both zip and ptrms files provided. Only one type of input is allowed." # noqa
)
sys.exit(1)
elif len(zip_files) == 0 and len(ptrms_files) > 0:
zip_input_fmt = False
click.echo("reading input files in raw format")
elif len(zip_files) > 0 and len(ptrms_files) == 0:
zip_input_fmt = True
click.echo("reading input files in zip format")
else:
click.echo("ERROR: Unknown error. Check input files provided.")
sys.exit(1)
# checking valve
if len(valve_files) == 0 and not zip_input_fmt:
click.echo("WARNING: No valve file provided.")
# read configuration file
# ---------------------------------------------------------------------------------
conf = utils.load_conf(config)
# date to process
# ---------------------------------------------------------------------------------
date_start = date
date_end = date + dt.timedelta(days=1)
cur_dir = os.getcwd()
# str version
date_start_str = date_start.strftime("%Y%m%d")
date_end_str = date_end.strftime("%Y%m%d")
self.conf = utils.load_conf(cur_dir + conf_file)
# working directories and files
# ---------------------------------------------------------------------------------
# tmp dir
tmp_dir = Path(conf["paths"]["tmp"])
tmp_dir = Path(self.conf["paths"]["tmp"])
wrk_dir = tmp_dir / "ptrms"
# output dir
output_dir_csv = output_dir / "csv"
output_dir_csv = Path(output_dir) / "csv"
if not output_dir_csv.exists():
output_dir_csv.mkdir()
self.output_dir_data = Path(output_dir) / "data"
if not self.output_dir_data.exists():
self.output_dir_data.mkdir()
# data
data_dir = Path(conf["paths"]["data"]).resolve()
data_dir = Path(self.conf["paths"]["data"]).resolve()
# quality
quality_file = data_dir / conf["paths"]["quality"]["file"]
quality_file = data_dir / self.conf["paths"]["quality"]["file"]
# calibration
calib_file = data_dir / conf["paths"]["calibration"]["file"]
calib_file = data_dir / self.conf["paths"]["calibration"]["file"]
# accuracy
accuracy_file = (
Path(conf["paths"]["accuracy"]["dir"]).resolve()
/ conf["paths"]["accuracy"]["file"]
Path(self.conf["paths"]["accuracy"]["dir"]).resolve()
/ self.conf["paths"]["accuracy"]["file"]
)
# time correction files
time_corr_files = sorted(
Path(conf["paths"]["time_corr"]["dir"])
self.time_corr_files = sorted(
Path(self.conf["paths"]["time_corr"]["dir"])
.resolve()
.glob(conf["paths"]["time_corr"]["mask"])
.glob(self.conf["paths"]["time_corr"]["mask"])
)
# download and/or read quality and calibration
# ---------------------------------------------------------------------------------
quality = utils.download_and_read(
conf["paths"]["quality"]["url"],
self.quality = utils.download_and_read(
self.conf["paths"]["quality"]["url"],
quality_file,
file_type="quality",
force_download=conf["paths"]["quality"]["force_download"],
force_download=self.conf["paths"]["quality"]["force_download"],
)
calibration = utils.download_and_read(
conf["paths"]["calibration"]["url"],
self.calibration = utils.download_and_read(
self.conf["paths"]["calibration"]["url"],
calib_file,
file_type="calibration",
force_download=conf["paths"]["calibration"]["force_download"],
force_download=self.conf["paths"]["calibration"]["force_download"],
)
accuracy_ref = ptrms.read_accuracy(accuracy_file)
self.accuracy_ref = ptrms.read_accuracy(accuracy_file)
self.bck_save = cur_dir + '/tests/SIRTA/backgrounds/'+ 'background_save.csv'
# extract raw data from zip files
# ---------------------------------------------------------------------------------
ptrms_ext = conf["paths"]["ptrms_ext"]
valve_ext = conf["paths"]["valve_ext"]
# uncompress the data if zip provided
if zip_input_fmt:
extracted_files = utils.extract_from_zip(
zip_files, wrk_dir, [ptrms_ext, valve_ext]
)
ptrms_files = sorted(extracted_files[ptrms_ext])
valve_files = sorted(extracted_files[valve_ext])
else:
ptrms_files = sorted(ptrms_files)
valve_files = sorted(valve_files)
if zip_input_fmt and len(ptrms_files) == 0:
click.echo("ERROR: No ptrms files found in zip.")
sys.exit(1)
if len(valve_files) == 0 and not bck_save.exists():
click.echo("ERROR: No valve file provided and no background file.")
sys.exit(1)
def process(self, ptrms_files, valve_files, output_dir_csv, levels, hour=None):
self.ptrms_ext = self.conf["paths"]["ptrms_ext"]
self.valve_ext = self.conf["paths"]["valve_ext"]
def process(self, indata, valve_files, output_dir_csv, levels, hour=None):
# read and check raw files
# ---------------------------------------------------------------------------------
ptrms_data_raw = ptrms.read_ptrms_files(ptrms_files).loc[date_start:date_end]
indata['time'] = [datetime(2024, 1, 1) + timedelta(days=d) for d in indata['Days from the file reference point (start_time)']]
ptrms_data_raw = pd.DataFrame.from_dict(indata)
ptrms_data_raw = ptrms_data_raw.set_index('time')
# generate dates
date_start_str = ptrms_data_raw.index[0].strftime("%Y%m%d")
date_end_str = (ptrms_data_raw.index[0] + timedelta(days=1)).strftime("%Y%m%d")
# check duplicated timesteps
file_dup_ts = output_dir_csv / f"dup_ts_{date_start_str}_{date_end_str}.csv"
file_dup_ts = Path(output_dir_csv) / f"dup_ts_{date_start_str}_{date_end_str}.csv"
ptrms_data_raw = ptrms.check_and_correct_duplicated(
ptrms_data_raw, file_dup_ts=file_dup_ts
)
# check monotonic time
file_nomono = output_dir_csv / f"not-monotonic_{date_start_str}_{date_end_str}.csv"
file_nomono = Path(output_dir_csv) / f"not-monotonic_{date_start_str}_{date_end_str}.csv"
ptrms_data_raw = ptrms.check_and_correct_monotonic(
ptrms_data_raw, file_nomono=file_nomono
)
# correct timestep if needed
ptrms_data_raw = ptrms.correct_ptrms_time(ptrms_data_raw, time_corr_files)
# read valve data
# ---------------------------------------------------------------------------------
valve_data = ptrms.read_valve_sirta(valve_files)
cur_dir = os.getcwd()
valve_data = ptrms.read_valve_sirta(cur_dir + valve_files)
valve_data = valve_data[~valve_data.index.duplicated(keep="first")]
# apply QC on PTR-MS species
# ---------------------------------------------------------------------------------
ptrms_data = ptrms.apply_qc_invalid(ptrms_data_raw, quality, conf["cols_species"])
ptrms_data = ptrms.apply_qc_invalid(ptrms_data_raw, self.quality, self.conf["cols_species"])
# processing to NCPS
# ---------------------------------------------------------------------------------
......@@ -160,245 +126,89 @@ class ptrms_lib():
ptrms_data_raw["mz_21"],
ptrms_data_raw["mz_37"],
"30min",
conf["cols_species"],
conf,
self.conf["cols_species"],
self.conf,
)
# sync data with valve
# ---------------------------------------------------------------------------------
ncps_data = ptrms.sync_valve(valve_data, ncps_data, rm_unknown=True)
ncps_data = ptrms.apply_qc_valve(ncps_data, quality)
ncps_data = ptrms.apply_qc_valve(ncps_data, self.quality)
valve_status = ptrms.get_valve_status(ncps_data, "valve")
# quality on BLANK (invalidate some period of blank)
ncps_data = ptrms.apply_qc_invalidate_blank(
ncps_data,
valve_status,
conf["parameters"]["flush_minute"],
conf["cols_species"],
quality,
self.conf["parameters"]["flush_minute"],
self.conf["cols_species"],
self.quality,
)
# remove background
# ---------------------------------------------------------------------------------
ncps_nobck_data, bck_data = ptrms.remove_background(
ncps_data, valve_status, conf, bck_save
ncps_data, valve_status, self.conf, self.bck_save
)
# save background
file_bck = output_dir_csv / f"bck_{date_start_str}_{date_end_str}.csv"
file_bck = Path(output_dir_csv) / f"bck_{date_start_str}_{date_end_str}.csv"
bck_data.to_csv(file_bck, float_format="%.3f")
# get calibration coefficients
# ---------------------------------------------------------------------------------
snorm_data = calib.get_snorm(calibration, ncps_nobck_data, conf, create_plot=False)
snorm_data = calib.get_snorm(self.calibration, ncps_nobck_data, self.conf, create_plot=False)
# final concentration and ancillary data
# ---------------------------------------------------------------------------------
# concentration ppb
ppb_data = ptrms.get_ppb(ncps_nobck_data, snorm_data, conf)
ppb_data = ptrms.get_ppb(ncps_nobck_data, snorm_data, self.conf)
file_ppb = output_dir_csv / f"ppb_{date_start_str}_{date_end_str}.csv"
ppb_data.to_csv(file_ppb, float_format="%.3f")
# file_ppb = Path(output_dir_csv) / f"ppb_{date_start_str}_{date_end_str}.csv"
# ppb_data.to_csv(file_ppb, float_format="%.3f")
# LOD
mz_21_sm = ptrms_data_raw["mz_21"].rolling("30min", center=True).mean()
mz_37_sm = ptrms_data_raw["mz_37"].rolling("30min", center=True).mean()
lod = ptrms.get_lod(bck_data, snorm_data, mz_21_sm, mz_37_sm, 1, conf)
lod = ptrms.get_lod(bck_data, snorm_data, mz_21_sm, mz_37_sm, 1, self.conf)
file_lod = output_dir_csv / f"lod_{date_start_str}_{date_end_str}.csv"
lod.to_csv(file_lod, float_format="%.3f")
# file_lod = Path(output_dir_csv) / f"lod_{date_start_str}_{date_end_str}.csv"
# lod.to_csv(file_lod, float_format="%.3f")
# precision
rsd = ptrms.get_rsd(
ppb_data, bck_data, snorm_data, lod, mz_21_sm, mz_37_sm, 1, conf
ppb_data, bck_data, snorm_data, lod, mz_21_sm, mz_37_sm, 1, self.conf
)
file_rsd = output_dir_csv / f"rsd_{date_start_str}_{date_end_str}.csv"
rsd.to_csv(file_rsd, float_format="%.3f")
# file_rsd = Path(output_dir_csv) / f"rsd_{date_start_str}_{date_end_str}.csv"
# rsd.to_csv(file_rsd, float_format="%.3f")
# accuracy
accuracy = ptrms.get_accuracy(ppb_data, accuracy_ref, lod, conf)
accuracy = ptrms.get_accuracy(ppb_data, self.accuracy_ref, lod, self.conf)
file_accuracy = output_dir_csv / f"accuracy_{date_start_str}_{date_end_str}.csv"
accuracy.to_csv(file_accuracy, float_format="%.3f")
# file_accuracy = Path(output_dir_csv) / f"accuracy_{date_start_str}_{date_end_str}.csv"
# accuracy.to_csv(file_accuracy, float_format="%.3f")
# uncertainty
uncertainty = ptrms.get_uncertainty(ppb_data, lod, rsd, accuracy)
file_uncertainty = (
output_dir_csv / f"uncertainty_{date_start_str}_{date_end_str}.csv"
)
uncertainty.to_csv(file_uncertainty, float_format="%.3f")
# creation csv output
# ---------------------------------------------------------------------------------
output_dir_actris = output_dir / "data"
if not output_dir_actris.exists():
output_dir_actris.mkdir()
output_file_actris = (
output_dir_actris / f"ptrms-actris_{date_start_str}_{date_end_str}.csv"
)
ptrms.create_aeris_csv(
ppb_data, lod, rsd, accuracy, quality, conf, output_file_actris
)
def get_parser(app, SRC_DIR):
"""
Sets the available program options, and their default values
"""
parser = OptionParser()
parser.usage = "python3 " + app + ".py [options] <infiles>" + os.linesep
parser.usage += os.linesep
parser.usage += "with :" + os.linesep
parser.usage += (
"\t<infiles> full path to the trace gazes file(s) to convert" + os.linesep
)
parser.add_option(
"-d",
"--date",
action="store",
default=None,
type="str",
dest="prod_id",
help="Product_id as <STATION>_<INSTRUMENT>_<LEVEL>",
)
parser.add_option(
"-o",
"--out-dir",
action="store",
default=SRC_DIR,
type="str",
dest="outdir",
help="Output directory",
)
@click.command()
@click.option(
"-d",
"--date",
type=click.DateTime(formats=DATE_CMD_FMT),
required=True,
help="Date to process.",
)
@click.option(
"-c",
"--config",
required=True,
type=click.Path(
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
resolve_path=True,
path_type=Path,
),
help="Path to configuration file.",
)
@click.option(
"-o",
"--output-dir",
required=True,
type=click.Path(
exists=True,
file_okay=False,
dir_okay=True,
writable=True,
resolve_path=True,
path_type=Path,
),
help="Path to output directory.",
)
@click.option(
"-b",
"--bck-save",
required=True,
type=click.Path(
file_okay=True,
dir_okay=False,
readable=True,
writable=True,
resolve_path=True,
path_type=Path,
),
help="Pathe to save last found background timestep.",
)
@click.option(
"--zip-files",
multiple=True,
type=click.Path(
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
resolve_path=True,
path_type=Path,
),
)
@click.option(
"--ptrms-files",
multiple=True,
type=click.Path(
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
resolve_path=True,
path_type=Path,
),
)
@click.option(
"--valve-files",
multiple=True,
type=click.Path(
exists=True,
file_okay=True,
dir_okay=False,
readable=True,
resolve_path=True,
path_type=Path,
),
)
infiles = sys.argv[-1]
# check the number of command line arguments
if len(sys.argv) < 1:
parser.print_help()
raise ValueError("Missing command-line options")
return infiles, parser.parse_args()
def main():
    """Command-line entry point: parse options and run the level-0 processing chain.

    NOTE(review): ``os.path.join`` discards all preceding components when a
    later component is absolute, so the trailing ``"/tests/"`` makes SRC_DIR
    resolve to exactly ``/tests/`` regardless of this file's location --
    confirm this is intended.
    NOTE(review): ``ptrms_lib`` is constructed here with a single
    ``options.prod_id`` argument; verify this matches the constructor's
    current signature.
    """
    SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..","/tests/")
    # parse CLI options; infiles is the trailing positional argument
    infiles, (options, arg) = ptrms_lib.get_parser(__APP__, SRC_DIR)
    prog = ptrms_lib(options.prod_id)
    # process only level 0 for the NRT chain
    prog.process(infiles, options.outdir, levels=[0])
def test():
    """
    Unit test: run the level-0 conversion on a sample SIRTA PTR-MS raw file.

    NOTE(review): instantiates ``PtrmsConverter``, which is not visibly
    imported in this module -- confirm the import exists.  The output
    directory is a hard-coded developer-machine path.
    """
    # SRC_DIR = os.path.dirname(__file__)
    prod_id = "GIF_PTRMS_RAW"
    # project root: two directory levels above this file
    SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")
    # --- load the raw data
    infiles = SRC_DIR + "/tests/SIRTA/inputs/20240315_000004_routine.ptr"
    prog = PtrmsConverter(prod_id)
    outdir = "/home/aurelien/test/GIF_PTRMS_CAMS"
    # process only level 0
    prog.process(infiles, outdir, levels=[0])
if __name__ == "__main__":
main()
\ No newline at end of file
# file_uncertainty = (
# Path(output_dir_csv) / f"uncertainty_{date_start_str}_{date_end_str}.csv"
# )
# uncertainty.to_csv(file_uncertainty, float_format="%.3f")
# return ppb and uncertainties
return ppb_data, lod, rsd, accuracy, uncertainty
# # creation csv output
# # ---------------------------------------------------------------------------------
# output_dir_actris = self.output_dir_data
# if not output_dir_actris.exists():
# output_dir_actris.mkdir()
# output_file_actris = (
# Path(output_dir_actris) / f"ptrms-actris_{date_start_str}_{date_end_str}.csv"
# )
# ptrms.create_aeris_csv(
# ppb_data, lod, rsd, accuracy, self.quality, self.conf, output_file_actris
# )
\ No newline at end of file
......@@ -202,9 +202,14 @@ class Converter(object):
# start_time = time.strftime("%Y%m%d%H%M%S", dt.timetuple())
# print("\nstart_time : ",start_time)
# start_day = time.strftime("%Y 01 01", dt.timetuple())
start_time = time.strftime(
"%Y%m%d%H%M%S", time.gmtime(numpy.round(t_start)))
start_day = time.strftime("%Y 01 01", time.gmtime(t_start))
if year is not None:
start_time = time.strftime(
"{0:s}%m%d%H%M%S".format(str(year)), time.gmtime(numpy.round(t_start)))
start_day = "{0:s} 01 01".format(str(year))
else:
start_time = time.strftime(
"%Y%m%d%H%M%S", time.gmtime(numpy.round(t_start)))
start_day = time.strftime("%Y 01 01", time.gmtime(t_start))
return start_time, start_day
......@@ -348,7 +353,6 @@ class Converter(object):
@param args other arguments to pass to the reader interface constructor
@return a buffer dictionary
"""
print('ok2')
# make sure infiles is a list
if isinstance(infiles, str):
infiles = [infiles]
......
......@@ -45,14 +45,13 @@ from product.AMESProduct import AMESProduct
from iointerface.AMESReader import AMESReader
from product.Product import Product
from misc import Standardizer, TimeConverter
import algo.ptrms_lib.ptrms_lib.ptrms as ptrms
from uploader.DataSeeker import DataSeeker
import json
import re
from algo.ptrms_lib import run_ptrms
from algo.ptrms_lib import ptrms_lib
from pathlib import Path
__VERSION__ = "0.0.0"
__APP__ = os.path.splitext(os.path.basename(__file__))[0]
......@@ -94,7 +93,7 @@ class PtrmsConverter(Converter.Converter):
for prod_id in prod_ids:
Converter.Converter.__init__(self, prod_id, AMESProduct)
def process(self, infiles, outdir, levels, hour=None):
def process(self, infiles, conf_file, valve_files, outdir, levels, hour=None):
"""
@brief convert a list of files
@param infiles the files to convert
......@@ -109,7 +108,7 @@ class PtrmsConverter(Converter.Converter):
if __DEBUG__:
print("*" * 20 + " LEVEL %d " % lvl + "*" * 20)
try:
self.process_lvl(infiles, outdir, lvl, hour)
self.process_lvl(infiles, conf_file, valve_files, outdir, lvl, hour)
except Exception as e:
if __DEBUG__:
msg = " > " + str(e) + " : " + os.linesep + "ABORT"
......@@ -125,7 +124,7 @@ class PtrmsConverter(Converter.Converter):
+ "*" * 20
)
def process_lvl(self, infiles, outdir, lvl, hour):
def process_lvl(self, infiles, conf_file, valve_files, outdir, lvl, hour):
"""
@brief convert a list of files
@param infiles the files to convert
......@@ -133,25 +132,78 @@ class PtrmsConverter(Converter.Converter):
@param year year of the eventual yearly synthesis
"""
# load data
reader = AMESReader(infiles[0])
ds_names = reader.get_ds_names()
self.reader = AMESReader(infiles[0])
ds_names = self.reader.get_ds_names()
indata = Converter.Converter.load_indata(self, infiles, ds_names, reader_interface=AMESReader)
self.var_ids_mz = []
self.var_ids_mz_name = []
self.var_ids_env_tag = []
self.var_ids_env = []
for var_id in ds_names:
if 'start_time' in var_id:
indata[Product.START_ACQ_ID] = indata[var_id]
elif 'end_time' in var_id:
indata[Product.END_ACQ_ID] = indata[var_id]
elif "temperature" in var_id and "Location=inlet" in var_id:
self.var_ids_env.append(var_id)
self.var_ids_env_tag.append('T_inlet')
elif "pressure" in var_id and "Location=inlet" in var_id:
self.var_ids_env.append(var_id)
self.var_ids_env_tag.append('P_inlet')
elif 'count_rate' in var_id and "numflag" not in var_id:
m = re.search(r'_[0-9]+_',var_id)
if m != None:
mass_num = m.group(0).replace('_','')
indata['mz_' + mass_num] = indata[var_id]
self.var_ids_mz_name.append(var_id)
self.var_ids_mz.append('mz_' + mass_num)
else:
if 'methanal' in var_id:
mass_num = '31'
elif 'methanol' in var_id:
mass_num = '33'
elif 'acetonitrile' in var_id:
mass_num = '42'
elif 'ethanal' in var_id:
mass_num = '45'
elif 'dimethylsulfide' in var_id:
mass_num = '63'
elif 'methyl_acetate' in var_id:
mass_num = '75'
elif 'benzene' in var_id:
mass_num = '79'
elif 'monoterpenes' in var_id:
mass_num = '137'
indata['mz_' + mass_num] = indata[var_id]
self.var_ids_mz.append('mz_' + mass_num)
start_dt = TimeConverter.from_epoch(indata[Product.START_ACQ_ID])
year = start_dt[0].year
dt_start_date = datetime(year, 1, 1)
year = int(self.reader.attributes['Startdate'][0:4])
# convert to ppm concentration
ptrms_lib()
prog_ptrms_lib = ptrms_lib(outdir, conf_file)
ppb_data, lod, rsd, accuracy, uncertainty = prog_ptrms_lib.process(indata, valve_files, outdir, levels=[0])
# group outputs
outdata = ppb_data
outdata[Product.START_ACQ_ID] = indata[Product.START_ACQ_ID]
outdata[Product.END_ACQ_ID] = indata[Product.END_ACQ_ID]
for var_id in self.var_ids_env:
outdata[var_id] = indata[var_id]
for var_id in lod.keys():
outdata[var_id+'_lod'] = lod[var_id]
for var_id in rsd.keys():
outdata[var_id+'_rsd'] = rsd[var_id]
for var_id in accuracy.keys():
outdata[var_id+'_accuracy'] = accuracy[var_id]
for var_id in uncertainty.keys():
outdata[var_id+'_uncertainty'] = uncertainty[var_id]
# identify invalid values
indata_fill, indata_nan, invalid_acq_val = self.get_invalid(indata)
indata_fill, indata_nan, invalid_acq_val = self.get_invalid(outdata)
# - acquisition time
v_start_acq_in = indata["start_acq"]
......@@ -160,22 +212,11 @@ class PtrmsConverter(Converter.Converter):
# get header
fname, sout, _ = self.get_header(t_start, lvl, v_start_acq_in, year)[:3]
# convert time variables to doy
end_dt = TimeConverter.from_epoch(indata[Product.END_ACQ_ID])
indata[Product.START_ACQ_ID] = numpy.array(
[(d - dt_start_date).total_seconds() / 86400.0 for d in start_dt]
) # to seconds
indata[Product.END_ACQ_ID] = numpy.array(
[(d - dt_start_date).total_seconds() / 86400.0 for d in end_dt]
) # to seconds
outdata = indata
# sort according start_acq
i_sort = numpy.argsort(outdata[Product.START_ACQ_ID])
outdata["start_acq"] = [outdata["start_acq"][i] for i in i_sort]
outdata["end_acq"] = [outdata["end_acq"][i] for i in i_sort]
outdata["valve"] = [outdata["valve"][i] for i in i_sort]
for var_id in self.var_ids_env + self.var_ids_me:
for var_id in self.var_ids_env + self.var_ids_mz:
outdata[var_id] = [outdata[var_id][i] for i in i_sort]
invalid_acq_val[var_id] = numpy.array(
[invalid_acq_val[var_id][i] for i in i_sort]
......@@ -183,22 +224,6 @@ class PtrmsConverter(Converter.Converter):
# outdata = outdata.sort_values(by=['start_acq'])
# outdata= sorted(outdata,key=lambda x:x["start_acq"].max(axis=0))
# insert zero / cal info
if lvl in [0]:
for var_id in self.var_ids_env + self.var_ids_me:
# seperate calib / zero
f_calib = [
((a in [0.682, 0.684, 0.686, 0.687]) & (b == 2))
for a, b in zip(invalid_acq_val[var_id], outdata["valve"])
]
f_zero = [
((a in [0.682, 0.684, 0.686, 0.687]) & (b == 1))
for a, b in zip(invalid_acq_val[var_id], outdata["valve"])
]
# set variables
invalid_acq_val[var_id][f_calib] = 0.687
invalid_acq_val[var_id][f_zero] = 0.686
# --- write data to output
if __DEBUG__:
print("--- Write outputs ---")
......@@ -211,11 +236,8 @@ class PtrmsConverter(Converter.Converter):
outdata["start_acq"][i], outdata["end_acq"][i]
)
valve = outdata["valve"][i]
line += "{0:<20d}".format(valve)
# concentrations coef
for var_id in self.var_ids_env + self.var_ids_me:
for var_id in self.var_ids_env + self.var_ids_mz:
conc = outdata[var_id][i]
qa = 0.0
......@@ -255,7 +277,7 @@ class PtrmsConverter(Converter.Converter):
"""
if var_id == self.QA_FLAGS_ID:
return 9.999
elif var_id in self.var_ids_me:
elif var_id in self.var_ids_mz:
return self.var_fills_conc
elif var_id in self.var_ids_env:
return self.var_fills_env
......@@ -274,7 +296,7 @@ class PtrmsConverter(Converter.Converter):
infill = self.var_fills_conc_native
# --- environmental property and concentration automatic QA
for var_id in self.var_ids_env + self.var_ids_me:
for var_id in self.var_ids_env + self.var_ids_mz:
data = indata[var_id]
# data = numpy.array([float(d.replace('E','e').replace(',','.')) for d in data])
data = numpy.array(data)
......@@ -288,13 +310,7 @@ class PtrmsConverter(Converter.Converter):
else:
is_extreme = data > 100000
# --- valve automatic QA
valve_data = indata["valve"]
is_blank = numpy.array([(v == 1) for v in valve_data])
is_calib = numpy.array([(v == 2) for v in valve_data])
invalid_databuf["valve"] = is_blank | is_calib
is_invalid = is_nan_data | is_infill | is_extreme | is_blank | is_calib
is_invalid = is_nan_data | is_infill | is_extreme
invalid_databuf[var_id] = is_invalid
# - replace native fill value by ebas one
......@@ -308,12 +324,6 @@ class PtrmsConverter(Converter.Converter):
# invalid_val[var_id][is_invalid] = 0.980
invalid_val[var_id][is_infill | is_nan_data] = 0.999
invalid_val[var_id][is_extreme] = 0.459
invalid_val[var_id][
is_blank
] = 0.686 # Invalid due to zero check. Used for Level 0.
invalid_val[var_id][
is_calib
] = 0.682 # Invalid due to calibration or zero/span check. Used for Level 0.
return invalid_databuf, nan_databuf, invalid_val
......@@ -326,7 +336,7 @@ class PtrmsConverter(Converter.Converter):
nvars = 1 # end time
nvars += 1 # status
nvars += len(self.var_ids_env) * 2 # T, P and QA
nvars += len(self.ME_range) * 2 # m/e and QA flag
nvars += len(self.var_ids_mz) * 2 # m/e and QA flag
return nvars
......@@ -336,15 +346,14 @@ class PtrmsConverter(Converter.Converter):
@param lvl level of the ACTRIS product
@return the time tags
"""
dt_raw_tag = self.dt_raw_tag
dt_tag = self.dt_tag
dt_raw_tag = self.reader.attributes['Resolution code']
dt_tag = self.reader.attributes['Sample duration']
if lvl in [0, 1]:
if dt_tag.endswith("mn"):
dt = float(dt_tag.replace("mn", "")) / float(
24 * 60
) # in decimal day unit
if self.dt_tag.endswith("s"):
if dt_tag.endswith("s"):
dt = float(dt_tag.replace("s", "")) / float(24 * 60 * 60)
period_tag = "1d" # NRT mode
elif lvl in [2]:
......@@ -362,12 +371,11 @@ class PtrmsConverter(Converter.Converter):
@return the list of variable fill values as a string
"""
s = "9999.999999 " # end time
s += "9999 " # valve status
s += str(self.var_fills_conc) + " " # T
s += "9.999 "
s += str(self.var_fills_conc) + " " # P
s += "9.999 "
for me in self.ME_range:
for me in self.var_ids_mz:
s += str(self.var_fills_conc)
s += " "
s += "9.999 "
......@@ -380,10 +388,7 @@ class PtrmsConverter(Converter.Converter):
@return (vars_long_name, vars_tags)
"""
vars_long_name = ""
vars_long_name += (
'status, no unit, Status type=calibration standard, Matrix=instrument, Comment=See metadata elements "Calibration standard ID" and "Secondary standard ID”'
+ os.linesep
)
vars_long_name += (
"{0:s}, {1:s}, Location=inlet, Matrix=instrument".format("temperature", "K")
+ os.linesep
......@@ -394,87 +399,21 @@ class PtrmsConverter(Converter.Converter):
+ os.linesep
)
vars_long_name += "numflag_pressure, no unit" + os.linesep
vars_long_name += (
"{0:s}, {1:s}, Location=reaction chamber, Matrix=instrument".format(
"temperature", "K"
)
+ os.linesep
)
vars_long_name += "numflag_temperature, no unit" + os.linesep
vars_long_name += (
"{0:s}, {1:s}, Location=reaction chamber, Matrix=instrument".format(
"pressure", "hPa"
)
+ os.linesep
)
vars_long_name += "numflag_pressure , no unit" + os.linesep
vars_long_name += (
"{0:s}, {1:s}, Location=reaction chamber, Matrix=instrument".format(
"electric_tension", "V"
)
+ os.linesep
)
vars_long_name += "numflag_electric_tension, no unit" + os.linesep
for var_id in self.var_ids_me:
if len(self.ME_list[var_id]) != 0:
var_id_long_name = [self.ME_list[var_id]["long_name"]][0]
else:
var_id_long_name = (
var_id.lower()
.replace(" ", "-")
.replace("/", "")
.replace(",", ".")
.replace("me", "mz")
)
unit = "1/s"
if self.ME_list[var_id]["accuracy"] is not None:
vars_long_name += (
"{0:s}, {1:s}, k={2:.2E}, XR={3:.2f}, accuracy={4:.3f} %, dwell_time={5:.2f} s, background_method={6:s},calibration_method={7:s}".format(
var_id_long_name,
unit,
self.ME_list[var_id]["k"],
self.ME_list[var_id]["XR"],
self.ME_list[var_id]["accuracy"],
self.ME_list[var_id]["dwell_time"],
self.ME_list[var_id]["background_method"],
self.ME_list[var_id]["calibration_method"],
)
+ os.linesep
)
else:
vars_long_name += (
"{0:s}, {1:s}, dwell_time={2:.2f} s".format(
var_id_long_name, unit, self.ME_list[var_id]["dwell_time"]
)
+ os.linesep
)
vars_long_name += "numflag_" + var_id_long_name + ", no unit" + os.linesep
for var_id in self.var_ids_mz_name:
vars_long_name += var_id + os.linesep
vars_long_name += "numflag_" + var_id + ", no unit" + os.linesep
vars_long_name = vars_long_name.strip()
# tags
vars_tag = ""
vars_tag += "status "
for var_id in self.var_ids_env:
for var_id in self.var_ids_env_tag:
var_id = var_id.replace(" ", "_")
vars_tag += "{0:<20s}".format(var_id)
vars_tag += "{0:<20s}".format("numflag_" + var_id)
for var_id in self.var_ids_me:
if len(self.ME_list[var_id]) != 0:
var_id = (self.ME_list[var_id]["short_name"]).replace("me", "mz")
else:
var_id = (
var_id.lower()
.replace(" ", "-")
.replace("/", "")
.replace(",", ".")
.replace("me", "mz")
)
for var_id in self.var_ids_mz:
vars_tag += "{0:<20s}".format(var_id)
vars_tag += "{0:<20s}".format(var_id + "_numflag")
vars_tag = vars_tag.strip()
......@@ -497,6 +436,31 @@ class PtrmsConverter(Converter.Converter):
# - production time
prod_time, rev_day = self.get_prod_time_tags()
# set metdata based on level 0
self.station = self.reader.attributes['Station code']
self.instrument = self.reader.attributes['Instrument type']
self.component = self.reader.attributes['Component']
self.matrix = self.reader.attributes['Matrix']
self.instr_name = self.reader.attributes['Instrument name']
self.method_ref = self.reader.attributes['Method ref']
self.lab = self.reader.attributes['Laboratory code']
self.platform = self.reader.attributes['Platform code']
self.gaw_id = self.reader.attributes['Station GAW-ID']
self.wdca_id = self.reader.attributes['Station WDCA-ID']
self.site = self.reader.attributes['Station name']
self.latitude = self.reader.attributes['Measurement latitude']
self.longitude = self.reader.attributes['Measurement longitude']
self.altitude = self.reader.attributes['Measurement altitude']
self.meas_height = self.reader.attributes['Measurement height']
std_temp = self.reader.attributes['Volume std. temperature']
std_pres = self.reader.attributes['Volume std. pressure']
self.wmo_land_use = self.reader.attributes['Station land use']
self.wmo_setting = self.reader.attributes['Station setting']
self.gaw_type = self.reader.attributes['Station GAW type']
self.wmo_region = self.reader.attributes['Station WMO region']
self.normal_comment_line = int(self.reader.normal_comment_line)
# - output filename
fname = self.get_outfname(start_time, prod_time, lvl, v_start_acq, year)
fname_ext = "nas"
......@@ -548,7 +512,7 @@ class PtrmsConverter(Converter.Converter):
header_size = sz_header_base + nvars # number of lines of the header
# --- fill-in the header
header = Converter.get_header(
header = Converter.Converter.get_header(
self,
lvl,
header_size=header_size,
......@@ -594,13 +558,7 @@ class PtrmsConverter(Converter.Converter):
set_type_code=set_type_code,
method_ref=self.method_ref,
instr_name=self.instr_name,
detection_limit=self.detection_limit,
normal_comment_line=self.normal_comment_line,
mu0=self.mu0,
N0=self.N0,
Pnorm=self.Pnorm,
Inorm=self.Inorm,
Ldrift=self.Ldrift,
)
return fname, header, start_day, ts_day_start
......@@ -655,20 +613,5 @@ def main():
prog.process(infiles, options.outdir, levels=[0])
def test():
    """
    Unit test: run the converter on a sample SIRTA PTR-MS raw file.

    NOTE(review): this function is shown as removed in the surrounding diff;
    it exercises ``PtrmsConverter.process`` at level 0 with a hard-coded
    developer-machine output path.
    """
    # SRC_DIR = os.path.dirname(__file__)
    prod_id = "GIF_PTRMS_RAW"
    # project root: two directory levels above this file
    SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")
    # --- load the raw data
    infiles = SRC_DIR + "/tests/SIRTA/inputs/20240315_000004_routine.ptr"
    prog = PtrmsConverter(prod_id)
    outdir = "/home/aurelien/test/GIF_PTRMS_CAMS"
    # process only level 0
    prog.process(infiles, outdir, levels=[0])
if __name__ == "__main__":
main()
......@@ -63,6 +63,7 @@ class AMESReader(AMESBase):
try:
self.encoding = 'latin-1'
self.get_content_desc()
except Exception as e:
raise Warning('Wrong file format: ', e)
......@@ -171,7 +172,10 @@ class AMESReader(AMESBase):
if lin.startswith("Submitter"):
self.submitter_contacts.append(lin)
continue
if ilin == 12 + self.nvars + 1:
self.normal_comment_line = lin
if ilin >= (12 + self.nvars + 2) and ilin < (self.nlin_header - 1):
# key: value attributes
elts = lin.split(":", 1)
......
......@@ -80,7 +80,7 @@ class Application(object):
self.prod_id = prod_id
object.__init__(self)
def process(self, infiles, outdir=os.path.join(os.path.expanduser("~"), "test")):
def process(self, infiles, conf_file, valve_files, outdir=os.path.join(os.path.expanduser("~"), "test")):
"""
@brief generate all levels upon infiles in outdir
"""
......@@ -96,7 +96,7 @@ class Application(object):
# --- lev0 -> lev1 --- #
print("> processing " + str(infiles))
converter = PtrmsConverter(self.prod_id)
converter.process(infiles, outdir, levels=[1])
converter.process(infiles, conf_file, valve_files, outdir, levels=[1])
# --- move all files to output dir
print("> move into " + outdir)
......@@ -117,11 +117,13 @@ def main():
def test():
SRC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)),'..')
infile = SRC_DIR + "/tests/SIRTA/inputs/FR0020R.20240315000530.20240708143924.PTR-MS.ion_count.air.1d.5mn.FR01L_PTR-MS_Ionicon_Analytik_H-S-PTR-QMS.FR01L_Ionicon_Analytik_H-S-PTR-QMS_GIF.lev0.nas"
infile = SRC_DIR + "/tests/SIRTA/inputs/FR0020R.20241028000550.20250108081653.PTR-MS.ion_count.air.1d.5mn.FR01L_PTR-MS_Ionicon_Analytik_H-S-PTR-QMS.FR01L_Ionicon_Analytik_H-S-PTR-QMS_GIF.lev0.nas"
conf_file = '/src/algo/ptrms_lib/conf/conf_ptrms.toml'
valve_files = "/tests/SIRTA/inputs/vanne_pos_20241028.csv"
outdir = SRC_DIR + "/tests/SIRTA/results/"
prod_id = 'GIF_PTRMS_RAW'
app = Application(prod_id)
app.process(infile, outdir=outdir)
app.process(infile, conf_file, valve_files, outdir=outdir)
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment