Skip to content
Snippets Groups Projects
Commit 9383e590 authored by Marc-Antoine Drouin's avatar Marc-Antoine Drouin
Browse files

Merge branch 'read_lev0' into 'main'

feat: add reader for AMES lev0 data

See merge request ipsl/sirta/ptr-ms/cigas-ptr-ms!1
parents 4f7f2554 df5d6189
No related branches found
No related tags found
No related merge requests found
:maxdepth: 2
./readers.md
Readers
.. automodule:: cigas_ptrms.readers.ames
:members:
:undoc-members:
"""Module to read AMES PTR-MS files from EBAS."""
import ast
import datetime as dt
import re
from dataclasses import dataclass, field
from pathlib import Path
import pandas as pd
from cigas_ptrms.utils import dict_values_to_int
# Matches one "YYYY MM DD" date. The reader applies it to the header dates
# line and uses the first match as the file reference date and the second
# match as the revision date.
AMES_DATES_LINE_REGEX = re.compile(r"(?P<year>\d{4})\s(?P<month>\d{2})\s(?P<day>\d{2})")
def parse_comment(line: str) -> tuple[str, str]:
    """
    Parse AMES comment line.

    Special and normal comments are written as ``key: value``; key and value
    are separated by the first colon of the line.

    Parameters
    ----------
    line : str
        Line to parse.

    Returns
    -------
    tuple[str, str]
        Key and value, both stripped of surrounding whitespace.

    Raises
    ------
    ValueError
        If the line does not contain any colon.
    """
    # only the first colon is a separator: some values contain colons themselves
    key, separator, value = line.partition(":")
    if not separator:
        err_msg = f"Comment line is not a 'key: value' pair: {line!r}"
        raise ValueError(err_msg)
    return key.strip(), value.strip()
def aggregate_comment(lines: list[str]) -> dict[str, list[str]]:
    """
    Group the metadata found in comment lines by key.

    Each line is split into a key and a value with ``parse_comment``. A key
    may appear on several lines; its values are collected in the order the
    lines are given.

    Parameters
    ----------
    lines : list[str]
        Raw comment lines read in the file.

    Returns
    -------
    dict[str, list[str]]
        Mapping of each key to the list of values read for it.
    """
    grouped: dict[str, list[str]] = {}
    for key, value in map(parse_comment, lines):
        # first occurrence creates the list, later ones extend it
        grouped.setdefault(key, []).append(value)
    return grouped
def get_valve_meaning(metadata: dict[str, list[str]] | None) -> dict[int, str]:
"""
Get valve status value meanings.
Parameters
----------
metadata : dict
Metadata dictionary.
Returns
-------
str
Valve status.
Notes
-----
Don't really kno how to parse this.
For the moment we hardcode the values.
"""
# check if metadata where found
if metadata is None:
msg = "No metadata found with valve status"
raise ValueError(msg)
# check we have the right metadata
if "Calibration standard ID" not in metadata:
msg = "Metadata does not contain 'Calibration standard ID'"
raise ValueError(msg)
if "Secondary standard ID" not in metadata:
msg = "Metadata does not contain 'Secondary standard ID'"
raise ValueError(msg)
return {
0: "AMBIENT",
1: "BLANK",
2: "CAL",
3: "TARGET",
}
@dataclass
class AmesVariableMeta:
    """
    Metadata of one AMES variable.

    Attributes
    ----------
    scale : float
        Scale value read for the variable.
    missing_value : float
        Missing value read for the variable.
    name : str
        Name of the variable.
    units : str
        Units of the variable.
    extra : dict[str, str]
        Additional key/value metadata attached to the variable.
    """

    scale: float
    missing_value: float
    name: str
    units: str
    extra: dict[str, str]
@dataclass()
class Ames1001:
    """
    Class to read AMES PTR-MS files from EBAS.

    Only compatible with NASA AMES 1001 file format. The file is read and
    parsed when the instance is created; every attribute but ``file`` is
    filled from the file content.

    Parameters
    ----------
    file : Path
        File to read. A value that is not a ``Path`` is converted to one.

    Notes
    -----
    This code is based on NASA AMES specifications [1]_
    and EBAS PTR-MS data format [2]_.

    References
    ----------
    .. [1] NASA AMES file `format specification v2.0 <https://espoarchive.nasa.gov/content/Ames_Format_Specification_v20>`_
    .. [2] EBAS PTR-MS `data format <https://ebas-submit.nilu.no/templates/VOC/PTR-MS_lev0>`_
    """

    file: Path
    nl_header: int = field(init=False)
    ames_type: str = field(init=False)
    originators: list[str] = field(init=False)
    organization: str = field(init=False)
    submitter: list[str] = field(init=False)
    projects: list[str] = field(init=False)
    ivol: int = field(init=False)
    nvol: int = field(init=False)
    date: dt.datetime = field(init=False)
    date_revision: dt.datetime = field(init=False)
    index_interval: float = field(init=False)
    index_units: str = field(init=False)
    n_vars: int = field(init=False)
    raw_missing_values: list[str] = field(init=False)
    vars_meta: dict[str, AmesVariableMeta] = field(init=False)
    nscoml: int = field(init=False)
    scom: dict[str, list[str]] | None = field(init=False)
    nncoml: int = field(init=False)
    ncom: dict[str, list[str]] | None = field(init=False)
    valve_meanings: dict[int, str] = field(init=False)
    data: pd.DataFrame = field(init=False)

    def __post_init__(self) -> None:
        """
        Parse the file header, then load the data table.

        Raises
        ------
        ValueError
            If the dates line does not hold two dates, if the numbers of
            scales, missing values and variable lines do not match the number
            of variables, if a comment line cannot be parsed, or if the valve
            status metadata are missing.
        """
        if not isinstance(self.file, Path):
            self.file = Path(self.file)

        # pandas.read_csv (used below on the same file) decodes UTF-8 by
        # default: read the header the same way rather than with the locale
        # encoding.
        with self.file.open("r", encoding="utf-8") as fid:
            # remove end of lines
            lines = [line.strip() for line in fid]

        # parse nl_header, ames_type
        tmp = lines[0].split()
        self.nl_header = int(tmp[0])
        self.ames_type = tmp[1]

        # originators
        self.originators = lines[1].split(";")

        # organization
        self.organization = lines[2]

        # producer
        self.submitter = lines[3].split(";")

        # projects
        self.projects = lines[4].split()

        # ivol, nvol
        self.ivol, self.nvol = [int(x) for x in lines[5].split()]  # noqa: UP027

        # dates: both the reference date and the revision date are required
        dates_regex = list(AMES_DATES_LINE_REGEX.finditer(lines[6]))
        if len(dates_regex) < 2:
            err_msg = f"Error parsing dates: {lines[6]}"
            raise ValueError(err_msg)
        self.date = dt.datetime(
            **dict_values_to_int(dates_regex[0].groupdict())  # type: ignore
        )
        self.date_revision = dt.datetime(
            **dict_values_to_int(dates_regex[1].groupdict())  # type: ignore
        )

        # step between index values (0 means not monotonic)
        self.index_interval = float(lines[7])

        # index units
        self.index_units = lines[8]

        # number of variables
        self.n_vars = int(lines[9])

        # manages variables metadata:
        var_scales = [ast.literal_eval(x) for x in lines[10].split()]
        # kept as strings: they are given as-is to pandas as NA markers
        self.raw_missing_values = lines[11].split()
        var_missings = [ast.literal_eval(x) for x in self.raw_missing_values]
        if len(var_scales) != self.n_vars or len(var_missings) != self.n_vars:
            err_msg = f"Error parsing variables scales and missing values: {lines[10]} {lines[11]}"  # noqa: E501
            raise ValueError(err_msg)

        var_end_line_n = 12 + self.n_vars
        var_raws = lines[12:var_end_line_n]
        self.vars_meta = {}
        # strict=True: a truncated header (fewer variable lines than n_vars)
        # raises instead of being silently accepted
        for var_scale, var_missing, var_raw in zip(
            var_scales, var_missings, var_raws, strict=True
        ):
            fields = var_raw.split(",")
            name = fields[0]
            units = fields[1]
            extra: dict[str, str] = {}
            for extra_field in fields[2:]:
                # split on the first "=" only so a value may contain "="
                extra_key, extra_value = extra_field.split("=", 1)
                extra[extra_key] = extra_value
            self.vars_meta[name] = AmesVariableMeta(
                scale=var_scale,
                missing_value=var_missing,
                name=name,
                units=units,
                extra=extra,
            )

        cur_line = var_end_line_n

        # special comments
        self.nscoml = int(lines[cur_line])
        self.scom = None
        if self.nscoml > 0:
            scom_lines = lines[cur_line + 1 : cur_line + 1 + self.nscoml]
            self.scom = aggregate_comment(scom_lines)
        # move past the count line and all the special comment lines, whether
        # there are some or not
        cur_line += self.nscoml + 1

        # normal comments
        self.nncoml = int(lines[cur_line])
        self.ncom = None
        if self.nncoml > 0:
            # The last normal comment line is left out on purpose: presumably
            # it is the column header row, which is not "key: value" and is
            # read by pandas below (skiprows=nl_header - 1).
            ncom_lines = lines[cur_line + 1 : cur_line + self.nncoml]
            self.ncom = aggregate_comment(ncom_lines)

        # valve meanings
        self.valve_meanings = get_valve_meaning(self.ncom)

        # read data: the last header line holds the column names
        self.data = pd.read_csv(
            self.file,
            engine="python",
            sep=r"\s+",
            skiprows=self.nl_header - 1,
            na_values=self.raw_missing_values,
        )

        # convert time values (days from the reference date) to datetime objects
        self.data["start_time"] = self.date + pd.to_timedelta(
            self.data["start_time"], unit="days"
        )
        self.data["end_time"] = self.date + pd.to_timedelta(
            self.data["end_time"], unit="days"
        )
"""Module containing various utility functions."""
def dict_values_to_int(d: dict[str, str]) -> dict[str, int]:
    """
    Build a copy of a dictionary whose values are converted with ``int``.

    Parameters
    ----------
    d : dict[str, str]
        Dictionary to convert.

    Returns
    -------
    dict[str, int]
        New dictionary with the same keys and every value converted to an
        integer.
    """
    converted: dict[str, int] = {}
    for key, raw_value in d.items():
        converted[key] = int(raw_value)
    return converted
import pytest
from cigas_ptrms.readers.ames import Ames1001, AmesVariableMeta
@pytest.fixture()
def lev0_file(lev0_dir):
    """Return the path of the reference lev0 file inside ``lev0_dir``."""
    reference_file = lev0_dir / "ref_lev0.nas"
    return reference_file
def test_ames_variable_meta():
    """Check that AmesVariableMeta stores the values it is built with."""
    expected = {
        "scale": 1.0,
        "missing_value": -999.0,
        "name": "test",
        "units": "ppb",
        "extra": {"extra": "extra"},
    }
    meta = AmesVariableMeta(**expected)
    # every attribute must give back the value passed to the constructor
    for attribute, value in expected.items():
        assert getattr(meta, attribute) == value
def test_ames1001(lev0_file):
    """Check the file path and the header length read by Ames1001."""
    reader = Ames1001(lev0_file)
    assert reader.file == lev0_file
    # number of header lines declared on the first line of the reference file
    expected_header_lines = 130
    assert reader.nl_header == expected_header_lines
......@@ -8,18 +8,16 @@ import pytest
@pytest.fixture()
def root_dir(request) -> Path: # type: ignore
"""Return root directory of tests."""
path = request.config.rootdir
if not path.exists():
path.mkdir(parents=True, exist_ok=True)
return Path(path)
return Path(request.config.rootdir / "tests")
@pytest.fixture()
def data_dir(root_dir: Path) -> Path:
"""Return data directory of tests."""
path = root_dir / "data"
if not path.exists():
path.mkdir(parents=True, exist_ok=True)
return root_dir / "data"
return path
@pytest.fixture()
def lev0_dir(data_dir: Path) -> Path:
    """Return the ``lev0`` directory located inside the tests data directory."""
    lev0_path = data_dir / "lev0"
    return lev0_path
130 1001
Gros, Valerie; Drouin, Marc-Antoine; Foliot, Lorna
FR01L, Laboratoire des Sciences du Climat et de l'Environnement, LSCE-CNRS-CEA, CEA Orme des Merisiers, Bat 701,, 91191, Gif-sur-Yvette, France
Pascal, Nicolas
GAW-WDCRG EMEP ACTRIS
1 1
2024 01 01 2024 06 25
0
days from file reference point
55
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
9999.999999 9999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999 999999.999 9.999
end_time of measurement, days from the file reference point
status, no unit, Status type=calibration standard, Matrix=instrument, Calibration scale=, Comment=See metadata elements "Calibration standard ID" and "Secondary standard ID”
numflag status, no unit, Status type=calibration standard, Matrix=instrument, Calibration scale=, Comment=See metadata elements "Calibration standard ID" and "Secondary standard ID”
temperature, K, Location=inlet, Matrix=instrument, Calibration scale=
numflag temperature, no unit, Location=inlet, Matrix=instrument, Calibration scale=
pressure, hPa, Location=reaction chamber, Matrix=instrument, Calibration scale=
numflag pressure, no unit, Location=reaction chamber, Matrix=instrument, Calibration scale=
mass_21_count_rate, 1/s
numflag mass_21_count_rate, no unit
mass_25_count_rate, 1/s
numflag mass_25_count_rate, no unit
mass_30_count_rate, 1/s
numflag mass_30_count_rate, no unit
methanal_count_rate, 1/s
numflag methanal_count_rate, no unit
mass_32_count_rate, 1/s
numflag mass_32_count_rate, no unit
methanol_count_rate, 1/s
numflag methanol_count_rate, no unit
mass_37_count_rate, 1/s
numflag mass_37_count_rate, no unit
acetonitrile_count_rate, 1/s
numflag acetonitrile_count_rate, no unit
ethanal_count_rate, 1/s
numflag ethanal_count_rate, no unit
mass_47_organic_compounds_count_rate, 1/s
numflag mass_47_organic_compounds_count_rate, no unit
mass_55_count_rate, 1/s
numflag mass_55_count_rate, no unit
mass_57_organic_compounds_count_rate, 1/s
numflag mass_57_organic_compounds_count_rate, no unit
mass_59_organic_compounds_count_rate, 1/s
numflag mass_59_organic_compounds_count_rate, no unit
mass_61_organic_compounds_count_rate, 1/s
numflag mass_61_organic_compounds_count_rate, no unit
dimethylsulfide_count_rate, 1/s
numflag dimethylsulfide_count_rate, no unit
mass_69_organic_compounds_count_rate, 1/s
numflag mass_69_organic_compounds_count_rate, no unit
mass_71_organic_compounds_count_rate, 1/s
numflag mass_71_organic_compounds_count_rate, no unit
mass_73_organic_compounds_count_rate, 1/s
numflag mass_73_organic_compounds_count_rate, no unit
methyl_acetate_count_rate, 1/s
numflag methyl_acetate_count_rate, no unit
benzene_count_rate, 1/s
numflag benzene_count_rate, no unit
mass_93_organic_compounds_count_rate, 1/s
numflag mass_93_organic_compounds_count_rate, no unit
mass_107_organic_compounds_count_rate, 1/s
numflag mass_107_organic_compounds_count_rate, no unit
mass_121_organic_compounds_count_rate, 1/s
numflag mass_121_organic_compounds_count_rate, no unit
monoterpenes_count_rate, 1/s
numflag monoterpenes_count_rate, no unit
0
61
Data definition: EBAS_1.1
Set type code: TI
Timezone: UTC
File name: FR0020R.20240315000530.20240625101118.PTR-MS.ion_count.air.1y.5mn.FR01L_PTR-MS_Ionicon_Analytik_H-S-PTR-QMS.FR01L_Ionicon_Analytik_H-S-PTR-QMS_GIF.lev0.nas
Startdate: 20240315000530
Revision date: 20240625101118
Version: 1
Version description: initial revision, automatically generated, automatically inspected
Statistics: arithmetic mean
Data level: 0
Period code: 1y
Resolution code: 5mn
Sample duration: 5mn
Orig. time res.: 5mn
Station code: FR0020R
Platform code: FR0020S
Station name: SIRTA
Station WDCA-ID: GAWAFR__GIF
Station GAW-ID: GIF
Station GAW-Name: SIRTA
Station land use: Agricultural
Station setting: Suburban
Station GAW type: O
Station WMO region: 6
Measurement latitude: 48.709
Measurement longitude: 2.159
Measurement altitude: 162.0 m
Measurement height: 6.0 m
Component: ion_count
Unit: 1/s
Matrix: air
Laboratory code: FR01L
Instrument type: PTR-MS
Instrument name: PTR-MS_Ionicon_Analytik_H-S-PTR-QMS
Instrument manufacturer: Ionicon Analytik
Instrument model: High-Sensitivity PTR-QMS
Instrument serial number: 68-08-04
Method ref: FR01L_Ionicon_Analytik_H-S-PTR-QMS_GIF
Standard method: None
Calibration scale: NPL
Calibration standard ID: "Status calibration standard: 1, Blank; Status calibration standard: 2, Manufacturer: NPL (passivated aluminum cylinder), Batch: D914017, Concentrations: {ethanal=1.04,methanol=0.88,ethanol=0.97,isoprene=0.705,acetone=0.99,dimethylsulfide=0.80,acetonitrile=1.24,3-buten-2-one=1.03,butan-2-one=1.10,benzene=0.95,1.2.4-trifluorobenzene=1.05,toluene=1.12,m-xylene=1.12,1.2.4-trimethylbenzene=1.23,3-carene=1.12, 1.2.4-trichlorobenzene=1.36}"
Secondary standard ID: "Status calibration standard: 3, Target"
Inlet type: Hat or hood
Inlet tube material: PFA
Inlet tube outer diameter: 12.7 mm
Inlet tube inner diameter: 9.5 mm
Inlet tube length: 5.95 m
Flow rate: 8.0 l/min
Volume std. temperature: 273.15K
Volume std. pressure: 1013.25hPa
Detection limit expl.: detection limit equals 3 times the square root of the blank signal, divided by the unnormalized measured sensitivity
Measurement uncertainty expl.: based on the method described in the draft version of the ACTRIS PTRMS guidelines 2022
Zero/negative values code: Zero/negative possible
Zero/negative values: Zero and neg. values may appear due to statistical variations at very low concentrations
Originator: Gros, Valerie, valerie.gros@lsce.ipsl.fr, Laboratoire des Sciences du Climat et de l'Environnement, LSCE, SIRTA, CEA Orme des Merisiers, Bat 701, 91191, Gif-sur-Yvette, France
Originator: Drouin, Marc-Antoine, marc-antoine.drouin@lmd.ipsl.fr, Laboratoire de Meteorologie Dynamique, LMD/IPSL, École Polytechnique, Institut Polytechnique de Paris, ENS Université PSL Sorbonne Université-CNRS, 91120, Palaiseau, France
Originator: Foliot, Lorna, lorna.foliot@lsce.ipsl.fr, Laboratoire des Sciences du Climat et de l'Environnement, LSCE, SIRTA, CEA Orme des Merisiers, Bat 701, 91191, Gif-sur-Yvette, France
Submitter: Pascal, Nicolas, nicolas.pascal@univ-lille.fr, AERIS/ICARE Data and Services Center, AERIS/ICARE,, Universite Lille 1, Bat. M3 Extension - Avenue Carl Gauss, 59650, Villeneuve d'Ascq, France
Acknowledgement: For using this data for any kind of publications, you must contact the data originator(s), in terms of acknowledgment
Comment: Negative points within the detection limit kept for low values variability ; mass_69 (isoprene) is the sum of isoprene and furan compounds ; mass_47 (ethanol) is the sum of ethanol and formic acid compounds
start_time end_time status flag_status T_inlet flag_T_inlet p_det flag_p_det mz21 flag_mz21 mz25 flag_mz25 mz30 flag_mz30 CH2OH+ flag_CH2OH+ mz32 flag_mz32 CH4OH+ flag_CH4OH+ mz37 flag_mz37 C2H3NH+ flag_C2H3NH+ C2H4OH+ flag_C2H4OH+ mz47 flag_mz47 mz55 flag_mz55 mz57 flag_mz57 C3H6OH+ flag_C3H6OH+ mz61 flag_mz61 C2H6SH+ flag_C2H6SH+ mz69 flag_mz69 C4H6OH+ flag_C4H6OH+ mz73 flag_mz73 C3H6O2H+ flag_C3H6O2H+ C6H6H+ flag_C6H6H+ mz93 flag_mz93 mz107 flag_mz107 C9H12H+ flag_C9H12H+ C10H16H+ flag_C10H16H+
74.007535 74.011007 0 0.000 273.150 0.000 1112.436 0.000 14919.970 0.000 0.000 0.000 8799.978 0.000 60.788 0.000 70469.790 0.000 661.147 0.000 468439.300 0.459 12.398 0.000 143.762 0.000 166.462 0.000 1044.297 0.000 44.790 0.000 84.681 0.000 29.795 0.000 19.395 0.000 24.795 0.000 9.398 0.000 28.395 0.000 9.798 0.000 11.298 0.000 32.891 0.000 17.995 0.000 7.999 0.000 1.800 0.000
74.011250 74.014722 1 0.000 273.150 0.686 1091.290 0.686 14929.970 0.686 0.000 0.686 8669.978 0.686 61.188 0.686 74349.800 0.686 605.447 0.686 402679.200 0.686 7.199 0.686 112.781 0.686 146.362 0.686 768.147 0.686 21.895 0.686 19.895 0.686 29.595 0.686 13.498 0.686 24.395 0.686 5.999 0.686 19.695 0.686 8.798 0.686 2.499 0.686 29.395 0.686 14.398 0.686 6.699 0.686 1.999 0.686
74.018681 74.022153 2 0.000 273.150 0.687 1091.290 0.687 14679.970 0.687 0.000 0.687 8679.975 0.687 62.491 0.687 70749.810 0.687 856.748 0.687 405959.200 0.687 789.949 0.687 806.450 0.687 175.562 0.687 793.246 0.687 58.690 0.687 795.447 0.687 32.792 0.687 376.124 0.687 229.762 0.687 726.447 0.687 856.548 0.687 12.998 0.687 462.124 0.687 582.647 0.687 529.648 0.687 426.924 0.687 164.462 0.687
74.022396 74.025868 3 0.000 273.150 0.687 1091.290 0.687 15429.970 0.687 0.000 0.687 8539.974 0.687 61.390 0.687 70409.800 0.687 686.247 0.687 411249.200 0.687 257.731 0.687 362.424 0.687 160.862 0.687 810.147 0.687 57.790 0.687 284.024 0.687 32.593 0.687 152.762 0.687 106.881 0.687 197.562 0.687 251.961 0.687 7.798 0.687 169.462 0.687 229.762 0.687 177.162 0.687 113.481 0.687 64.182 0.687
74.026111 74.029583 9999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999 999999.999 0.999
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment