Compare revisions: aqdl/denoising

Commits on Source (2)
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import sys
from scipy.signal import savgol_filter
import copy
import plotly.graph_objects as go
import json
from scipy.interpolate import PchipInterpolator
from denoising import (dataset, outliers)
......@@ -8,6 +15,7 @@ from denoising import (dataset, outliers)
def main():
# Settings
srcdir = Path('/home/wrfadmin/PycharmProjects/hapads/denoising/data')
dstdir = Path('/home/wrfadmin/test')
sensor_data_1_txt = srcdir.joinpath('sensor_data.txt')
sensor_data_2_txt = srcdir.joinpath('sensor_data_dataset_2.txt')
reference_data_1_txt = srcdir.joinpath('weather_station_data.txt')
......@@ -30,37 +38,157 @@ def main():
# detection, the suggested value of L is close to 0.
L = -0.1
wname = 'haar'
plot = 0
comment = 1
# Load datasets
reference_data = dataset.from_csv(paths=[reference_data_1_txt, reference_data_2_txt])
# Apply offset to reference data
reference_data = dataset.apply_offset(reference_data, refence_offset)
sensor_data = dataset.from_csv(paths=[sensor_data_1_txt, sensor_data_2_txt])
scor = sensor_data[['pm10', 'pm1', 'pm2_5']].values
# # OK
# dataset.check_values(scor)
scor2 = copy.deepcopy(scor)
# Clean sensor data using threshold
scor2 = outliers.apply_threshold(sensor_data[['pm10', 'pm1', 'pm2_5']].values, threshold=thd)
scor2 = outliers.apply_threshold(scor2, threshold=thd)
####################################################################################################################
####################################################################################################################
####################################################################################################################
# Set minimum target PM value (per-sample minimum across the PM channels)
reftst = scor2.min(axis=1)
scor3 = outliers.apply_delta_relative_threshold(dataset=scor2[:, -1], reftst=reftst, threshold=thd2)
# TODO: Not exactly how it is done in the notebook. Let's check that this is the same as scor3 = deepcopy(scor2[:, -1])!
scor3 = copy.deepcopy(scor2)
scor3 = outliers.apply_delta_relative_threshold(dataset=scor3[:, -1], reftst=reftst, threshold=thd2)
####################################################################################################################
####################################################################################################################
####################################################################################################################
signal = np.expand_dims(scor3, axis=0)
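# The despiking routines expect a 2-D array of shape (n_signals, n_samples) (they index signal.shape[1]), hence the added leading axis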
test = outliers.apply_wavelets_despiking(signal, sfr, wid, L, wname)
# OK !!!
tE = outliers.get_wavelet_spikes(signal, sfr, wid, L, wname)
scor4 = copy.deepcopy(scor3)
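# Replace each detected spike sample with the mean of its two immediate neighbours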
for k in range(len(tE)):
scor4[tE[k]] = (np.array([scor4[tE[k] - 1], scor4[tE[k] + 1]])).mean()
# dataset.check_values(scor4)
# Interpolate reference at sensor time
reference_interpolator = PchipInterpolator(x=reference_data.values[:, 0], y=reference_data.values[:, 5])
refint = reference_interpolator(sensor_data.values[:, 0])
# dataset.check_values(refint)
# NOTE: Call this scor5 to avoid overwriting scor4, for control purposes
scor5 = savgol_filter(x=scor4,
window_length=5, # window size used for filtering
polyorder=2) # order of fitted polynomial
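# savgol_filter fits a local quadratic (polyorder=2) over each 5-sample window, smoothing noise while preserving peaks better than a plain moving average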
# dataset.check_values(scor5)
corr_matrix = np.corrcoef(refint, scor5)
corr = corr_matrix[0, 1]
R_sq = corr ** 2
# How well does refint match the observations scor5
print(f'Coefficient of determination: {R_sq}')
rmse5 = dataset.rmse(refint, scor5)
rmse3 = dataset.rmse(refint, scor3)
rmse2 = dataset.rmse(refint, scor2[:, 2])
rmse1 = dataset.rmse(refint, sensor_data.values[:, 5])
print(f'RMSE - scor5: {rmse5}')
print(f'RMSE - scor3: {rmse3}')
print(f'RMSE - scor2: {rmse2}')
print(f'RMSE - scor1: {rmse1}')
# Display results
plt.figure(figsize=(30, 8))
plt.plot(sensor_data.values[:, 0][0:1140], reftst[0:1140], 'blue', linewidth=2)
plt.plot(sensor_data.values[:, 0][0:1140], scor3[0:1140], 'r', linewidth=0.5)
plt.plot(sensor_data.values[tE, 0][0:48], scor3[tE][0:48], 'ro', mfc='none')
plt.plot(sensor_data.values[:, 0][0:1140], scor4[0:1140], 'c')
plt.xlabel('Time')
plt.ylabel('Concentration')
plt.savefig(dstdir.joinpath('figure_1.png'))
plt.show()
# f1 = go.Figure(
# data=[
# go.Scatter(x=sensor_data.values[:, 0][0:1140], y=reftst[0:1140], name="reftst"),
# go.Scatter(x=sensor_data.values[:, 0][0:1140], y=scor3[0:1140], name="scor3"),
# go.Scatter(x=sensor_data.values[tE, 0][0:48], y=scor3[tE][0:48], name="scor3-peaks", mode='markers', marker=dict(color='red')),
# go.Scatter(x=sensor_data.values[:, 0][0:1140], y=scor4[0:1140], name="scor4")
# ],
# layout={"xaxis": {"title": "Time"}, "yaxis": {"title": "Concentration"}, "title": ""}
# )
# f1.show()
plt.figure(figsize=(30, 8))
plt.plot(sensor_data.values[:, 0][0:1140], scor2[:, 0][0:1140], 'k', linewidth=2)
plt.plot(sensor_data.values[:, 0][0:1140], scor2[:, 1][0:1140], 'cyan', linewidth=1)
plt.plot(sensor_data.values[:, 0][0:1140], scor3[0:1140], 'b', linewidth=0.5)
plt.plot(sensor_data.values[:, 0][0:1140], scor2[:, 2][0:1140], 'r')
plt.xlabel('Time')
plt.ylabel('Concentration')
plt.savefig(dstdir.joinpath('figure_2.png'))
plt.show()
plt.figure(figsize=(30, 8))
plt.plot(sensor_data.values[:, 0][0:1140], scor3[0:1140], 'b', linewidth=2)
plt.plot(sensor_data.values[:, 0][0:1140], scor5[0:1140], 'g', linewidth=2)
plt.plot(sensor_data.values[:, 0][0:1140], refint[0:1140], 'r')
plt.xlabel('Time')
plt.ylabel('Concentration')
plt.savefig(dstdir.joinpath('figure_3.png'))
plt.show()
plt.figure(figsize=(15, 15))
plt.plot(refint, scor5, 'o')
plt.xlim([0, refint.max()])
plt.ylim([0, scor5.max()])
plt.xlabel('Interpolated')
plt.ylabel('Despiked')
plt.title(f'Coefficient of determination R²: {R_sq}')
plt.savefig(dstdir.joinpath('figure_4.png'))
plt.show()
# Save corrected results
tstamp = copy.deepcopy(sensor_data.values[:, 0])
xs = [sensor_data.values[:, 8], sensor_data.values[:, 1], sensor_data.values[:, 2]/100.0, scor5]
xr = np.zeros((len(tstamp), 4))
xr[:, 0] = PchipInterpolator(reference_data.values[:, 0], reference_data.values[:, 7])(tstamp)
xr[:, 1] = PchipInterpolator(reference_data.values[:, 0], reference_data.values[:, 1])(tstamp)
xr[:, 2] = PchipInterpolator(reference_data.values[:, 0], reference_data.values[:, 2])(tstamp)
xr[:, 3] = refint
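# With the default from_csv column names, xr holds reference temp0, rh and pa interpolated to the sensor timestamps plus the interpolated pm2_5 (refint); xs holds sensor temp1, rh, pa/100 and the corrected series scor5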
scor1 = sensor_data.values[:, 3:5]
res = dict(xs=xs, xr=xr, scor1=scor1, scor2=scor2, ref=refint, env=reftst, rmse1=rmse1, rmse2=rmse2, rmse3=rmse3, rmse4=rmse5)
# with open(dstdir.joinpath('results.json'), 'w') as outfile:
# outfile.write(json.dumps(res, indent=4))
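# NOTE: json.dumps cannot serialise numpy arrays directly; the entries of res would need .tolist() (or a custom encoder) before the dump above can be enabled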
pass
......
import math
import numpy as np
from pathlib import Path
import pandas as pd
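# Print basic per-column statistics (min, mean, max, std, sum) as a quick sanity check of a loaded array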
def check_values(dataset):
print(f'{dataset.min(axis=0)=}')
print(f'{dataset.mean(axis=0)=}')
print(f'{dataset.max(axis=0)=}')
print(f'{dataset.std(axis=0, ddof=1)=}')
print(f'{dataset.sum(axis=0)=}')
def from_csv(paths: list[Path], names=['timestamp', 'rh', 'pa', 'pm10', 'pm1', 'pm2_5', 'no2', 'temp0', 'temp1']) -> pd.DataFrame:
dataset_list = [pd.read_csv(path, skiprows=3, header=None, sep=' ', names=names) for path in paths]
# Merge datasets
dataset = pd.concat(dataset_list)
for col in dataset.columns.tolist():
dataset[col] = dataset[col].astype(float)
# Reset the index, otherwise the concatenated frames keep duplicated index labels
return dataset.reset_index(drop=True)
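# Minimal usage sketch (the relative paths below are illustrative, not part of the module):
#     data = from_csv(paths=[Path('data/sensor_data.txt'), Path('data/sensor_data_dataset_2.txt')])
#     check_values(data[['pm10', 'pm1', 'pm2_5']].values)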
......@@ -12,7 +24,19 @@ def apply_offset(dataset, offset):
dataset.timestamp += offset
return dataset
def rmse(y_actual, y_predicted):
MSE = np.square(np.subtract(y_actual, y_predicted)).mean()
return math.sqrt(MSE)
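# Equivalent formulation, as a note: np.sqrt(np.mean((y_actual - y_predicted) ** 2))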
def evaluate_results(refint, scor5, scor4, scor3, scor2, sensor_data, reftst, tE, save_to=None):
corr_matrix = np.corrcoef(refint, scor5)
corr = corr_matrix[0, 1]
R_sq = corr ** 2
# How well does refint match the observations scor5
print(f'Coefficient of determination: {R_sq}')
def sensor_data():
pass
\ No newline at end of file
......@@ -2,7 +2,8 @@ import math
import numpy as np
import pywt
from denoising import wavelets
from typing import (List, Literal)
from typing import Literal
......@@ -33,7 +34,7 @@ def apply_threshold(dataset: np.array, threshold):
else:
loc1 = max(lt)
loc2 = min(gt)
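# loc1/loc2 pick the nearest samples below and above the flagged index (lt/gt come from earlier in this function); the outlier is replaced by their mean below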
loc = [loc1, loc2]
loc = np.array([loc1, loc2])
dataset[idtmp, jj] = dataset[loc, jj].mean()
return dataset
......@@ -47,9 +48,9 @@ def apply_delta_relative_threshold(dataset, reftst, threshold):
return dataset
def apply_wavelets_despiking(signal: np.array, sfr, wid: np.array, L: float,
wname: Literal['bior1.5','bior1.3','sym2','db2','haar'],
option:Literal['drop', 'reset']='reset'):
def get_wavelet_spikes(signal: np.array, sfr, wid: np.array, L: float,
wname: Literal['bior1.5','bior1.3','sym2','db2','haar'],
option:Literal['drop', 'reset']='reset'):
# Number of time samples
nt = signal.shape[1]
# Make sure signal is zero-mean
......@@ -73,46 +74,25 @@ def apply_wavelets_despiking(signal: np.array, sfr, wid: np.array, L: float,
# [ms] The refractory period -- cannot resolve spikes that are closer than refract
refract = 1.5 * wid[1]
# print(f'{refract=}')
refract = round(refract * sfr)
# print(f'{refract=}')
merge = wid.mean()
merge = round(merge * sfr)
# print(f'{merge=}')
# discard spikes located at the first and last samples
is_spike[0, [0, -1]] = False
ind_ones = np.argmax(is_spike[0, :] == True)
ind_ones_is_empty = not ind_ones.any()
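# Index 0 was just cleared, so argmax returns 0 only when no spike sample remains; this is effectively a check for 'any spikes at all'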
if not ind_ones_is_empty:
# there will be 1 followed by -1 for each spike
temp = np.diff(is_spike)
# nominal number of spikes
nsp = np.count_nonzero(temp == 1)
# index of the beginning of a spike
lead_t = np.where(temp == 1)[1]
# index of the end of the spike
lag_t = np.where(temp == -1)[1]
# print(f'{temp=}, {nsp=}, {lead_t=}, {lag_t=}')
tE = np.zeros(nsp, dtype=int)
for i in range(nsp):
tE[i] = np.ceil(np.array([lead_t[i], lag_t[i]]).mean())
# print(f'{tE[i]=}') # ok
# print(tE.mean())
# print(tE.min())
# print(tE.max())
# print(tE.std(ddof=1))
# print(len(tE))
for i in range(len(tE) - 1):
diff = tE[i + 1] - tE[i]
# if (diff < refract) and (diff > merge):
......@@ -123,10 +103,11 @@ def apply_wavelets_despiking(signal: np.array, sfr, wid: np.array, L: float,
elif diff <= merge:
# merge
tE[i] = np.ceil(np.array([tE[i], tE[i + 1]]).mean())
print(tE[i])
# discard
# TODO: How to implement????
tE[i + 1] = []
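# NOTE: assigning [] to a single numpy element raises a ValueError; the MATLAB-style
# in-place deletion has no direct equivalent here. One possible sketch (not the
# committed implementation) is to flag merged spikes in a boolean mask and drop
# them after the loop:
#     keep = np.ones(len(tE), dtype=bool)   # before the loop
#     keep[i + 1] = False                   # here, instead of tE[i + 1] = []
#     tE = tE[keep]                         # after the loop, before returning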
return tE
def get_spike_indexes(L, c, nt, nw, option, w):
......@@ -171,21 +152,14 @@ def get_spike_indexes(L, c, nt, nw, option, w):
ct[i, ind] = c[i, ind]
else:
mj = np.mean(np.abs(c[i, index])) # mean of the signal coefficients
# print(f'{mj=}')
ps = len(index) / nt # prior of spikes
# print(f'{len(index)=}')
# print(f'{ps=}')
pn = 1 - ps # prior of noise
# print(f'{pn=}')
# hypothesis testing
# eqn (7)
loge = L + math.log(pn / ps)
# eqn (6)
dth = mj / 2 + sigmaj ** 2 / mj * loge # ecision threshold
# print(f'{dth=}')
dth = mj / 2 + sigmaj ** 2 / mj * loge # decision threshold
# TODO: Check that the following line makes sense in python
dth = np.abs(dth) * (dth >= 0) # make DTh>=0
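# Equivalent to max(dth, 0): a negative decision threshold is clipped to zero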
# print(f'{dth=}')
ind = np.where(np.abs(c[i, :]) > dth)[0]
ct[i, ind] = c[i, ind]
# print(f'{dth=}, {ind=}, {ct[i,ind]=}')
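# In summary: per scale i, dth = mj/2 + (sigmaj**2 / mj) * (L + ln(pn/ps)) (eqns (6)-(7) above), and coefficients with |c[i, k]| > dth are retained as spike coefficients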
......@@ -198,7 +172,8 @@ def get_spike_indexes(L, c, nt, nw, option, w):
# make a union with coefficients from previous scales
Index = np.logical_or(io, Index).astype(int)
io = Index
return io
return Index
......