aqdl / denoising · Compare revisions

Source: aqdl/denoising @ 26dcca8ede8197e90d4f1ede0f59bd86b54c7a3c
Target: aqdl/denoising @ 486ea2c19dc7ea8cd0125a81dac1f40a861fe3e2

Changes are shown as if the source revision was being merged into the target revision.
Commits on Source (2)

- Fix numerical precision bug. Peak identification working. · 95f1ee2c (Islen Vallejo, authored 2 years ago)
- Plotting working · 26dcca8e (Islen Vallejo, authored 2 years ago)

Showing 3 changed files with 176 additions and 49 deletions:

- denoising/__main__.py: 139 additions, 11 deletions
- denoising/dataset.py: 26 additions, 2 deletions
- denoising/outliers.py: 11 additions, 36 deletions
denoising/__main__.py (view file @ 26dcca8e)
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import sys
from scipy.signal import savgol_filter
import copy
import plotly.graph_objects as go
import json
from scipy.interpolate import PchipInterpolator

from denoising import (dataset, outliers)

...
@@ -8,6 +15,7 @@ from denoising import (dataset, outliers)
def main():
    # Settings
    srcdir = Path('/home/wrfadmin/PycharmProjects/hapads/denoising/data')
    dstdir = Path('/home/wrfadmin/test')
    sensor_data_1_txt = srcdir.joinpath('sensor_data.txt')
    sensor_data_2_txt = srcdir.joinpath('sensor_data_dataset_2.txt')
    reference_data_1_txt = srcdir.joinpath('weather_station_data.txt')

...
@@ -30,37 +38,157 @@ def main():
    # detection, the suggested value of L is close to 0.
    L = -0.1
    wname = 'haar'
    plot = 0
    comment = 1

    # Load datasets
    reference_data = dataset.from_csv(paths=[reference_data_1_txt, reference_data_2_txt])
    # Apply offset to reference data
    reference_data = dataset.apply_offset(reference_data, refence_offset)
    sensor_data = dataset.from_csv(paths=[sensor_data_1_txt, sensor_data_2_txt])
    scor = sensor_data[['pm10', 'pm1', 'pm2_5']].values
    # # OK
    # dataset.check_values(scor)
    scor2 = copy.deepcopy(scor)

    # Clean sensor data using threshold
-   scor2 = outliers.apply_threshold(sensor_data[['pm10', 'pm1', 'pm2_5']].values, threshold=thd)
+   scor2 = outliers.apply_threshold(scor2, threshold=thd)
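    # (apply_threshold, defined in outliers.py, appears to replace each sample
    # beyond the threshold with the mean of its nearest in-range neighbours.)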
    ####################################################################################################################
    # Set minimum target PM value based on cumulative data
    reftst = scor2.min(axis=1)
-   scor3 = outliers.apply_delta_relative_threshold(dataset=scor2[:, -1], reftst=reftst, threshold=thd2)
+   # TODO: Not exactly how it is done in the notebook. Let's check that this is the same as scor3 = deepcopy(scor2[:, -1]) !!!!!
+   scor3 = copy.deepcopy(scor2)
+   scor3 = outliers.apply_delta_relative_threshold(dataset=scor3[:, -1], reftst=reftst, threshold=thd2)
    ####################################################################################################################
    signal = np.expand_dims(scor3, axis=0)
-   test = outliers.apply_wavelets_despiking(signal, sfr, wid, L, wname)
+   # OK !!!
+   tE = outliers.get_wavelet_spikes(signal, sfr, wid, L, wname)
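    # get_wavelet_spikes (outliers.py) returns the sample indices tE of the
    # detected spikes, taken as the midpoints of each spike's leading and
    # trailing edges in the wavelet spike mask.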
    scor4 = copy.deepcopy(scor3)
    for k in range(len(tE)):
        scor4[tE[k]] = (np.array([scor4[tE[k] - 1], scor4[tE[k] + 1]])).mean()
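    # i.e. each detected spike sample is replaced by the mean of its two
    # immediate neighbours.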
    # dataset.check_values(scor4)

    # Interpolate reference at sensor time
    reference_interpolator = PchipInterpolator(x=reference_data.values[:, 0],
                                               y=reference_data.values[:, 5])
    refint = reference_interpolator(sensor_data.values[:, 0])
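    # PCHIP is shape-preserving: unlike an ordinary cubic spline it does not
    # overshoot between reference samples, which matters for concentrations
    # that must stay non-negative.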
    # dataset.check_values(refint)
    # NOTE: Call this scor5 to avoid overwriting scor4 for control purposes
    scor5 = savgol_filter(x=scor4,
                          window_length=5,  # window size used for filtering
                          polyorder=2)      # order of fitted polynomial
    # dataset.check_values(scor5)
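    # With window_length=5 and polyorder=2, savgol_filter fits a parabola to
    # every run of five consecutive samples, smoothing noise while keeping
    # local peak shapes better than a plain moving average would.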
    corr_matrix = np.corrcoef(refint, scor5)
    corr = corr_matrix[0, 1]
    R_sq = corr ** 2  # How well does refint match the observations scor5
    print(f'Coefficient of determination: {R_sq}')
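    # R_sq is the squared Pearson correlation, so it measures linear
    # association only; the RMSE values below capture the absolute error.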
    rmse5 = dataset.rmse(refint, scor5)
    rmse3 = dataset.rmse(refint, scor3)
    rmse2 = dataset.rmse(refint, scor2[:, 2])
    rmse1 = dataset.rmse(refint, sensor_data.values[:, 5])
    print(f'RMSE - scor5: {rmse5}')
    print(f'RMSE - scor3: {rmse3}')
    print(f'RMSE - scor2: {rmse2}')
    print(f'RMSE - scor1: {rmse1}')
    # Display results
    plt.figure(figsize=(30, 8))
    plt.plot(sensor_data.values[:, 0][0:1140], reftst[0:1140], 'blue', linewidth=2)
    plt.plot(sensor_data.values[:, 0][0:1140], scor3[0:1140], 'r', linewidth=0.5)
    plt.plot(sensor_data.values[tE, 0][0:48], scor3[tE][0:48], 'ro', mfc='none')
    plt.plot(sensor_data.values[:, 0][0:1140], scor4[0:1140], 'c')
    plt.xlabel('Time')
    plt.ylabel('Concentration')
    plt.savefig(dstdir.joinpath('figure_1.png'))
    plt.show()
    # f1 = go.Figure(
    #     data=[
    #         go.Scatter(x=sensor_data.values[:, 0][0:1140], y=reftst[0:1140], name="reftst"),
    #         go.Scatter(x=sensor_data.values[:, 0][0:1140], y=scor3[0:1140], name="scor3"),
    #         go.Scatter(x=sensor_data.values[tE, 0][0:48], y=scor3[tE][0:48], name="scor3-peaks", mode='markers', marker=dict(color='red')),
    #         go.Scatter(x=sensor_data.values[:, 0][0:1140], y=scor4[0:1140], name="scor4")
    #     ],
    #     layout={"xaxis": {"title": "Time"}, "yaxis": {"title": "Concentration"}, "title": ""}
    # )
    # f1.show()
    plt.figure(figsize=(30, 8))
    plt.plot(sensor_data.values[:, 0][0:1140], scor2[:, 0][0:1140], 'k', linewidth=2)
    plt.plot(sensor_data.values[:, 0][0:1140], scor2[:, 1][0:1140], 'cyan', linewidth=1)
    plt.plot(sensor_data.values[:, 0][0:1140], scor3[0:1140], 'b', linewidth=0.5)
    plt.plot(sensor_data.values[:, 0][0:1140], scor2[:, 2][0:1140], 'r')
    plt.xlabel('Time')
    plt.ylabel('Concentration')
    plt.savefig(dstdir.joinpath('figure_2.png'))
    plt.show()
    plt.figure(figsize=(30, 8))
    plt.plot(sensor_data.values[:, 0][0:1140], scor3[0:1140], 'b', linewidth=2)
    plt.plot(sensor_data.values[:, 0][0:1140], scor5[0:1140], 'g', linewidth=2)
    plt.plot(sensor_data.values[:, 0][0:1140], refint[0:1140], 'r')
    plt.xlabel('Time')
    plt.ylabel('Concentration')
    plt.savefig(dstdir.joinpath('figure_3.png'))
    plt.show()
    plt.figure(figsize=(15, 15))
    plt.plot(refint, scor5, 'o')
    plt.xlim([0, refint.max()])
    plt.ylim([0, scor5.max()])
    plt.xlabel('Interpolated')
    plt.ylabel('Despiked')
    plt.title(f'Coefficient of determination R²: {R_sq}')
    plt.savefig(dstdir.joinpath('figure_4.png'))
    plt.show()
    # Save corrected results
    tstamp = copy.deepcopy(sensor_data.values[:, 0])
    xs = [sensor_data.values[:, 8],
          sensor_data.values[:, 1],
          sensor_data.values[:, 2] / 100.0,
          scor5]
    xr = np.zeros((len(tstamp), 4))
    xr[:, 0] = PchipInterpolator(reference_data.values[:, 0], reference_data.values[:, 7])(tstamp)
    xr[:, 1] = PchipInterpolator(reference_data.values[:, 0], reference_data.values[:, 1])(tstamp)
    xr[:, 2] = PchipInterpolator(reference_data.values[:, 0], reference_data.values[:, 2])(tstamp)
    xr[:, 3] = refint
    scor1 = sensor_data.values[:, 3:5]
    res = dict(xs=xs, xr=xr, scor1=scor1, scor2=scor2, ref=refint, env=reftst,
               rmse1=rmse1, rmse2=rmse2, rmse3=rmse3,
               rmse4=rmse5)  # NOTE: the rmse4 key stores rmse5, the Savitzky-Golay result
    # with open(dstdir.joinpath('results.json'), 'w') as outfile:
    #     outfile.write(json.dumps(res, indent=4))
    pass

...
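The evaluation step in __main__.py reduces to a few NumPy calls. As a minimal, standalone sketch with synthetic arrays standing in for refint and scor5 (not the project's data):

    import math
    import numpy as np

    rng = np.random.default_rng(0)
    ref = np.linspace(0.0, 50.0, 200)              # stand-in for refint
    sen = ref + rng.normal(scale=2.0, size=200)    # stand-in for scor5

    # Squared Pearson correlation, as in __main__.py
    r_sq = np.corrcoef(ref, sen)[0, 1] ** 2
    # RMSE, matching dataset.rmse()
    rmse = math.sqrt(np.square(np.subtract(ref, sen)).mean())
    print(f'Coefficient of determination: {r_sq}')
    print(f'RMSE: {rmse}')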
denoising/dataset.py (view file @ 26dcca8e)
import math
import numpy as np
from pathlib import Path
import pandas as pd
def check_values(dataset):
    print(f'{dataset.min(axis=0)=}')
    print(f'{dataset.mean(axis=0)=}')
    print(f'{dataset.max(axis=0)=}')
    print(f'{dataset.std(axis=0, ddof=1)=}')
    print(f'{dataset.sum(axis=0)=}')
def from_csv(paths: list[Path],
             names=['timestamp', 'rh', 'pa', 'pm10', 'pm1', 'pm2_5',
                    'no2', 'temp0', 'temp1']) -> pd.DataFrame:
    dataset_list = [pd.read_csv(path, skiprows=3, header=None, sep=' ', names=names)
                    for path in paths]
    # Merge datasets
    dataset = pd.concat(dataset_list)
    for col in dataset.columns.tolist():
        dataset[col] = dataset[col].astype(float)
    # Reset the index, otherwise you will get duplicate labels!
    return dataset.reset_index(drop=True)

...
@@ -12,7 +24,19 @@ def apply_offset(dataset, offset):
    dataset.timestamp += offset
    return dataset


def rmse(y_actual, y_predicted):
    MSE = np.square(np.subtract(y_actual, y_predicted)).mean()
    return math.sqrt(MSE)


def evaluate_results(refint, scor5, scor4, scor3, scor2, sensor_data, reftst, tE, save_to=None):
    corr_matrix = np.corrcoef(refint, scor5)
    corr = corr_matrix[0, 1]
    R_sq = corr ** 2  # How well does refint match the observations scor5
    print(f'Coefficient of determination: {R_sq}')


def sensor_data():
    pass
\ No newline at end of file
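The reset_index(drop=True) at the end of from_csv matters: pd.concat keeps each file's original row numbers, so the merged frame would carry duplicate index labels. A toy demonstration:

    import pandas as pd

    a = pd.DataFrame({'pm10': [1.0, 2.0]})
    b = pd.DataFrame({'pm10': [3.0, 4.0]})

    merged = pd.concat([a, b])
    print(merged.index.tolist())   # [0, 1, 0, 1]: duplicate labels
    print(len(merged.loc[0]))      # 2: label-based lookup now returns two rows

    clean = pd.concat([a, b]).reset_index(drop=True)
    print(clean.index.tolist())    # [0, 1, 2, 3]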
denoising/outliers.py (view file @ 26dcca8e)

...
@@ -2,7 +2,8 @@ import math
import numpy as np
import pywt
from denoising import wavelets
-from typing import (List, Literal)
+from typing import Literal

...
@@ -33,7 +34,7 @@ def apply_threshold(dataset: np.array, threshold):
    else:
        loc1 = max(lt)
        loc2 = min(gt)
-       loc = [loc1, loc2]
+       loc = np.array([loc1, loc2])
        dataset[idtmp, jj] = dataset[loc, jj].mean()
    return dataset

...
@@ -47,9 +48,9 @@ def apply_delta_relative_threshold(dataset, reftst, threshold):
    return dataset


-def apply_wavelets_despiking(signal: np.array, sfr, wid: np.array, L: float,
-                             wname: Literal['bior1.5', 'bior1.3', 'sym2', 'db2', 'haar'],
-                             option: Literal['drop', 'reset'] = 'reset'):
+def get_wavelet_spikes(signal: np.array, sfr, wid: np.array, L: float,
+                       wname: Literal['bior1.5', 'bior1.3', 'sym2', 'db2', 'haar'],
+                       option: Literal['drop', 'reset'] = 'reset'):
    # Number of time samples
    nt = signal.shape[1]
    # Make sure signal is zero-mean

...
@@ -73,46 +74,25 @@ def apply_wavelets_despiking(signal: np.array, sfr, wid: np.array, L: float,
    # [ms] The refractory period -- can not resolve spikes that are closer than refract
    refract = 1.5 * wid[1]
    # print(f'{refract=}')
    refract = round(refract * sfr)
    # print(f'{refract=}')
    merge = wid.mean()
    merge = round(merge * sfr)
    # print(f'{merge=}')
    # discard spikes that are located at first and last sample
    is_spike[0, [0, -1]] = False
    ind_ones = np.argmax(is_spike[0, :] == True)
    ind_ones_is_empty = not ind_ones.any()
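    # Note: np.argmax returns 0 both when the first spike is at index 0 and
    # when there are no spikes at all; since index 0 was just forced to False,
    # argmax(...) == 0 here can only mean "no spikes", which is what the
    # .any() emptiness check relies on.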
    if not ind_ones_is_empty:
        # there will be 1 followed by -1 for each spike
        temp = np.diff(is_spike)
        # nominal number of spikes
        nsp = np.count_nonzero(temp == 1)
        # index of the beginning of a spike
        lead_t = np.where(temp == 1)[1]
        # index of the end of the spike
        lag_t = np.where(temp == -1)[1]
        # print(f'{temp=}, {nsp=}, {lead_t=}, {lag_t=}')
        tE = np.zeros(nsp, dtype=int)
        for i in range(nsp):
            tE[i] = np.ceil(np.array([lead_t[i], lag_t[i]]).mean())
            # print(f'{tE[i]=}')  # ok
        # print(tE.mean())
        # print(tE.min())
        # print(tE.max())
        # print(tE.std(ddof=1))
        # print(len(tE))
        for i in range(len(tE) - 1):
            diff = tE[i + 1] - tE[i]
            # if (diff < refract) and (diff > merge):

...
@@ -123,10 +103,11 @@ def apply_wavelets_despiking(signal: np.array, sfr, wid: np.array, L: float,
            elif diff <= merge:
                # merge
                tE[i] = np.ceil(np.array([tE[i], tE[i + 1]]).mean())
                print(tE[i])
                # discard
                # TODO: How to implement????
                # NOTE: assigning [] to an element of an int array raises a
                # ValueError; a sentinel value plus a final filter would work here.
                tE[i + 1] = []
    return tE
def get_spike_indexes(L, c, nt, nw, option, w):

...
@@ -171,21 +152,14 @@ def get_spike_indexes(L, c, nt, nw, option, w):
            ct[i, ind] = c[i, ind]
        else:
            mj = np.mean(np.abs(c[i, index]))  # mean of the signal coefficients
            # print(f'{mj=}')
            ps = len(index) / nt  # prior of spikes
            # print(f'{len(index)=}')
            # print(f'{ps=}')
            pn = 1 - ps  # prior of noise
            # print(f'{pn=}')
            # hypothesis testing
            # eqn (7)
            loge = L + math.log(pn / ps)
            # eqn (6)
-           dth = mj / 2 + sigmaj ** 2 / mj * loge  # ecision threshold
+           dth = mj / 2 + sigmaj ** 2 / mj * loge  # decision threshold
            # print(f'{dth=}')
            # TODO: Check that the following line makes sense in python
            dth = np.abs(dth) * (dth >= 0)  # make DTh >= 0 (equivalent to max(dth, 0))
            # print(f'{dth=}')
            ind = np.where(np.abs(c[i, :]) > dth)[0]
            ct[i, ind] = c[i, ind]
            # print(f'{dth=}, {ind=}, {ct[i,ind]=}')

...
@@ -198,7 +172,8 @@ def get_spike_indexes(L, c, nt, nw, option, w):
            # make a union with coefficients from previous scales
            Index = np.logical_or(io, Index).astype(int)
-       io = Index
-   return io
+   return Index

...
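For reference, the hypothesis test in get_spike_indexes follows the commented equations: loge = L + log(pn/ps) (eqn 7) and dth = mj/2 + sigmaj**2/mj * loge (eqn 6), clamped to be non-negative. A standalone sketch with toy coefficients; sigmaj and the provisional spike index are placeholders for values the elided code computes:

    import math
    import numpy as np

    # Toy detail coefficients for one scale: mostly noise plus two spikes
    c = np.array([0.1, -0.2, 0.15, 5.0, -0.1, 0.05, 4.2, -0.15])
    nt = c.size
    L = -0.1                               # sensitivity, close to 0 as in __main__.py
    sigmaj = 0.2                           # placeholder noise-scale estimate
    index = np.where(np.abs(c) > 1.0)[0]   # placeholder provisional spike set

    mj = np.mean(np.abs(c[index]))          # mean magnitude of spike coefficients
    ps = len(index) / nt                    # prior of spikes
    pn = 1 - ps                             # prior of noise
    loge = L + math.log(pn / ps)            # eqn (7)
    dth = mj / 2 + sigmaj ** 2 / mj * loge  # eqn (6): decision threshold
    dth = np.abs(dth) * (dth >= 0)          # clamp, i.e. max(dth, 0)

    print(np.where(np.abs(c) > dth)[0])     # -> [3 6], the spike positions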