Commit f2f5d696 authored by Jean-Marie Lepioufle's avatar Jean-Marie Lepioufle
Browse files

first

parents
```shell
docker pull jmll/jupyter:0.1
docker run -p 8888:8888 jmll/jupyter:0.1
```
import copy
import json
import os
from datetime import datetime
from typing import Dict

import torch

from precx.utils import get_random_alphanumeric_string
class class_model():
    """Wrapper around a torch model chosen by name from ``dictionnary_model``.

    ``params`` carries the model name under ``"name"`` and the constructor
    keyword arguments under ``"param"``.
    """

    def __init__(self, params: dict):
        self.name = params["name"]
        self.params = params['param']
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print("Torch is using " + str(self.device))
        # Random core pairs the weight file with its params file in save().
        self.filename_core = get_random_alphanumeric_string(20)

    def build(self):
        """Instantiate the named model and move it to the selected device.

        Raises:
            KeyError: if ``self.name`` is not a known model name.
        """
        if self.name in dictionnary_model:
            self.model = dictionnary_model[self.name](**self.params)
            self.model.to(self.device)
        else:
            # KeyError is more precise than a bare Exception and is still
            # caught by callers that catch Exception.
            raise KeyError("Model " + self.name + " not found.")

    def save(self, path: str) -> None:
        """Persist model weights (``*_model.pth``) and params (``*.json``) under ``path``."""
        # makedirs(exist_ok=True) also creates missing parent directories
        # and avoids the check-then-create race of the original os.mkdir.
        os.makedirs(path, exist_ok=True)
        model_path = os.path.join(path, self.filename_core + "_model.pth")
        params_path = os.path.join(path, self.filename_core + ".json")
        torch.save(self.model.state_dict(), model_path)
        # "w" truncates like "w+" but does not request unused read access.
        with open(params_path, "w") as p:
            json.dump(self.params, p)
#import numpy as np
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler
import torch
class class_ts():
    """Sliding-window time-series dataset backed by a CSV file.

    Implements the torch ``Dataset`` protocol (``__getitem__``/``__len__``):
    item ``id`` pairs ``history_length`` feature rows starting at ``id`` with
    the ``horizon_length`` target rows that begin ``gap_length`` rows after
    the history window ends.
    """

    def __init__(self, path: str, date: list, id_start: int, id_stop: int,
                 target: list, gap_length: int, horizon_length: int,
                 features: list, history_length: int):
        self.gap_length = gap_length
        self.horizon_length = horizon_length
        self.history_length = history_length
        self.SCALED = False  # True once scale() has been applied
        self.features = features
        self.target = target
        # Sort by the date column(s) so windows are chronological.
        tmp = pd.read_csv(path).sort_values(by=date)
        # NOTE(review): .loc slicing is label-based and inclusive of id_stop;
        # assumes the default RangeIndex — confirm against callers.
        self.df_features = tmp.loc[id_start:id_stop, features]
        self.df_target = tmp.loc[id_start:id_stop, target]
        del tmp
        # Prototype scalers; scale() copies an instance per use so the
        # features and target fits never share state.
        self.scaling_dict = {
            "StandardScaler": StandardScaler(),
            "RobustScaler": RobustScaler(),
            "MinMaxScaler": MinMaxScaler(),
            "MaxAbsScaler": MaxAbsScaler()}

    # https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset
    def __getitem__(self, id):
        """Return ``(features, target)`` float tensors for window ``id``."""
        feat = self.df_features.iloc[id:self.history_length + id]
        features_data = torch.from_numpy(feat.to_numpy()).float()
        start = self.gap_length + self.history_length + id
        targ = self.df_target.iloc[start:start + self.horizon_length]
        target_data = torch.from_numpy(targ.to_numpy()).float()
        return features_data, target_data

    # https://pytorch.org/docs/stable/data.html#torch.utils.data.Dataset
    def __len__(self) -> int:
        """Number of complete (history + gap + horizon) windows."""
        return (
            len(self.df_features.index)
            - self.history_length - self.gap_length - self.horizon_length - 1
        )

    def scale(self, scaling=None):
        """Fit-and-transform both frames in place with the named scaler.

        ``scaling`` must be a key of ``self.scaling_dict``. Prints a message
        and does nothing when already scaled or when ``scaling`` is None.
        """
        if scaling is None:
            print("scaling parameters required")
            return
        if self.SCALED:
            print('df already scaled')
            return
        # Bug fix: the dict holds ONE shared instance per name, so fitting
        # the target used to clobber the features fit. Deep-copy a fresh
        # scaler for each frame instead.
        base = self.scaling_dict[scaling]
        self.scaling_fun_features = copy.deepcopy(base)
        self.scaling_fun_features.fit(self.df_features)
        tmp_df = self.scaling_fun_features.transform(self.df_features)
        self.df_features = pd.DataFrame(tmp_df, index=self.df_features.index, columns=self.df_features.columns)
        self.scaling_fun_target = copy.deepcopy(base)
        self.scaling_fun_target.fit(self.df_target)
        tmp_df = self.scaling_fun_target.transform(self.df_target)
        self.df_target = pd.DataFrame(tmp_df, index=self.df_target.index, columns=self.df_target.columns)
        self.SCALED = True

    def unscale(self, newdata=None, datatype=None):
        """Invert the scaling.

        With no ``newdata``: inverse-transform the stored frames in place.
        With ``newdata`` (Tensor / Series / DataFrame / ndarray): return it
        inverse-transformed by the scaler chosen via ``datatype`` ('target'
        or 'features'), in the same container type as the input.
        """
        print(datatype)
        if self.SCALED:
            tmp_df = self.scaling_fun_features.inverse_transform(self.df_features)
            self.df_features = pd.DataFrame(tmp_df, index=self.df_features.index, columns=self.df_features.columns)
            tmp_df = self.scaling_fun_target.inverse_transform(self.df_target)
            self.df_target = pd.DataFrame(tmp_df, index=self.df_target.index, columns=self.df_target.columns)
            self.SCALED = False
        elif newdata is not None:
            if isinstance(newdata, torch.Tensor):
                newdata_np = newdata.numpy()
            elif isinstance(newdata, (pd.Series, pd.DataFrame)):
                newdata_np = newdata.values
            elif isinstance(newdata, np.ndarray):
                newdata_np = newdata
            else:
                # Bug fix: the original fell through with newdata_np (and
                # later res) unbound, raising UnboundLocalError.
                print('instance of newdata not known')
                return None
            if datatype == 'target':
                fun = self.scaling_fun_target
            elif datatype == 'features':
                # Bug fix: this was a second independent `if`, so for
                # datatype=='target' the trailing else printed an error.
                fun = self.scaling_fun_features
            else:
                print('datatype either target or features')
                return None
            tmp = fun.inverse_transform(newdata_np)
            if isinstance(newdata, torch.Tensor):
                res = torch.from_numpy(tmp)
            elif isinstance(newdata, (pd.Series, pd.DataFrame)):
                # Bug fix: originally rebuilt from an undefined `tmp_df`.
                res = pd.DataFrame(tmp, index=newdata.index, columns=newdata.columns)
            else:
                res = tmp
            return res
        else:
            print('df already unscaled')
import math
import torch
# https://discuss.pytorch.org/t/rmse-loss-function/16540/3
class RMSELoss(torch.nn.Module):
    """Root-mean-square error: sqrt(MSE(target, output)).

    https://discuss.pytorch.org/t/rmse-loss-function/16540/3
    """

    def __init__(self):
        super().__init__()
        self.mse = torch.nn.MSELoss()

    def forward(self, target: torch.Tensor, output: torch.Tensor):
        mean_squared = self.mse(target, output)
        return mean_squared.sqrt()
class MAPELoss(torch.nn.Module):
    """Mean absolute percentage error: mean(|(target - output) / target|)."""

    def __init__(self):
        super().__init__()

    def forward(self, target: torch.Tensor, output: torch.Tensor):
        relative_error = (target - output) / target
        return relative_error.abs().mean()
# Source: https://arxiv.org/abs/1907.00235
# Compute the negative log likelihood of Gaussian Distribution
class GaussianLoss(torch.nn.Module):
    """Average negative log-likelihood of ``x`` under N(mu, sigma)."""

    def __init__(self, mu, sigma):
        """mu, sigma: Gaussian parameters, broadcastable against ``x``."""
        super(GaussianLoss, self).__init__()
        self.mu = mu
        self.sigma = sigma

    def forward(self, x):
        # Bug fix: the original referenced `tdist`, which was never
        # imported anywhere; use torch.distributions directly.
        loss = -torch.distributions.Normal(self.mu, self.sigma).log_prob(x)
        # Normalize by the first two dimensions (assumes x is at least 2-D).
        return torch.sum(loss) / (loss.size(0) * loss.size(1))
# source: https://medium.com/the-artificial-impostor/quantile-regression-part-2-6fdbc26b2629
class QuantileLoss(torch.nn.Module):
    """Pinball (quantile) loss; column ``i`` of ``preds`` predicts quantile ``i``."""

    def __init__(self, quantiles):
        super().__init__()
        self.quantiles = quantiles

    def forward(self, preds, target):
        assert not target.requires_grad
        assert preds.size(0) == target.size(0)
        per_quantile = []
        for col, q in enumerate(self.quantiles):
            diff = target - preds[:, col]
            # max((q-1)*e, q*e) penalizes under- and over-prediction
            # asymmetrically according to the quantile level.
            pinball = torch.max((q - 1) * diff, q * diff)
            per_quantile.append(pinball.unsqueeze(1))
        stacked = torch.cat(per_quantile, dim=1)
        return torch.mean(torch.sum(stacked, dim=1))
# Dilate loss function from
# Source: https://github.com/manjot4/NIPS-Reproducibility-Challenge
import numpy as np
import torch
from numba import jit
from torch.autograd import Function
class DilateLoss(torch.nn.Module):
    """DILATE loss: alpha * shape term (soft-DTW) + (1 - alpha) * temporal term.

    Source: https://github.com/manjot4/NIPS-Reproducibility-Challenge
    """

    def __init__(self, gamma=0.001, alpha=0.5):
        """
        :gamma: soft-DTW smoothing parameter
        :alpha: weight of the shape term versus the temporal term
        """
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def forward(self, targets: torch.Tensor, outputs: torch.Tensor):
        """
        :targets: tensor of dimension (batch_size, out_seq_len, 1)
        :outputs: tensor of dimension (batch_size, out_seq_len, 1)
        :returns a scalar torch.Tensor
        """
        outputs = outputs.float()
        targets = targets.float()
        # Promote a bare (out_seq_len,) series to (1, out_seq_len, 1).
        if len(targets.size()) < 2:
            targets = targets.unsqueeze(0).unsqueeze(2)
            outputs = outputs.unsqueeze(0).unsqueeze(2)
        # Bug fix: the original only assigned `target` inside the branch
        # above, so the documented 3-D inputs raised UnboundLocalError.
        target = targets
        batch_size, N_output = outputs.shape[0:2]
        softdtw_batch = SoftDTWBatch.apply
        # Per-sample pairwise squared distances between target/output steps.
        D = torch.zeros((batch_size, N_output, N_output)).to(self.device)
        for k in range(batch_size):
            Dk = pairwise_distances(target[k, :, :].view(-1, 1), outputs[k, :, :].view(-1, 1))
            D[k:k + 1, :, :] = Dk
        loss_shape = softdtw_batch(D, self.gamma)
        path_dtw = PathDTWBatch.apply
        path = path_dtw(D, self.gamma)
        # torch.range is deprecated; arange(1., N+1) yields the same values.
        Omega = pairwise_distances(torch.arange(1., N_output + 1).view(N_output, 1)).to(self.device)
        loss_temporal = torch.sum(path * Omega) / (N_output * N_output)
        return self.alpha * loss_shape + (1 - self.alpha) * loss_temporal
def pairwise_distances(x, y=None):
    '''
    Squared euclidean distance matrix.

    Input: x is a Nxd matrix
           y is an optional Mxd matrix; when omitted, y = x.
    Output: dist is a NxM matrix where dist[i,j] = ||x[i,:]-y[j,:]||^2,
            clamped below at 0 to absorb floating-point rounding noise.
    '''
    x_sq = x.pow(2).sum(1).view(-1, 1)
    if y is None:
        y_t = x.t()
        y_sq = x_sq.view(1, -1)
    else:
        y_t = y.t()
        y_sq = y.pow(2).sum(1).view(1, -1)
    # ||a-b||^2 = ||a||^2 + ||b||^2 - 2 a.b
    dist = x_sq + y_sq - 2.0 * x.mm(y_t)
    return dist.clamp(min=0.0)
@jit(nopython=True)
def compute_softdtw(D, gamma):
    # Forward pass of soft-DTW: fill the smoothed cumulative-cost matrix R
    # for an N x M pairwise-cost matrix D. R[-2, -2] ends up holding the
    # soft-DTW value (see SoftDTWBatch.forward).
    N = D.shape[0]
    M = D.shape[1]
    # One-cell border on each side; 1e8 acts as +infinity outside the grid.
    R = np.zeros((N + 2, M + 2)) + 1e8
    R[0, 0] = 0
    for j in range(1, M + 1):
        for i in range(1, N + 1):
            # Soft-min over the three predecessors (diagonal, up, left),
            # computed with the log-sum-exp trick for numerical stability.
            r0 = -R[i - 1, j - 1] / gamma
            r1 = -R[i - 1, j] / gamma
            r2 = -R[i, j - 1] / gamma
            rmax = max(max(r0, r1), r2)
            rsum = np.exp(r0 - rmax) + np.exp(r1 - rmax) + np.exp(r2 - rmax)
            softmin = - gamma * (np.log(rsum) + rmax)
            R[i, j] = D[i - 1, j - 1] + softmin
    return R
@jit(nopython=True)
def compute_softdtw_backward(D_, R, gamma):
    # Backward pass of soft-DTW: given costs D_ and the forward matrix R,
    # propagate gradient mass E from the bottom-right corner to the start.
    # NOTE: mutates the borders of R in place (callers pass throwaway
    # numpy copies — see SoftDTWBatch.backward).
    N = D_.shape[0]
    M = D_.shape[1]
    D = np.zeros((N + 2, M + 2))
    E = np.zeros((N + 2, M + 2))
    D[1:N + 1, 1:M + 1] = D_
    E[-1, -1] = 1
    # Seal the border so the recursion only flows through the real grid.
    R[:, -1] = -1e8
    R[-1, :] = -1e8
    R[-1, -1] = R[-2, -2]
    for j in range(M, 0, -1):
        for i in range(N, 0, -1):
            # Transition weights toward the three successors, derived from
            # the soft-min gradient exp((R_succ - R_cur - cost) / gamma).
            a0 = (R[i + 1, j] - R[i, j] - D[i + 1, j]) / gamma
            b0 = (R[i, j + 1] - R[i, j] - D[i, j + 1]) / gamma
            c0 = (R[i + 1, j + 1] - R[i, j] - D[i + 1, j + 1]) / gamma
            a = np.exp(a0)
            b = np.exp(b0)
            c = np.exp(c0)
            E[i, j] = E[i + 1, j] * a + E[i, j + 1] * b + E[i + 1, j + 1] * c
    # Strip the border: gradient w.r.t. each entry of D_.
    return E[1:N + 1, 1:M + 1]
class SoftDTWBatch(Function):
    # Custom autograd Function: batch-averaged soft-DTW loss over a batch
    # of pairwise-cost matrices. The numba kernels above do the real work
    # on CPU numpy arrays; results are moved back to D's device.

    @staticmethod
    def forward(ctx, D, gamma=1.0):  # D.shape: [batch_size, N, N]
        dev = D.device
        batch_size, N, N = D.shape
        gamma = torch.FloatTensor([gamma]).to(dev)
        # Numba kernels require CPU numpy inputs.
        D_ = D.detach().cpu().numpy()
        g_ = gamma.item()
        total_loss = 0
        R = torch.zeros((batch_size, N + 2, N + 2)).to(dev)
        for k in range(0, batch_size):  # loop over all D in the batch
            Rk = torch.FloatTensor(compute_softdtw(D_[k, :, :], g_)).to(dev)
            R[k:k + 1, :, :] = Rk
            # R[-2, -2] holds the soft-DTW value for sample k.
            total_loss = total_loss + Rk[-2, -2]
        ctx.save_for_backward(D, R, gamma)
        return total_loss / batch_size

    @staticmethod
    def backward(ctx, grad_output):
        dev = grad_output.device
        D, R, gamma = ctx.saved_tensors
        batch_size, N, N = D.shape
        D_ = D.detach().cpu().numpy()
        R_ = R.detach().cpu().numpy()
        g_ = gamma.item()
        E = torch.zeros((batch_size, N, N)).to(dev)
        for k in range(batch_size):
            Ek = torch.FloatTensor(compute_softdtw_backward(D_[k, :, :], R_[k, :, :], g_)).to(dev)
            E[k:k + 1, :, :] = Ek
        # Gradient only w.r.t. D; the gamma argument gets None.
        return grad_output * E, None
@jit(nopython=True)
def my_max(x, gamma):
    # Smoothed maximum of x via the log-sum-exp trick; also returns the
    # softmax weights, which are the gradient of the smoothed max.
    peak = np.max(x)
    weights = np.exp((x - peak) / gamma)
    total = np.sum(weights)
    return gamma * np.log(total) + peak, weights / total
@jit(nopython=True)
def my_min(x, gamma):
    # Smoothed minimum, defined through the smoothed maximum of -x.
    neg_value, grad = my_max(-x, gamma)
    return -neg_value, grad
@jit(nopython=True)
def my_max_hessian_product(p, z, gamma):
    # Hessian-vector product of the smoothed max; p are softmax weights.
    weighted = p * z
    return (weighted - p * np.sum(weighted)) / gamma
@jit(nopython=True)
def my_min_hessian_product(p, z, gamma):
    # Hessian-vector product of the smoothed min (negated max product).
    return -my_max_hessian_product(p, z, gamma)
@jit(nopython=True)
def dtw_grad(theta, gamma):
    # Smoothed DTW on cost matrix theta: returns the soft value V[m, n],
    # the soft alignment matrix (gradient) without borders, and the
    # intermediates Q and bordered E needed by dtw_hessian_prod.
    m = theta.shape[0]
    n = theta.shape[1]
    V = np.zeros((m + 1, n + 1))
    V[:, 0] = 1e10  # 1e10 ~ +infinity: forbid paths leaving the grid
    V[0, :] = 1e10
    V[0, 0] = 0
    # Q[i, j] stores the soft-min weights over the three predecessors.
    Q = np.zeros((m + 2, n + 2, 3))
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # theta is indexed starting from 0.
            v, Q[i, j] = my_min(np.array([V[i, j - 1],
                                          V[i - 1, j - 1],
                                          V[i - 1, j]]), gamma)
            V[i, j] = theta[i - 1, j - 1] + v
    E = np.zeros((m + 2, n + 2))
    E[m + 1, :] = 0
    E[:, n + 1] = 0
    E[m + 1, n + 1] = 1
    Q[m + 1, n + 1] = 1
    # Backward sweep: accumulate expected alignment mass through Q.
    for i in range(m, 0, -1):
        for j in range(n, 0, -1):
            E[i, j] = Q[i, j + 1, 0] * E[i, j + 1] + \
                Q[i + 1, j + 1, 1] * E[i + 1, j + 1] + \
                Q[i + 1, j, 2] * E[i + 1, j]
    return V[m, n], E[1:m + 1, 1:n + 1], Q, E
@jit(nopython=True)
def dtw_hessian_prod(theta, Z, Q, E, gamma):
    # Hessian-vector product of the smoothed DTW value: directional
    # derivative along Z, re-using Q and E computed by dtw_grad.
    m = Z.shape[0]
    n = Z.shape[1]
    V_dot = np.zeros((m + 1, n + 1))
    V_dot[0, 0] = 0
    Q_dot = np.zeros((m + 2, n + 2, 3))
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # theta is indexed starting from 0.
            V_dot[i, j] = Z[i - 1, j - 1] + \
                Q[i, j, 0] * V_dot[i, j - 1] + \
                Q[i, j, 1] * V_dot[i - 1, j - 1] + \
                Q[i, j, 2] * V_dot[i - 1, j]
            v = np.array([V_dot[i, j - 1], V_dot[i - 1, j - 1], V_dot[i - 1, j]])
            Q_dot[i, j] = my_min_hessian_product(Q[i, j], v, gamma)
    E_dot = np.zeros((m + 2, n + 2))
    # Backward sweep mirroring dtw_grad, with product-rule terms for the
    # perturbed weights Q_dot.
    for j in range(n, 0, -1):
        for i in range(m, 0, -1):
            E_dot[i, j] = Q_dot[i, j + 1, 0] * E[i, j + 1] + \
                Q[i, j + 1, 0] * E_dot[i, j + 1] + \
                Q_dot[i + 1, j + 1, 1] * E[i + 1, j + 1] + \
                Q[i + 1, j + 1, 1] * E_dot[i + 1, j + 1] + \
                Q_dot[i + 1, j, 2] * E[i + 1, j] + \
                Q[i + 1, j, 2] * E_dot[i + 1, j]
    return V_dot[m, n], E_dot[1:m + 1, 1:n + 1]
class PathDTWBatch(Function):
    # Custom autograd Function: batch-averaged soft alignment path.
    # forward returns the mean soft path matrix over the batch; backward
    # applies the DTW Hessian-product to the incoming gradient.

    @staticmethod
    def forward(ctx, D, gamma):  # D.shape: [batch_size, N , N]
        batch_size, N, N = D.shape
        device = D.device
        # Numba kernels require CPU numpy inputs.
        D_cpu = D.detach().cpu().numpy()
        gamma_gpu = torch.FloatTensor([gamma]).to(device)
        grad_gpu = torch.zeros((batch_size, N, N)).to(device)
        Q_gpu = torch.zeros((batch_size, N + 2, N + 2, 3)).to(device)
        E_gpu = torch.zeros((batch_size, N + 2, N + 2)).to(device)
        for k in range(0, batch_size):  # loop over all D in the batch
            _, grad_cpu_k, Q_cpu_k, E_cpu_k = dtw_grad(D_cpu[k, :, :], gamma)
            grad_gpu[k, :, :] = torch.FloatTensor(grad_cpu_k).to(device)
            Q_gpu[k, :, :, :] = torch.FloatTensor(Q_cpu_k).to(device)
            E_gpu[k, :, :] = torch.FloatTensor(E_cpu_k).to(device)
        ctx.save_for_backward(grad_gpu, D, Q_gpu, E_gpu, gamma_gpu)
        return torch.mean(grad_gpu, dim=0)

    @staticmethod
    def backward(ctx, grad_output):
        device = grad_output.device
        grad_gpu, D_gpu, Q_gpu, E_gpu, gamma = ctx.saved_tensors
        D_cpu = D_gpu.detach().cpu().numpy()
        Q_cpu = Q_gpu.detach().cpu().numpy()
        E_cpu = E_gpu.detach().cpu().numpy()
        gamma = gamma.detach().cpu().numpy()[0]
        Z = grad_output.detach().cpu().numpy()
        batch_size, N, N = D_cpu.shape
        Hessian = torch.zeros((batch_size, N, N)).to(device)
        for k in range(0, batch_size):
            _, hess_k = dtw_hessian_prod(D_cpu[k, :, :], Z, Q_cpu[k, :, :, :], E_cpu[k, :, :], gamma)
            Hessian[k:k + 1, :, :] = torch.FloatTensor(hess_k).to(device)
        # Gradient only w.r.t. D; the gamma argument gets None.
        return Hessian, None
#from precx.models.linear_regression import simple_decode
#from precx.models.transformer_basic import greedy_decode
# Maps a decoder name to its decoding function.
# NOTE(review): the two imports above are commented out, so this line
# raises NameError at import time unless greedy_decode / simple_decode are
# defined elsewhere in this module — confirm the intended imports.
decoder_dict = {"greedy_decode": greedy_decode, "simple_decode": simple_decode}
#from precx.models.transformer.multi_head_base import MultiAttnHeadSimple
#from precx.models.transformer.transformer_basic import SimpleTransformer, CustomTransformerDecoder
#from precx.models.transformer.transformer_xl import TransformerXL
#from precx.models.transformer.dummy_torch import DummyTorchModel
#from precx.models.lstm.lstm import LSTM_mts
#from precx.models.linear_regression.linear_regression import SimpleLinearModel
#from precx.models.da_rnn.model import DARNN
#from precx.models.autoencoder.basic_ae import AE
import torch
"""
Utility dictionaries to map a string to a class
"""
dictionnary_model = {
#"MultiAttnHeadSimple": MultiAttnHeadSimple,
#"SimpleTransformer": SimpleTransformer,
#"TransformerXL": TransformerXL,
#"DummyTorchModel": DummyTorchModel,
"LSTM_mts": LSTM_mts #,
#"SimpleLinearModel": SimpleLinearModel,
#"CustomTransformerDecoder": CustomTransformerDecoder,
#"DARNN": DARNN,
#"BasicAE": AE
}
from torch.optim import Adam, SGD
from precx.optim.optim import BertAdam
from torch.nn import MSELoss, SmoothL1Loss, PoissonNLLLoss, L1Loss
from precx.optim.optim import RMSELoss, MAPELoss
from precx.optim.dilate_loss import DilateLoss
optim_dict = {"Adam": Adam, "SGD": SGD, "BertAdam": BertAdam}
criterion_dict = {
"MSE": MSELoss,
"SmoothL1Loss": SmoothL1Loss,
"PoissonNLLLoss": PoissonNLLLoss,
"RMSE": RMSELoss,
"MAPE": MAPELoss,
"DilateLoss": DilateLoss,
"L1": L1Loss}
evaluation_dict = {"NSE": "", "MSE": ""}
import torch
from typing import Type
# TODO move decode example
class SimpleLinearModel(torch.nn.Module):
    """
    A very simple two-layer baseline model, useful for flushing out
    bugs in the surrounding train/validation loops.
    """

    def __init__(self, seq_length: int, n_time_series: int, output_seq_len=1):
        super().__init__()
        self.forecast_history = seq_length
        self.n_time_series = n_time_series
        # First layer collapses the feature dimension to 1; second maps
        # the sequence dimension onto the forecast horizon.
        self.initial_layer = torch.nn.Linear(n_time_series, 1)
        self.output_layer = torch.nn.Linear(seq_length, output_seq_len)
        self.output_len = output_seq_len

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: a tensor of dimension (B, L, M) — batch, sequence length,
        number of time series. Returns a (B, output_len) tensor.
        """
        collapsed = self.initial_layer(x)      # (B, L, 1)
        swapped = collapsed.permute(0, 2, 1)   # (B, 1, L)
        forecast = self.output_layer(swapped)  # (B, 1, output_len)
        return forecast.view(-1, self.output_len)
def simple_decode(model: Type[torch.nn.Module],
src: torch.Tensor,
max_seq_len: int,
real_target: torch.Tensor,
start_symbol=None,
output_len=1,
device='cpu',
unsqueeze_dim=1,
meta_data=None,
use_real_target: bool = True) -> torch.Tensor:
"""
:model a PyTorch model to be used for decoding
:src the source tensor
:the max length sequence to return
:real_target the actual target values we want to forecast (don't worry they are masked)
:start_symbol used to match the function signature of greedy_decode not ever used here though.
:output_len potentially used to forecast multiple steps at once. Not implemented yet though.
:device used to to match function signature
:returns a torch.Tensor of dimension (B, max_seq_len, M)
"""
real_target = real_target.float()
real_target2 = real_target.clone()
# Use last value
ys = src[:, -1, :].unsqueeze(unsqueeze_dim)
for i in range(0, max_seq_len, output_len):
with torch.no_grad():
if meta_data: