diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..98fa2d5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+utest_experiment/*
+heatmap.png
+last_test_metrics.csv
+preds.csv
+.ipynb_checkpoints/
+data/
+.DS_Store
+*.pkl
diff --git a/README.md b/README.md
index bc43d28..324c4c2 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,9 @@
 # Graph WaveNet for Deep Spatial-Temporal Graph Modeling
 This is the original pytorch implementation of Graph WaveNet in the following paper:
-[Graph WaveNet for Deep Spatial-Temporal Graph Modeling, IJCAI 2019] (https://arxiv.org/abs/1906.00121).
+[Graph WaveNet for Deep Spatial-Temporal Graph Modeling, IJCAI 2019](https://arxiv.org/abs/1906.00121),
+with modifications presented in [Incrementally Improving Graph WaveNet Performance on Traffic Prediction](https://arxiv.org/abs/1912.07390):

@@ -14,9 +16,9 @@ This is the original pytorch implementation of Graph WaveNet in the following pa
 ## Data Preparation
-### Step1: Download METR-LA and PEMS-BAY data from [Google Drive](https://drive.google.com/open?id=10FOTa6HXPqX8Pf5WRoRwcFnW9BrNZEIX) or [Baidu Yun](https://pan.baidu.com/s/14Yy9isAIZYdU__OYEQGa_g) links provided by [DCRNN](https://github.com/liyaguang/DCRNN).
+1) Download METR-LA and PEMS-BAY data from [Google Drive](https://drive.google.com/open?id=10FOTa6HXPqX8Pf5WRoRwcFnW9BrNZEIX) or [Baidu Yun](https://pan.baidu.com/s/14Yy9isAIZYdU__OYEQGa_g) links provided by [DCRNN](https://github.com/liyaguang/DCRNN).
-### Step2:
+2)
 ```
 # Create data directories
@@ -29,40 +31,42 @@ python generate_training_data.py --output_dir=data/METR-LA --traffic_df_filename
 python generate_training_data.py --output_dir=data/PEMS-BAY --traffic_df_filename=data/pems-bay.h5
 ```
-## Experiments
-Train models configured in Table 3 of the paper.
+## Train Commands
+Note: train.py saves metrics to metrics.csv and test_metrics.csv in the directory specified by the `--save` arg
+
+Model that gets (3.00 - 3.02 Test MAE, ~2.73 Validation MAE)
+```
+python train.py --cat_feat_gc --fill_zeroes --do_graph_conv --addaptadj --randomadj --es_patience 20 --save logs/baseline_v2
+```
+
+Finetuning (2.99 - 3.00 MAE)
+```
+python generate_training_data.py --seq_length_y 6 --output_dir data/METR-LA_12_6
+python train.py --data data/METR-LA_12_6 --cat_feat_gc --fill_zeroes --do_graph_conv --addaptadj --randomadj --es_patience 20 --save logs/front_6
+python train.py --checkpoint logs/front_6/best_model.pth --cat_feat_gc --fill_zeroes --do_graph_conv --addaptadj --randomadj --es_patience 20 --save logs/finetuned
+
+```
+Original Graph Wavenet Model (3.04-3.07 MAE)
+```
+python train.py --clip 5 --lr_decay_rate 1. --nhid 32 --do_graph_conv --addaptadj --randomadj --save logs/baseline
 ```
-ep=100
-dv=cuda:0
-mkdir experiment
-mkdir experiment/metr
-
-#identity
-expid=1
-python train.py --device $dv --gcn_bool --adjtype identity --epoch $ep --expid $expid --save ./experiment/metr/metr > ./experiment/metr/train-$expid.log
-rm ./experiment/metr/metr_epoch*
-
-#forward-only
-expid=2
-python train.py --device $dv --gcn_bool --adjtype transition --epoch $ep --expid $expid --save ./experiment/metr/metr > ./experiment/metr/train-$expid.log
-rm ./experiment/metr/metr_epoch*
-
-#adaptive-only
-expid=3
-python train.py --device $dv --gcn_bool --adjtype transition --aptonly --addaptadj --randomadj --epoch $ep --expid $expid --save ./experiment/metr/metr > ./experiment/metr/train-$expid.log
-rm ./experiment/metr/metr_epoch*
-
-#forward-backward
-expid=4
-python train.py --device $dv --gcn_bool --adjtype doubletransition --epoch $ep --expid $expid --save ./experiment/metr/metr > ./experiment/metr/train-$expid.log
-rm ./experiment/metr/metr_epoch*
-
-#forward-backward-adaptive
-expid=5
-python train.py --device $dv --gcn_bool --adjtype doubletransition --addaptadj --randomadj --epoch $ep --expid $expid --save ./experiment/metr/metr > ./experiment/metr/train-$expid.log
-rm ./experiment/metr/metr_epoch*
+You can also train from a jupyter notebook with
+```{python}
+from train import main
+from durbango import pickle_load
+args = pickle_load('baseline_args.pkl')  # manipulate these in python
+args.lr_decay_rate = .97
+args.clip = 3
+args.save = 'logs/from_jupyter'
+main(args)  # takes roughly an hour depending on nhid, and early_stopping
 ```
+Train models configured in Table 3 of the original GraphWavenet paper by using the `--adjtype`, `--addaptadj`, and `--aptonly` command line arguments.
+These flags are (somewhat) documented in util.py.
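For reference, here is a minimal, untested Python sketch of looping over those Table 3 configurations from a notebook. It assumes `baseline_args.pkl` holds a complete default argument namespace (as in the jupyter example above); the configuration names and log directories are illustrative, and the flag names follow `util.get_shared_arg_parser()` and the shell script removed above.

```python
import os
from train import main
from durbango import pickle_load

# Illustrative names for the five adjacency setups from Table 3 of the paper.
TABLE3_CONFIGS = {
    'identity':         dict(adjtype='identity'),
    'forward_only':     dict(adjtype='transition'),
    'adaptive_only':    dict(adjtype='transition', aptonly=True, addaptadj=True, randomadj=True),
    'forward_backward': dict(adjtype='doubletransition'),
    'fwd_bwd_adaptive': dict(adjtype='doubletransition', addaptadj=True, randomadj=True),
}

for name, overrides in TABLE3_CONFIGS.items():
    args = pickle_load('baseline_args.pkl')
    args.do_graph_conv = True   # the old --gcn_bool flag
    args.addaptadj = False      # reset adjacency flags, then apply overrides
    args.aptonly = False
    args.randomadj = False
    for k, v in overrides.items():
        setattr(args, k, v)
    args.save = f'logs/table3_{name}'
    os.makedirs(args.save, exist_ok=True)  # main() writes metrics.csv here but does not create the dir
    main(args)
```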
+
+Run unit tests with `pytest`
+### Possible Improvements
+* move redundant `.transpose(1,3)` to dataloader or `load_dataset`
diff --git a/baseline_args.pkl b/baseline_args.pkl
new file mode 100644
index 0000000..b68ca90
Binary files /dev/null and b/baseline_args.pkl differ
diff --git a/engine.py b/engine.py
index 9fcd933..f2651df 100644
--- a/engine.py
+++ b/engine.py
@@ -1,43 +1,42 @@
 import torch.optim as optim
 from model import *
 import util
-class trainer():
-    def __init__(self, scaler, in_dim, seq_length, num_nodes, nhid , dropout, lrate, wdecay, device, supports, gcn_bool, addaptadj, aptinit):
-        self.model = gwnet(device, num_nodes, dropout, supports=supports, gcn_bool=gcn_bool, addaptadj=addaptadj, aptinit=aptinit, in_dim=in_dim, out_dim=seq_length, residual_channels=nhid, dilation_channels=nhid, skip_channels=nhid * 8, end_channels=nhid * 16)
-        self.model.to(device)
+
+class Trainer():
+    def __init__(self, model: GWNet, scaler, lrate, wdecay, clip=3, lr_decay_rate=.97):
+        self.model = model
+
         self.optimizer = optim.Adam(self.model.parameters(), lr=lrate, weight_decay=wdecay)
-        self.loss = util.masked_mae
         self.scaler = scaler
-        self.clip = 5
+        self.clip = clip
+        self.scheduler = optim.lr_scheduler.LambdaLR(
+            self.optimizer, lr_lambda=lambda epoch: lr_decay_rate ** epoch)
+
+    @classmethod
+    def from_args(cls, model, scaler, args):
+        return cls(model, scaler, args.learning_rate, args.weight_decay, clip=args.clip,
+                   lr_decay_rate=args.lr_decay_rate)
     def train(self, input, real_val):
         self.model.train()
         self.optimizer.zero_grad()
         input = nn.functional.pad(input,(1,0,0,0))
-        output = self.model(input)
-        output = output.transpose(1,3)
-        #output = [batch_size,12,num_nodes,1]
-        real = torch.unsqueeze(real_val,dim=1)
+        output = self.model(input).transpose(1,3)  # now, output = [batch_size,1,num_nodes, seq_length]
         predict = self.scaler.inverse_transform(output)
-
-        loss = self.loss(predict, real, 0.0)
-        loss.backward()
+        assert predict.shape[1] == 1
+        mae, mape, rmse = util.calc_metrics(predict.squeeze(1), real_val, null_val=0.0)
+        mae.backward()
         if self.clip is not None:
             torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
         self.optimizer.step()
-        mape = util.masked_mape(predict,real,0.0).item()
-        rmse = util.masked_rmse(predict,real,0.0).item()
-        return loss.item(),mape,rmse
+        return mae.item(),mape.item(),rmse.item()
     def eval(self, input, real_val):
         self.model.eval()
         input = nn.functional.pad(input,(1,0,0,0))
-        output = self.model(input)
-        output = output.transpose(1,3)
-        #output = [batch_size,12,num_nodes,1]
+        output = self.model(input).transpose(1,3)  # [batch_size,seq_length,num_nodes,1]
         real = torch.unsqueeze(real_val,dim=1)
         predict = self.scaler.inverse_transform(output)
-        loss = self.loss(predict, real, 0.0)
-        mape = util.masked_mape(predict,real,0.0).item()
-        rmse = util.masked_rmse(predict,real,0.0).item()
-        return loss.item(),mape,rmse
+        predict = torch.clamp(predict, min=0., max=70.)
+ mae, mape, rmse = [x.item() for x in util.calc_metrics(predict, real, null_val=0.0)] + return mae, mape, rmse diff --git a/exp_results.py b/exp_results.py new file mode 100644 index 0000000..c86dce1 --- /dev/null +++ b/exp_results.py @@ -0,0 +1,38 @@ +"""Utilities for comparing metrics saved by train.py""" +import pandas as pd +import os +from glob import glob +import matplotlib.pyplot as plt + + +def summary(d): + try: + tr_val = pd.read_csv(f'{d}/metrics.csv', index_col=0) + tr_ser = tr_val.loc[tr_val.valid_loss.idxmin()] + tr_ser['best_epoch'] = tr_val.valid_loss.idxmin() + tr_ser['min_train_loss'] = tr_val.train_loss.min() + except FileNotFoundError: + tr_ser = pd.Series() + try: + tmet = pd.read_csv(f'{d}/test_metrics.csv', index_col=0) + tmean = tmet.add_prefix('test_').mean() + + except FileNotFoundError: + tmean = pd.Series() + tab = pd.concat([tr_ser, tmean]).round(3) + return tab + +def loss_curve(d): + if 'logs' not in d: d = f'logs/{d}' + tr_val = pd.read_csv(f'{d}/metrics.csv', index_col=0) + return tr_val[['train_loss', 'valid_loss']] + + +def plot_loss_curve(log_dir): + d = loss_curve(log_dir) + ax = d.plot() + plt.axhline(d.valid_loss.min()) + print(d.valid_loss.idxmin()) + +def make_results_table(): + return pd.DataFrame({os.path.basename(c): summary(c) for c in glob('logs/*')}).T.sort_values('valid_loss') diff --git a/gen_adj_mx.py b/gen_adj_mx.py new file mode 100644 index 0000000..39253d8 --- /dev/null +++ b/gen_adj_mx.py @@ -0,0 +1,63 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import numpy as np +import pandas as pd +import pickle + + +def get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1): + """ + + :param distance_df: data frame with three columns: [from, to, distance]. + :param sensor_ids: list of sensor ids. + :param normalized_k: entries that become lower than normalized_k after normalization are set to zero for sparsity. + :return: + """ + num_sensors = len(sensor_ids) + dist_mx = np.zeros((num_sensors, num_sensors), dtype=np.float32) + dist_mx[:] = np.inf + # Builds sensor id to index map. + sensor_id_to_ind = {} + for i, sensor_id in enumerate(sensor_ids): + sensor_id_to_ind[sensor_id] = i + + # Fills cells in the matrix with distances. + for row in distance_df.values: + if row[0] not in sensor_id_to_ind or row[1] not in sensor_id_to_ind: + continue + dist_mx[sensor_id_to_ind[row[0]], sensor_id_to_ind[row[1]]] = row[2] + + # Calculates the standard deviation as theta. + distances = dist_mx[~np.isinf(dist_mx)].flatten() + std = distances.std() + adj_mx = np.exp(-np.square(dist_mx / std)) + # Make the adjacent matrix symmetric by taking the max. + # adj_mx = np.maximum.reduce([adj_mx, adj_mx.T]) + + # Sets entries that lower than a threshold, i.e., k, to zero for sparsity. 
+ adj_mx[adj_mx < normalized_k] = 0 + return sensor_ids, sensor_id_to_ind, adj_mx + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--sensor_ids_filename', type=str, default='data/sensor_graph/graph_sensor_ids.txt', + help='File containing sensor ids separated by comma.') + parser.add_argument('--distances_filename', type=str, default='data/sensor_graph/distances_la_2012.csv', + help='CSV file containing sensor distances with three columns: [from, to, distance].') + parser.add_argument('--normalized_k', type=float, default=0.1, + help='Entries that become lower than normalized_k after normalization are set to zero for sparsity.') + parser.add_argument('--output_pkl_filename', type=str, default='data/sensor_graph/adj_mat.pkl', + help='Path of the output file.') + args = parser.parse_args() + + with open(args.sensor_ids_filename) as f: + sensor_ids = f.read().strip().split(',') + distance_df = pd.read_csv(args.distances_filename, dtype={'from': 'str', 'to': 'str'}) + _, sensor_id_to_ind, adj_mx = get_adjacency_matrix(distance_df, sensor_ids, args.normalized_k) + # Save to pickle file. + with open(args.output_pkl_filename, 'wb') as f: + pickle.dump([sensor_ids, sensor_id_to_ind, adj_mx], f, protocol=2) diff --git a/model.py b/model.py index 3710878..c01ef04 100644 --- a/model.py +++ b/model.py @@ -1,208 +1,188 @@ import torch import torch.nn as nn +from torch.nn import BatchNorm2d, Conv1d, Conv2d, ModuleList, Parameter import torch.nn.functional as F -from torch.autograd import Variable -import sys -class nconv(nn.Module): - def __init__(self): - super(nconv,self).__init__() +def nconv(x, A): + """Multiply x by adjacency matrix along source node axis""" + return torch.einsum('ncvl,vw->ncwl', (x, A)).contiguous() - def forward(self,x, A): - x = torch.einsum('ncvl,vw->ncwl',(x,A)) - return x.contiguous() - -class linear(nn.Module): - def __init__(self,c_in,c_out): - super(linear,self).__init__() - self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0,0), stride=(1,1), bias=True) - - def forward(self,x): - return self.mlp(x) - -class gcn(nn.Module): - def __init__(self,c_in,c_out,dropout,support_len=3,order=2): - super(gcn,self).__init__() - self.nconv = nconv() - c_in = (order*support_len+1)*c_in - self.mlp = linear(c_in,c_out) +class GraphConvNet(nn.Module): + def __init__(self, c_in, c_out, dropout, support_len=3, order=2): + super().__init__() + c_in = (order * support_len + 1) * c_in + self.final_conv = Conv2d(c_in, c_out, (1, 1), padding=(0, 0), stride=(1, 1), bias=True) self.dropout = dropout self.order = order - def forward(self,x,support): + def forward(self, x, support: list): out = [x] for a in support: - x1 = self.nconv(x,a) + x1 = nconv(x, a) out.append(x1) for k in range(2, self.order + 1): - x2 = self.nconv(x1,a) + x2 = nconv(x1, a) out.append(x2) x1 = x2 - h = torch.cat(out,dim=1) - h = self.mlp(h) + h = torch.cat(out, dim=1) + h = self.final_conv(h) h = F.dropout(h, self.dropout, training=self.training) return h -class gwnet(nn.Module): - def __init__(self, device, num_nodes, dropout=0.3, supports=None, gcn_bool=True, addaptadj=True, aptinit=None, in_dim=2,out_dim=12,residual_channels=32,dilation_channels=32,skip_channels=256,end_channels=512,kernel_size=2,blocks=4,layers=2): - super(gwnet, self).__init__() +class GWNet(nn.Module): + def __init__(self, device, num_nodes, dropout=0.3, supports=None, do_graph_conv=True, + addaptadj=True, aptinit=None, in_dim=2, out_dim=12, + residual_channels=32, dilation_channels=32, 
cat_feat_gc=False, + skip_channels=256, end_channels=512, kernel_size=2, blocks=4, layers=2, + apt_size=10): + super().__init__() self.dropout = dropout self.blocks = blocks self.layers = layers - self.gcn_bool = gcn_bool + self.do_graph_conv = do_graph_conv + self.cat_feat_gc = cat_feat_gc self.addaptadj = addaptadj - self.filter_convs = nn.ModuleList() - self.gate_convs = nn.ModuleList() - self.residual_convs = nn.ModuleList() - self.skip_convs = nn.ModuleList() - self.bn = nn.ModuleList() - self.gconv = nn.ModuleList() - self.start_conv = nn.Conv2d(in_channels=in_dim, - out_channels=residual_channels, - kernel_size=(1,1)) - self.supports = supports + if self.cat_feat_gc: + self.start_conv = nn.Conv2d(in_channels=1, # hard code to avoid errors + out_channels=residual_channels, + kernel_size=(1, 1)) + self.cat_feature_conv = nn.Conv2d(in_channels=in_dim - 1, + out_channels=residual_channels, + kernel_size=(1, 1)) + else: + self.start_conv = nn.Conv2d(in_channels=in_dim, + out_channels=residual_channels, + kernel_size=(1, 1)) + self.fixed_supports = supports or [] receptive_field = 1 - self.supports_len = 0 - if supports is not None: - self.supports_len += len(supports) - - if gcn_bool and addaptadj: + self.supports_len = len(self.fixed_supports) + if do_graph_conv and addaptadj: if aptinit is None: - if supports is None: - self.supports = [] - self.nodevec1 = nn.Parameter(torch.randn(num_nodes, 10).to(device), requires_grad=True).to(device) - self.nodevec2 = nn.Parameter(torch.randn(10, num_nodes).to(device), requires_grad=True).to(device) - self.supports_len +=1 + nodevecs = torch.randn(num_nodes, apt_size), torch.randn(apt_size, num_nodes) else: - if supports is None: - self.supports = [] - m, p, n = torch.svd(aptinit) - initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5)) - initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t()) - self.nodevec1 = nn.Parameter(initemb1, requires_grad=True).to(device) - self.nodevec2 = nn.Parameter(initemb2, requires_grad=True).to(device) - self.supports_len += 1 - + nodevecs = self.svd_init(apt_size, aptinit) + self.supports_len += 1 + self.nodevec1, self.nodevec2 = [Parameter(n.to(device), requires_grad=True) for n in nodevecs] + depth = list(range(blocks * layers)) + # 1x1 convolution for residual and skip connections (slightly different see docstring) + self.residual_convs = ModuleList([Conv1d(dilation_channels, residual_channels, (1, 1)) for _ in depth]) + self.skip_convs = ModuleList([Conv1d(dilation_channels, skip_channels, (1, 1)) for _ in depth]) + self.bn = ModuleList([BatchNorm2d(residual_channels) for _ in depth]) + self.graph_convs = ModuleList([GraphConvNet(dilation_channels, residual_channels, dropout, support_len=self.supports_len) + for _ in depth]) + self.filter_convs = ModuleList() + self.gate_convs = ModuleList() for b in range(blocks): additional_scope = kernel_size - 1 - new_dilation = 1 + D = 1 # dilation for i in range(layers): # dilated convolutions - self.filter_convs.append(nn.Conv2d(in_channels=residual_channels, - out_channels=dilation_channels, - kernel_size=(1,kernel_size),dilation=new_dilation)) - - self.gate_convs.append(nn.Conv1d(in_channels=residual_channels, - out_channels=dilation_channels, - kernel_size=(1, kernel_size), dilation=new_dilation)) - - # 1x1 convolution for residual connection - self.residual_convs.append(nn.Conv1d(in_channels=dilation_channels, - out_channels=residual_channels, - kernel_size=(1, 1))) - - # 1x1 convolution for skip connection - 
self.skip_convs.append(nn.Conv1d(in_channels=dilation_channels, - out_channels=skip_channels, - kernel_size=(1, 1))) - self.bn.append(nn.BatchNorm2d(residual_channels)) - new_dilation *=2 + self.filter_convs.append(Conv2d(residual_channels, dilation_channels, (1, kernel_size), dilation=D)) + self.gate_convs.append(Conv1d(residual_channels, dilation_channels, (1, kernel_size), dilation=D)) + D *= 2 receptive_field += additional_scope additional_scope *= 2 - if self.gcn_bool: - self.gconv.append(gcn(dilation_channels,residual_channels,dropout,support_len=self.supports_len)) - - - - self.end_conv_1 = nn.Conv2d(in_channels=skip_channels, - out_channels=end_channels, - kernel_size=(1,1), - bias=True) - - self.end_conv_2 = nn.Conv2d(in_channels=end_channels, - out_channels=out_dim, - kernel_size=(1,1), - bias=True) - self.receptive_field = receptive_field - - - def forward(self, input): - in_len = input.size(3) - if in_len dilate -|----| * ----|-- 1x1 -- + --> *input* - # |-- conv -- sigm --| | + # | |-dil_conv -- tanh --| | + # ---| * ----|-- 1x1 -- + --> *x_in* + # |-dil_conv -- sigm --| | # 1x1 # | # ---------------------------------------> + -------------> *skip* - - #(dilation, init_dilation) = self.dilations[i] - - #residual = dilation_func(x, dilation, init_dilation, i) residual = x # dilated convolution - filter = self.filter_convs[i](residual) - filter = torch.tanh(filter) - gate = self.gate_convs[i](residual) - gate = torch.sigmoid(gate) + filter = torch.tanh(self.filter_convs[i](residual)) + gate = torch.sigmoid(self.gate_convs[i](residual)) x = filter * gate - # parametrized skip connection - - s = x - s = self.skip_convs[i](s) - try: - skip = skip[:, :, :, -s.size(3):] + s = self.skip_convs[i](x) # what are we skipping?? + try: # if i > 0 this works + skip = skip[:, :, :, -s.size(3):] # TODO(SS): Mean/Max Pool? except: skip = 0 skip = s + skip + if i == (self.blocks * self.layers - 1): # last X getting ignored anyway + break - - if self.gcn_bool and self.supports is not None: - if self.addaptadj: - x = self.gconv[i](x, new_supports) - else: - x = self.gconv[i](x,self.supports) + if self.do_graph_conv: + graph_out = self.graph_convs[i](x, adjacency_matrices) + x = x + graph_out if self.cat_feat_gc else graph_out else: x = self.residual_convs[i](x) - - x = x + residual[:, :, :, -x.size(3):] - - + x = x + residual[:, :, :, -x.size(3):] # TODO(SS): Mean/Max Pool? x = self.bn[i](x) - x = F.relu(skip) + x = F.relu(skip) # ignore last X? 
x = F.relu(self.end_conv_1(x)) - x = self.end_conv_2(x) + x = self.end_conv_2(x) # downsample to (bs, seq_length, 207, nfeatures) return x diff --git a/requirements.txt b/requirements.txt index 6ac2976..af87cb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,9 @@ +torch>=1.3.1 matplotlib numpy scipy pandas -torch argparse +durbango +fastprogress +pytest diff --git a/test.py b/test.py index 3d0d15e..f874512 100644 --- a/test.py +++ b/test.py @@ -1,111 +1,42 @@ import util -import argparse from model import * import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns -parser = argparse.ArgumentParser() -parser.add_argument('--device',type=str,default='cuda:3',help='') -parser.add_argument('--data',type=str,default='data/METR-LA',help='data path') -parser.add_argument('--adjdata',type=str,default='data/sensor_graph/adj_mx.pkl',help='adj data path') -parser.add_argument('--adjtype',type=str,default='doubletransition',help='adj type') -parser.add_argument('--gcn_bool',action='store_true',help='whether to add graph convolution layer') -parser.add_argument('--aptonly',action='store_true',help='whether only adaptive adj') -parser.add_argument('--addaptadj',action='store_true',help='whether add adaptive adj') -parser.add_argument('--randomadj',action='store_true',help='whether random initialize adaptive adj') -parser.add_argument('--seq_length',type=int,default=12,help='') -parser.add_argument('--nhid',type=int,default=32,help='') -parser.add_argument('--in_dim',type=int,default=2,help='inputs dimension') -parser.add_argument('--num_nodes',type=int,default=207,help='number of nodes') -parser.add_argument('--batch_size',type=int,default=64,help='batch size') -parser.add_argument('--learning_rate',type=float,default=0.001,help='learning rate') -parser.add_argument('--dropout',type=float,default=0.3,help='dropout rate') -parser.add_argument('--weight_decay',type=float,default=0.0001,help='weight decay rate') -parser.add_argument('--checkpoint',type=str,help='') -parser.add_argument('--plotheatmap',type=str,default='True',help='') - -args = parser.parse_args() - - - - -def main(): +def main(args, save_pred_path='preds.csv', save_metrics_path='last_test_metrics.csv', loader='test', **model_kwargs): device = torch.device(args.device) - - _, _, adj_mx = util.load_adj(args.adjdata,args.adjtype) - supports = [torch.tensor(i).to(device) for i in adj_mx] - if args.randomadj: - adjinit = None - else: - adjinit = supports[0] - - if args.aptonly: - supports = None - - model = gwnet(device, args.num_nodes, args.dropout, supports=supports, gcn_bool=args.gcn_bool, addaptadj=args.addaptadj, aptinit=adjinit) - model.to(device) + adjinit, supports = util.make_graph_inputs(args, device) + model = GWNet.from_args(args, device, supports, adjinit, **model_kwargs) model.load_state_dict(torch.load(args.checkpoint)) + model.to(device) model.eval() - - - print('model load successfully') - - dataloader = util.load_dataset(args.data, args.batch_size, args.batch_size, args.batch_size) - scaler = dataloader['scaler'] - outputs = [] - realy = torch.Tensor(dataloader['y_test']).to(device) + print('model loaded successfully') + data = util.load_dataset(args.data, args.batch_size, args.batch_size, args.batch_size, n_obs=args.n_obs, fill_zeroes=args.fill_zeroes) + scaler = data['scaler'] + realy = torch.Tensor(data[f'y_{loader}']).to(device) realy = realy.transpose(1,3)[:,0,:,:] - - for iter, (x, y) in enumerate(dataloader['test_loader'].get_iterator()): - testx = 
torch.Tensor(x).to(device) - testx = testx.transpose(1,3) - with torch.no_grad(): - preds = model(testx).transpose(1,3) - outputs.append(preds.squeeze()) - - yhat = torch.cat(outputs,dim=0) - yhat = yhat[:realy.size(0),...] - - - amae = [] - amape = [] - armse = [] - for i in range(12): - pred = scaler.inverse_transform(yhat[:,:,i]) - real = realy[:,:,i] - metrics = util.metric(pred,real) - log = 'Evaluate best model on test data for horizon {:d}, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}' - print(log.format(i+1, metrics[0], metrics[1], metrics[2])) - amae.append(metrics[0]) - amape.append(metrics[1]) - armse.append(metrics[2]) - - log = 'On average over 12 horizons, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}' - print(log.format(np.mean(amae),np.mean(amape),np.mean(armse))) - - - if args.plotheatmap == "True": - adp = F.softmax(F.relu(torch.mm(model.nodevec1, model.nodevec2)), dim=1) - device = torch.device('cpu') - adp.to(device) - adp = adp.cpu().detach().numpy() - adp = adp*(1/np.max(adp)) - df = pd.DataFrame(adp) - sns.heatmap(df, cmap="RdYlBu") - plt.savefig("./emb"+ '.pdf') - - y12 = realy[:,99,11].cpu().detach().numpy() - yhat12 = scaler.inverse_transform(yhat[:,99,11]).cpu().detach().numpy() - - y3 = realy[:,99,2].cpu().detach().numpy() - yhat3 = scaler.inverse_transform(yhat[:,99,2]).cpu().detach().numpy() - - df2 = pd.DataFrame({'real12':y12,'pred12':yhat12, 'real3': y3, 'pred3':yhat3}) - df2.to_csv('./wave.csv',index=False) + met_df, yhat = util.calc_tstep_metrics(model, device, data[f'{loader}_loader'], scaler, realy, args.seq_length) + df2 = util.make_pred_df(realy, yhat, scaler, args.seq_length) + met_df.to_csv(save_metrics_path) + df2.to_csv(save_pred_path, index=False) + if args.plotheatmap: plot_learned_adj_matrix(model) + return met_df, df2 + +def plot_learned_adj_matrix(model): + adp = F.softmax(F.relu(torch.mm(model.nodevec1, model.nodevec2)), dim=1) + adp = adp.cpu().detach().numpy() + adp = adp / np.max(adp) + df = pd.DataFrame(adp) + sns.heatmap(df, cmap="RdYlBu") + plt.savefig("heatmap.png") if __name__ == "__main__": - main() + parser = util.get_shared_arg_parser() + parser.add_argument('--checkpoint', type=str, help='') + parser.add_argument('--plotheatmap', action='store_true') + args = parser.parse_args() + main(args) diff --git a/test_args.pkl b/test_args.pkl new file mode 100644 index 0000000..0f8dbb8 Binary files /dev/null and b/test_args.pkl differ diff --git a/test_gwnet.py b/test_gwnet.py new file mode 100644 index 0000000..4e8022f --- /dev/null +++ b/test_gwnet.py @@ -0,0 +1,49 @@ +from train import main +import test +import unittest +from durbango import pickle_load +import pandas as pd +import os +import torch +import util +import shutil + +TRAIN_ARGS = 'test_args.pkl' +TEST_ARGS = 'test_script_args.pkl' +SAVE_DIR = 'utest_experiment/' +ARG_UPDATES = {'epochs': 1, 'n_iters': 1, 'batch_size': 2, 'n_obs': 2, + 'device': util.DEFAULT_DEVICE, 'save': SAVE_DIR, 'addaptadj': True, + 'apt_size': 2, 'nhid': 1, 'lr_decay_rate': 1., + 'in_dim': 1, 'cat_feat_gc': True, 'clip': 1, 'es_patience': 10, + 'checkpoint': '', 'fill_zeroes': False} + +MODEL_KWARGS = {'end_channels': 4, 'skip_channels': 2} +def modify_args(args, updates): + for k,v in updates.items(): + setattr(args, k, v) + return args + +class TestTrain(unittest.TestCase): + + @classmethod + def setUpClass(cls): + if os.path.exists(SAVE_DIR): + shutil.rmtree(SAVE_DIR) + os.makedirs(SAVE_DIR) + + def test_1_epoch(self): + args = modify_args(pickle_load(TRAIN_ARGS), ARG_UPDATES) + 
args.fp16 = '' + engine = main(args, **MODEL_KWARGS) + df = pd.read_csv(f'{args.save}/metrics.csv', index_col=0) + self.assertEqual(df.shape, (args.epochs, 6)) + test_df = pd.read_csv(f'{args.save}/test_metrics.csv', index_col=0) + self.assertEqual(test_df.shape, (12, 3)) + test_args = modify_args(pickle_load(TEST_ARGS), ARG_UPDATES) + test_args.checkpoint = SAVE_DIR + '/best_model.pth' + state_dict = torch.load(test_args.checkpoint) + self.assertTrue('nodevec1' in state_dict) + self.assertTrue(os.path.exists(test_args.checkpoint)) + new_met, new_preds = test.main(test_args, **MODEL_KWARGS) + deltas = test_df.mean() - new_met.mean() + self.assertGreaterEqual(.01, deltas.abs().max()) diff --git a/test_script_args.pkl b/test_script_args.pkl new file mode 100644 index 0000000..d276803 Binary files /dev/null and b/test_script_args.pkl differ diff --git a/train.py b/train.py index 9e1cf63..eca218c 100644 --- a/train.py +++ b/train.py @@ -1,175 +1,107 @@ import torch import numpy as np -import argparse +import pandas as pd import time import util -import matplotlib.pyplot as plt -from engine import trainer +from engine import Trainer +import os +from durbango import pickle_save +from fastprogress import progress_bar -parser = argparse.ArgumentParser() -parser.add_argument('--device',type=str,default='cuda:3',help='') -parser.add_argument('--data',type=str,default='data/METR-LA',help='data path') -parser.add_argument('--adjdata',type=str,default='data/sensor_graph/adj_mx.pkl',help='adj data path') -parser.add_argument('--adjtype',type=str,default='doubletransition',help='adj type') -parser.add_argument('--gcn_bool',action='store_true',help='whether to add graph convolution layer') -parser.add_argument('--aptonly',action='store_true',help='whether only adaptive adj') -parser.add_argument('--addaptadj',action='store_true',help='whether add adaptive adj') -parser.add_argument('--randomadj',action='store_true',help='whether random initialize adaptive adj') -parser.add_argument('--seq_length',type=int,default=12,help='') -parser.add_argument('--nhid',type=int,default=32,help='') -parser.add_argument('--in_dim',type=int,default=2,help='inputs dimension') -parser.add_argument('--num_nodes',type=int,default=207,help='number of nodes') -parser.add_argument('--batch_size',type=int,default=64,help='batch size') -parser.add_argument('--learning_rate',type=float,default=0.001,help='learning rate') -parser.add_argument('--dropout',type=float,default=0.3,help='dropout rate') -parser.add_argument('--weight_decay',type=float,default=0.0001,help='weight decay rate') -parser.add_argument('--epochs',type=int,default=100,help='') -parser.add_argument('--print_every',type=int,default=50,help='') -#parser.add_argument('--seed',type=int,default=99,help='random seed') -parser.add_argument('--save',type=str,default='./garage/metr',help='save path') -parser.add_argument('--expid',type=int,default=1,help='experiment id') +from model import GWNet +from util import calc_tstep_metrics +from exp_results import summary -args = parser.parse_args() - - - -def main(): - #set seed - #torch.manual_seed(args.seed) - #np.random.seed(args.seed) - #load data +def main(args, **model_kwargs): device = torch.device(args.device) - sensor_ids, sensor_id_to_ind, adj_mx = util.load_adj(args.adjdata,args.adjtype) - dataloader = util.load_dataset(args.data, args.batch_size, args.batch_size, args.batch_size) - scaler = dataloader['scaler'] - supports = [torch.tensor(i).to(device) for i in adj_mx] - - print(args) - - if args.randomadj: - adjinit = 
None - else: - adjinit = supports[0] - - if args.aptonly: - supports = None - - - - engine = trainer(scaler, args.in_dim, args.seq_length, args.num_nodes, args.nhid, args.dropout, - args.learning_rate, args.weight_decay, device, supports, args.gcn_bool, args.addaptadj, - adjinit) - - - print("start training...",flush=True) - his_loss =[] - val_time = [] - train_time = [] - for i in range(1,args.epochs+1): - #if i % 10 == 0: - #lr = max(0.000002,args.learning_rate * (0.1 ** (i // 10))) - #for g in engine.optimizer.param_groups: - #g['lr'] = lr - train_loss = [] - train_mape = [] - train_rmse = [] - t1 = time.time() - dataloader['train_loader'].shuffle() - for iter, (x, y) in enumerate(dataloader['train_loader'].get_iterator()): - trainx = torch.Tensor(x).to(device) - trainx= trainx.transpose(1, 3) - trainy = torch.Tensor(y).to(device) - trainy = trainy.transpose(1, 3) - metrics = engine.train(trainx, trainy[:,0,:,:]) - train_loss.append(metrics[0]) - train_mape.append(metrics[1]) - train_rmse.append(metrics[2]) - if iter % args.print_every == 0 : - log = 'Iter: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}' - print(log.format(iter, train_loss[-1], train_mape[-1], train_rmse[-1]),flush=True) - t2 = time.time() - train_time.append(t2-t1) - #validation - valid_loss = [] - valid_mape = [] - valid_rmse = [] - - - s1 = time.time() - for iter, (x, y) in enumerate(dataloader['val_loader'].get_iterator()): - testx = torch.Tensor(x).to(device) - testx = testx.transpose(1, 3) - testy = torch.Tensor(y).to(device) - testy = testy.transpose(1, 3) - metrics = engine.eval(testx, testy[:,0,:,:]) - valid_loss.append(metrics[0]) - valid_mape.append(metrics[1]) - valid_rmse.append(metrics[2]) - s2 = time.time() - log = 'Epoch: {:03d}, Inference Time: {:.4f} secs' - print(log.format(i,(s2-s1))) - val_time.append(s2-s1) - mtrain_loss = np.mean(train_loss) - mtrain_mape = np.mean(train_mape) - mtrain_rmse = np.mean(train_rmse) - - mvalid_loss = np.mean(valid_loss) - mvalid_mape = np.mean(valid_mape) - mvalid_rmse = np.mean(valid_rmse) - his_loss.append(mvalid_loss) - - log = 'Epoch: {:03d}, Train Loss: {:.4f}, Train MAPE: {:.4f}, Train RMSE: {:.4f}, Valid Loss: {:.4f}, Valid MAPE: {:.4f}, Valid RMSE: {:.4f}, Training Time: {:.4f}/epoch' - print(log.format(i, mtrain_loss, mtrain_mape, mtrain_rmse, mvalid_loss, mvalid_mape, mvalid_rmse, (t2 - t1)),flush=True) - torch.save(engine.model.state_dict(), args.save+"_epoch_"+str(i)+"_"+str(round(mvalid_loss,2))+".pth") - print("Average Training Time: {:.4f} secs/epoch".format(np.mean(train_time))) - print("Average Inference Time: {:.4f} secs".format(np.mean(val_time))) - - #testing - bestid = np.argmin(his_loss) - engine.model.load_state_dict(torch.load(args.save+"_epoch_"+str(bestid+1)+"_"+str(round(his_loss[bestid],2))+".pth")) - - - outputs = [] - realy = torch.Tensor(dataloader['y_test']).to(device) - realy = realy.transpose(1,3)[:,0,:,:] - - for iter, (x, y) in enumerate(dataloader['test_loader'].get_iterator()): - testx = torch.Tensor(x).to(device) - testx = testx.transpose(1,3) - with torch.no_grad(): - preds = engine.model(testx).transpose(1,3) - outputs.append(preds.squeeze()) - - yhat = torch.cat(outputs,dim=0) - yhat = yhat[:realy.size(0),...] 
- - - print("Training finished") - print("The valid loss on best model is", str(round(his_loss[bestid],4))) - - - amae = [] - amape = [] - armse = [] - for i in range(12): - pred = scaler.inverse_transform(yhat[:,:,i]) - real = realy[:,:,i] - metrics = util.metric(pred,real) - log = 'Evaluate best model on test data for horizon {:d}, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}' - print(log.format(i+1, metrics[0], metrics[1], metrics[2])) - amae.append(metrics[0]) - amape.append(metrics[1]) - armse.append(metrics[2]) - - log = 'On average over 12 horizons, Test MAE: {:.4f}, Test MAPE: {:.4f}, Test RMSE: {:.4f}' - print(log.format(np.mean(amae),np.mean(amape),np.mean(armse))) - torch.save(engine.model.state_dict(), args.save+"_exp"+str(args.expid)+"_best_"+str(round(his_loss[bestid],2))+".pth") - + data = util.load_dataset(args.data, args.batch_size, args.batch_size, args.batch_size, n_obs=args.n_obs, fill_zeroes=args.fill_zeroes) + scaler = data['scaler'] + aptinit, supports = util.make_graph_inputs(args, device) + + model = GWNet.from_args(args, device, supports, aptinit, **model_kwargs) + if args.checkpoint: + model.load_checkpoint(torch.load(args.checkpoint)) + model.to(device) + engine = Trainer.from_args(model, scaler, args) + metrics = [] + best_model_save_path = os.path.join(args.save, 'best_model.pth') + lowest_mae_yet = 100 # high value, will get overwritten + mb = progress_bar(list(range(1, args.epochs + 1))) + epochs_since_best_mae = 0 + for _ in mb: + train_loss, train_mape, train_rmse = [], [], [] + data['train_loader'].shuffle() + for iter, (x, y) in enumerate(data['train_loader'].get_iterator()): + trainx = torch.Tensor(x).to(device).transpose(1, 3) + trainy = torch.Tensor(y).to(device).transpose(1, 3) + yspeed = trainy[:, 0, :, :] + if yspeed.max() == 0: continue + mae, mape, rmse = engine.train(trainx, yspeed) + train_loss.append(mae) + train_mape.append(mape) + train_rmse.append(rmse) + if args.n_iters is not None and iter >= args.n_iters: + break + engine.scheduler.step() + _, valid_loss, valid_mape, valid_rmse = eval_(data['val_loader'], device, engine) + m = dict(train_loss=np.mean(train_loss), train_mape=np.mean(train_mape), + train_rmse=np.mean(train_rmse), valid_loss=np.mean(valid_loss), + valid_mape=np.mean(valid_mape), valid_rmse=np.mean(valid_rmse)) + + m = pd.Series(m) + metrics.append(m) + if m.valid_loss < lowest_mae_yet: + torch.save(engine.model.state_dict(), best_model_save_path) + lowest_mae_yet = m.valid_loss + epochs_since_best_mae = 0 + else: + epochs_since_best_mae += 1 + met_df = pd.DataFrame(metrics) + mb.comment = f'best val_loss: {met_df.valid_loss.min(): .3f}, current val_loss: {m.valid_loss:.3f}, current train loss: {m.train_loss: .3f}' + met_df.round(6).to_csv(f'{args.save}/metrics.csv') + if epochs_since_best_mae >= args.es_patience: break + # Metrics on test data + engine.model.load_state_dict(torch.load(best_model_save_path)) + realy = torch.Tensor(data['y_test']).transpose(1, 3)[:, 0, :, :].to(device) + test_met_df, yhat = calc_tstep_metrics(engine.model, device, data['test_loader'], scaler, realy, args.seq_length) + test_met_df.round(6).to_csv(os.path.join(args.save, 'test_metrics.csv')) + print(summary(args.save)) + +def eval_(ds, device, engine): + """Run validation.""" + valid_loss = [] + valid_mape = [] + valid_rmse = [] + s1 = time.time() + for (x, y) in ds.get_iterator(): + testx = torch.Tensor(x).to(device).transpose(1, 3) + testy = torch.Tensor(y).to(device).transpose(1, 3) + metrics = engine.eval(testx, testy[:, 0, :, :]) 
+        valid_loss.append(metrics[0])
+        valid_mape.append(metrics[1])
+        valid_rmse.append(metrics[2])
+    total_time = time.time() - s1
+    return total_time, valid_loss, valid_mape, valid_rmse
 if __name__ == "__main__":
+    parser = util.get_shared_arg_parser()
+    parser.add_argument('--epochs', type=int, default=100, help='')
+    parser.add_argument('--clip', type=int, default=3, help='Gradient Clipping')
+    parser.add_argument('--weight_decay', type=float, default=0.0001, help='weight decay rate')
+    parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
+    parser.add_argument('--lr_decay_rate', type=float, default=0.97, help='learning rate decay rate')
+    parser.add_argument('--save', type=str, default='experiment', help='save path')
+    parser.add_argument('--n_iters', default=None, help='quit after this many iterations')
+    parser.add_argument('--es_patience', type=int, default=20, help='quit if no improvement after this many iterations')
+
+    args = parser.parse_args()
     t1 = time.time()
-    main()
+    if not os.path.exists(args.save):
+        os.mkdir(args.save)
+    pickle_save(args, f'{args.save}/args.pkl')
+    main(args)
     t2 = time.time()
-    print("Total time spent: {:.4f}".format(t2-t1))
+    mins = (t2 - t1) / 60
+    print(f"Total time spent: {mins:.2f} minutes")
diff --git a/util.py b/util.py
index d41afd7..ef61743 100644
--- a/util.py
+++ b/util.py
@@ -1,10 +1,14 @@
+import argparse
 import pickle
 import numpy as np
 import os
+
+import pandas as pd
 import scipy.sparse as sp
 import torch
 from scipy.sparse import linalg
+DEFAULT_DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
 class DataLoader(object):
     def __init__(self, xs, ys, batch_size, pad_with_last_sample=True):
@@ -47,16 +51,18 @@ def _wrapper():
         return _wrapper()
+
 class StandardScaler():
-    """
-    Standard the input
-    """
-    def __init__(self, mean, std):
+    def __init__(self, mean, std, fill_zeroes=False):
         self.mean = mean
         self.std = std
+        self.fill_zeroes = fill_zeroes
     def transform(self, data):
+        if self.fill_zeroes:
+            mask = (data == 0)
+            data[mask] = self.mean
         return (data - self.mean) / self.std
     def inverse_transform(self, data):
@@ -121,6 +127,7 @@ def load_pickle(pickle_file):
         raise
     return pickle_data
+ADJ_CHOICES = ['scalap', 'normlap', 'symnadj', 'transition', 'identity']
 def load_adj(pkl_filename, adjtype):
     sensor_ids, sensor_id_to_ind, adj_mx = load_pickle(pkl_filename)
     if adjtype == "scalap":
@@ -141,13 +148,16 @@ def load_adj(pkl_filename, adjtype):
     return sensor_ids, sensor_id_to_ind, adj
-def load_dataset(dataset_dir, batch_size, valid_batch_size= None, test_batch_size=None):
+def load_dataset(dataset_dir, batch_size, valid_batch_size=None, test_batch_size=None, n_obs=None, fill_zeroes=True):
     data = {}
     for category in ['train', 'val', 'test']:
         cat_data = np.load(os.path.join(dataset_dir, category + '.npz'))
         data['x_' + category] = cat_data['x']
         data['y_' + category] = cat_data['y']
-    scaler = StandardScaler(mean=data['x_train'][..., 0].mean(), std=data['x_train'][..., 0].std())
+        if n_obs is not None:
+            data['x_' + category] = data['x_' + category][:n_obs]
+            data['y_' + category] = data['y_' + category][:n_obs]
+    scaler = StandardScaler(mean=data['x_train'][..., 0].mean(), std=data['x_train'][..., 0].std(), fill_zeroes=fill_zeroes)
     # Data format
     for category in ['train', 'val', 'test']:
         data['x_' + category][..., 0] = scaler.transform(data['x_' + category][..., 0])
@@ -157,55 +167,94 @@
     data['scaler'] = scaler
     return data
-def
masked_mse(preds, labels, null_val=np.nan): + +def calc_metrics(preds, labels, null_val=0.): if np.isnan(null_val): mask = ~torch.isnan(labels) else: - mask = (labels!=null_val) + mask = (labels != null_val) mask = mask.float() - mask /= torch.mean((mask)) + mask /= torch.mean(mask) mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = (preds-labels)**2 + mse = (preds - labels) ** 2 + mae = torch.abs(preds-labels) + mape = mae / labels + mae, mape, mse = [mask_and_fillna(l, mask) for l in [mae, mape, mse]] + rmse = torch.sqrt(mse) + return mae, mape, rmse + + +def mask_and_fillna(loss, mask): loss = loss * mask loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) return torch.mean(loss) -def masked_rmse(preds, labels, null_val=np.nan): - return torch.sqrt(masked_mse(preds=preds, labels=labels, null_val=null_val)) +def calc_tstep_metrics(model, device, test_loader, scaler, realy, seq_length) -> pd.DataFrame: + model.eval() + outputs = [] + for _, (x, __) in enumerate(test_loader.get_iterator()): + testx = torch.Tensor(x).to(device).transpose(1, 3) + with torch.no_grad(): + preds = model(testx).transpose(1, 3) + outputs.append(preds.squeeze(1)) + yhat = torch.cat(outputs, dim=0)[:realy.size(0), ...] + test_met = [] -def masked_mae(preds, labels, null_val=np.nan): - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = torch.abs(preds-labels) - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) + for i in range(seq_length): + pred = scaler.inverse_transform(yhat[:, :, i]) + pred = torch.clamp(pred, min=0., max=70.) 
+ real = realy[:, :, i] + test_met.append([x.item() for x in calc_metrics(pred, real)]) + test_met_df = pd.DataFrame(test_met, columns=['mae', 'mape', 'rmse']).rename_axis('t') + return test_met_df, yhat -def masked_mape(preds, labels, null_val=np.nan): - if np.isnan(null_val): - mask = ~torch.isnan(labels) - else: - mask = (labels!=null_val) - mask = mask.float() - mask /= torch.mean((mask)) - mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask) - loss = torch.abs(preds-labels)/labels - loss = loss * mask - loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss) - return torch.mean(loss) +def _to_ser(arr): + return pd.DataFrame(arr.cpu().detach().numpy()).stack().rename_axis(['obs', 'sensor_id']) + + +def make_pred_df(realy, yhat, scaler, seq_length): + df = pd.DataFrame(dict(y_last=_to_ser(realy[:, :, seq_length - 1]), + yhat_last=_to_ser(scaler.inverse_transform(yhat[:, :, seq_length - 1])), + y_3=_to_ser(realy[:, :, 2]), + yhat_3=_to_ser(scaler.inverse_transform(yhat[:, :, 2])))) + return df -def metric(pred, real): - mae = masked_mae(pred,real,0.0).item() - mape = masked_mape(pred,real,0.0).item() - rmse = masked_rmse(pred,real,0.0).item() - return mae,mape,rmse +def make_graph_inputs(args, device): + sensor_ids, sensor_id_to_ind, adj_mx = load_adj(args.adjdata, args.adjtype) + supports = [torch.tensor(i).to(device) for i in adj_mx] + aptinit = None if args.randomadj else supports[0] # ignored without do_graph_conv and add_apt_adj + if args.aptonly: + if not args.addaptadj and args.do_graph_conv: raise ValueError( + 'WARNING: not using adjacency matrix') + supports = None + return aptinit, supports +def get_shared_arg_parser(): + parser = argparse.ArgumentParser() + parser.add_argument('--device', type=str, default='cuda:0', help='') + parser.add_argument('--data', type=str, default='data/METR-LA', help='data path') + parser.add_argument('--adjdata', type=str, default='data/sensor_graph/adj_mx.pkl', + help='adj data path') + parser.add_argument('--adjtype', type=str, default='doubletransition', help='adj type', choices=ADJ_CHOICES) + parser.add_argument('--do_graph_conv', action='store_true', + help='whether to add graph convolution layer') + parser.add_argument('--aptonly', action='store_true', help='whether only adaptive adj') + parser.add_argument('--addaptadj', action='store_true', help='whether add adaptive adj') + parser.add_argument('--randomadj', action='store_true', + help='whether random initialize adaptive adj') + parser.add_argument('--seq_length', type=int, default=12, help='') + parser.add_argument('--nhid', type=int, default=40, help='Number of channels for internal conv') + parser.add_argument('--in_dim', type=int, default=2, help='inputs dimension') + parser.add_argument('--num_nodes', type=int, default=207, help='number of nodes') + parser.add_argument('--batch_size', type=int, default=64, help='batch size') + parser.add_argument('--dropout', type=float, default=0.3, help='dropout rate') + parser.add_argument('--n_obs', default=None, help='Only use this many observations. For unit testing.') + parser.add_argument('--apt_size', default=10, type=int) + parser.add_argument('--cat_feat_gc', action='store_true') + parser.add_argument('--fill_zeroes', action='store_true') + parser.add_argument('--checkpoint', type=str, help='') + return parser
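As a quick illustration of the masked metrics defined in `util.calc_metrics` above: labels equal to `null_val` (0 marks a missing speed reading) are excluded by a mask that is renormalized by its mean, so valid entries are re-weighted instead of shrinking the denominator. This is a small sketch with made-up toy tensors (during training the shapes are `[batch, num_nodes, seq_length]`):

```python
import torch
from util import calc_metrics

preds = torch.tensor([[60., 55., 40.]])
labels = torch.tensor([[58., 0., 45.]])   # the middle sensor reading is missing

mae, mape, rmse = calc_metrics(preds, labels, null_val=0.0)
# Only the 1st and 3rd entries count: mae = (|60-58| + |40-45|) / 2 = 3.5
print(mae.item(), mape.item(), rmse.item())
```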