"""
modified from https://arxiv.org/abs/1811.00855
SR-GNN: Session-based Recommendation with graph neural networks
"""
from __future__ import division
import numpy as np
from model import *
from utils import *
import pickle
import argparse
import datetime
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='sample', help='dataset name: diginetica/yoochoose1_4/yoochoose1_64/sample')
parser.add_argument('--validation', action='store_true', help='validation')
parser.add_argument('--epoch', type=int, default=30, help='number of epochs to train for')
parser.add_argument('--batchSize', type=int, default=100, help='input batch size')
parser.add_argument('--hiddenSize', type=int, default=100, help='hidden state size')
parser.add_argument('--l2', type=float, default=1e-6, help='l2 penalty')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--layers', type=int, default=1, help='number of SGC propagation layers')
parser.add_argument('--lr_dc', type=float, default=0.1, help='learning rate decay rate')
parser.add_argument('--lr_dc_step', type=int, default=3, help='number of epochs after which the learning rate decays')
opt = parser.parse_args()
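# Example invocation (hypothetical; assumes the preprocessed files
# datasets/<name>/train.txt and datasets/<name>/test.txt already exist):
#   python main.py --dataset diginetica --epoch 30 --batchSize 100 --lr 0.001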
## Load the training set
train_data = pickle.load(open('datasets/' + opt.dataset + '/train.txt', 'rb'))
## Load the validation or test set: split_validation (from utils.py) holds out
## 10% of the training sessions as a validation set
if opt.validation:
    train_data, test_data = split_validation(train_data, 0.1)
else:
    test_data = pickle.load(open('datasets/' + opt.dataset + '/test.txt', 'rb'))
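# n_node is the size of the item-embedding vocabulary for each preprocessed
# dataset; 310 corresponds to the bundled sample data.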
if opt.dataset == 'diginetica':
    n_node = 43098
elif opt.dataset == 'yoochoose1_64' or opt.dataset == 'yoochoose1_4':
    n_node = 37484
else:
    n_node = 310
train_data = Data(train_data, shuffle=True)
test_data = Data(test_data, shuffle=False)
model = SGNREC(hidden_size=opt.hiddenSize, out_size=opt.hiddenSize, batch_size=opt.batchSize,
               n_node=n_node, lr=opt.lr, l2=opt.l2, layers=opt.layers, lr_dc=opt.lr_dc,
               decay=opt.lr_dc_step * len(train_data.inputs) / opt.batchSize)
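# The decay argument above is lr_dc_step times the number of optimizer steps
# per epoch (sessions / batch size), so the learning rate is presumably
# multiplied by lr_dc once every lr_dc_step epochs.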
print(opt)
best_result = [0, 0]  # best [Recall@20, MRR@20] observed so far
best_epoch = [0, 0]   # epochs at which those bests occurred
for epoch in range(opt.epoch):
    print('epoch: ', epoch, '===========================================')
    slices = train_data.generate_batch(model.batch_size)
    fetches = [model.opt, model.loss_train, model.global_step]
    print('start training: ', datetime.datetime.now())
    loss_ = []
    for i in slices:
        adj_in, adj_out, alias, item, mask, targets = train_data.get_slice(i)
        _, loss, _ = model.run(fetches, targets, item, adj_in, adj_out, alias, mask)
        loss_.append(loss)
    loss = np.mean(loss_)
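    # Evaluate Recall@20 and MRR@20 on the held-out set after every epoch.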
    slices = test_data.generate_batch(model.batch_size)
    print('start predicting: ', datetime.datetime.now())
    hit, mrr, test_loss_ = [], [], []
    for i in slices:
        adj_in, adj_out, alias, item, mask, targets = test_data.get_slice(i)
        scores, test_loss = model.run([model.score_test, model.loss_test], targets, item, adj_in, adj_out, alias, mask)
        test_loss_.append(test_loss)
        # Column indices of the 20 highest-scoring items, in ascending score order.
        index = np.argsort(scores, 1)[:, -20:]
        for score, target in zip(index, targets):
            # Targets are 1-based item IDs; score columns are 0-indexed.
            hit.append(np.isin(target - 1, score))
            if len(np.where(score == target - 1)[0]) == 0:
                mrr.append(0)
            else:
                # Position p in the ascending top-20 corresponds to rank 20 - p.
                mrr.append(1 / (20 - np.where(score == target - 1)[0][0]))
    hit = np.mean(hit) * 100
    mrr = np.mean(mrr) * 100
    test_loss = np.mean(test_loss_)
    if hit >= best_result[0]:
        best_result[0] = hit
        best_epoch[0] = epoch
    if mrr >= best_result[1]:
        best_result[1] = mrr
        best_epoch[1] = epoch
    print('train_loss:\t%.4f\ttest_loss:\t%.4f\tRecall@20:\t%.4f\tMRR@20:\t%.4f\tEpoch:\t%d,\t%d' %
          (loss, test_loss, best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
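# A minimal, self-contained sketch of the top-20 metric logic above, using
# hypothetical toy numbers (not part of the original script):
#
#   scores = np.array([[0.1, 0.9, 0.3, 0.2]])   # one session, 4 candidate items
#   target = 2                                   # 1-based ID of the true next item
#   topk = np.argsort(scores, 1)[:, -2:]         # top-2 as a stand-in for top-20
#   hit = np.isin(target - 1, topk[0])           # True: item index 1 ranks highest
#   pos = np.where(topk[0] == target - 1)[0][0]  # pos == 1 (last column == best)
#   rr = 1 / (2 - pos)                           # reciprocal rank == 1.0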