# xgb_online_pred.py
import xgboost as xgb
import pandas as pd
# Train an XGBoost binary classifier on the training CSV, predict the
# positive-class probability for every row of the test CSV, and write those
# probabilities into a copy of the submission template.

# Load the training and test feature tables.
print('Load data...')
TrainFeature_best_score = pd.read_csv('train_27-29.csv')
TestFeature_best_score = pd.read_csv('test.csv')
sz = TrainFeature_best_score.shape
szpred = TestFeature_best_score.shape
print(sz)
print(szpred)

# Feature columns are picked by position: every column from index 2 up to the
# test file's column count. The same positions are applied to both frames.
# NOTE(review): this assumes train and test share the same column layout --
# confirm against the CSV files.
test_zuhe = list(range(2, szpred[1]))
best_score_zuhe = test_zuhe
print(best_score_zuhe)

# `DataFrame.ix` was removed in pandas 1.0; `.iloc` performs the positional
# selection that the integer index list above calls for.
train_X = TrainFeature_best_score.iloc[:, best_score_zuhe]
train_Y = TrainFeature_best_score.iloc[:, 0]  # first column is used as the target
print(train_X.shape)
print(train_Y.shape)

print('Start training...')
# NOTE(review): newer xgboost releases replaced `silent` with `verbosity`;
# confirm which one the installed version expects.
gbm = xgb.XGBClassifier(
    learning_rate=0.1,
    max_depth=8,
    n_estimators=250,
    silent=False,
    objective="binary:logistic",
)
gbm.fit(train_X, train_Y)

pred_X = TestFeature_best_score.iloc[:, best_score_zuhe]
print('Start predicting...')
# predict_proba returns one column per class; column 1 is kept as the
# probability written to the submission.
pred_Y = gbm.predict_proba(pred_X)[:, 1]
print(pred_Y)

# Optional diagnostics, disabled by default:
# print('Calculate feature importances...')
# print('Feature importances:', list(gbm.feature_importances_))

# Fill the 'prob' column of the submission template and save it under a new name.
submssion = pd.read_csv('submission.csv')
submssion['prob'] = pred_Y
submssion.to_csv('submission_xgb.csv', index=False)
print('预测完成')