mala-lab · xuhongzuo · Mar 2, 2023 · Mar 2, 2023 · Apr 27, 2023 · Apr 27, 2023
diff --git a/README.md b/README.md
@@ -6,17 +6,11 @@
 [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/deep-isolation-forest-for-anomaly-detection/anomaly-detection-on-nb15-dos)](https://paperswithcode.com/sota/anomaly-detection-on-nb15-dos?p=deep-isolation-forest-for-anomaly-detection)
 
 
-This repository is the source code of the paper "Deep Isolation Forest for Anomaly Detection" (see full paper at https://arxiv.org/abs/2206.06602 )   
-Please consider citing our paper if you find this repository useful.  
+This repository contains the source code for the paper "**Deep Isolation Forest for Anomaly Detection**", published in TKDE (April 2023). See the full paper at https://arxiv.org/abs/2206.06602 or https://ieeexplore.ieee.org/document/10108034/
+
+
+### How to use?
 
-```
-@article{xu2022deep,
-  title={Deep Isolation Forest for Anomaly Detection},
-  author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun},
-  journal={arXiv preprint arXiv:2206.06602},
-  year={2022}
-}
-```
 
 DIF provides easy APIs like the sklearn style.
 We first instantiate the model class by giving the parameters  
@@ -27,7 +21,32 @@ from algorithms.dif import DIF
 model_configs = {'n_ensemble':50, 'n_estimators':6}
 model = DIF(**model_configs)
 model.fit(X_train)
-score = model.predict(X_test)
+score = model.decision_function(X_test)
+```
+
+:boom: **Note:**
+- DIF is also included in our `DeepOD` Python library; see https://github.com/xuhongzuo/DeepOD
+- A blog post about DIF (in Chinese) is available on Zhihu: https://zhuanlan.zhihu.com/p/625557221
+
+
+### Citation
+
+Please consider citing our paper if you find this repository useful.  
+
+H. Xu, G. Pang, Y. Wang and Y. Wang, "Deep Isolation Forest for Anomaly Detection," in IEEE Transactions on Knowledge and Data Engineering, doi: 10.1109/TKDE.2023.3270293.
+
 ```
+@ARTICLE{xu2023deep,
+  author={Xu, Hongzuo and Pang, Guansong and Wang, Yijie and Wang, Yongjun},
+  journal={IEEE Transactions on Knowledge and Data Engineering}, 
+  title={Deep Isolation Forest for Anomaly Detection}, 
+  year={2023},
+  volume={},
+  number={},
+  pages={1-14},
+  doi={10.1109/TKDE.2023.3270293}
+}
+```
+
 
 ---
diff --git a/algorithms/dif.py b/algorithms/dif.py
@@ -132,6 +132,9 @@ def __init__(self, network_name='mlp', network_class=None,
             self.network_args['be_size'] = None if self.new_ensemble_method == False else self.n_ensemble
         elif network_name == 'gin':
             self.network_args['activation'] = activation
+        elif network_name == 'dilated_conv':
+            self.network_args['hidden_dim'] = hidden_dim
+            self.network_args['n_emb'] = rep_dim
         if network_class is not None:
             self.Net = network_class
         print(f'network additional parameters: {network_args}')
@@ -336,8 +339,6 @@ def set_seed(seed):
         torch.cuda.manual_seed_all(seed)
         np.random.seed(seed)
         random.seed(seed)
-        torch.backends.cudnn.benchmark = False
-        torch.backends.cudnn.deterministic = True
 
     @staticmethod
     def single_predict_abla(x_reduced, clf):

diff --git a/algorithms/net_torch.py b/algorithms/net_torch.py
@@ -19,6 +19,8 @@ def choose_net(network_name):
         return LSTMNet
     elif network_name == 'gin':
         return GinEncoderGraph
+    elif network_name == 'dilated_conv':
+        return DilatedConvEncoder
     else:
         raise NotImplementedError("")
 
@@ -229,6 +231,73 @@ def forward(self, x):
         return emb
 
 
+class SamePadConv(torch.nn.Module):
+    """1-D convolution padded so the output length equals the input length."""
+
+    def __init__(self, in_channels, out_channels, kernel_size, dilation=1, groups=1):
+        super().__init__()
+        # Span of input positions covered by one (dilated) kernel application.
+        self.receptive_field = (kernel_size - 1) * dilation + 1
+        # Conv1d pads both sides; an even span yields one surplus output step.
+        self.remove = 1 if self.receptive_field % 2 == 0 else 0
+        self.conv = torch.nn.Conv1d(
+            in_channels, out_channels, kernel_size,
+            padding=self.receptive_field // 2, dilation=dilation, groups=groups,
+        )
+
+    def forward(self, x):
+        """Convolve ``x`` and trim the surplus trailing step, if any."""
+        y = self.conv(x)
+        return y[:, :, :-self.remove] if self.remove > 0 else y
+
+
+class ConvBlock(torch.nn.Module):
+    """Residual block: two GELU-activated same-length convolutions plus a skip path."""
+    def __init__(self, in_channels, out_channels, kernel_size, dilation, final=False):
+        super().__init__()
+        self.conv1 = SamePadConv(in_channels, out_channels, kernel_size, dilation=dilation)
+        self.conv2 = SamePadConv(out_channels, out_channels, kernel_size, dilation=dilation)
+        # A 1x1 conv reshapes the skip path when channel counts differ or the block is final.
+        needs_projection = final or in_channels != out_channels
+        self.projector = torch.nn.Conv1d(in_channels, out_channels, 1) if needs_projection else None
+
+    def forward(self, x):
+        """Return ``conv2(gelu(conv1(gelu(x))))`` added to the (optionally projected) input."""
+        shortcut = self.projector(x) if self.projector is not None else x
+        return self.conv2(F.gelu(self.conv1(F.gelu(x)))) + shortcut
+
+
+class DilatedConvEncoder(torch.nn.Module):
+    """Encode a sequence with stacked dilated ConvBlocks, max-pooled over time."""
+
+    def __init__(self, n_features, hidden_dim=20, n_emb=20, layers=1, kernel_size=3):
+        super().__init__()
+        self.input_fc = torch.nn.Linear(n_features, hidden_dim)
+        # `layers` blocks of width hidden_dim followed by one block of width n_emb.
+        widths = [hidden_dim] * layers + [n_emb]
+        last = len(widths) - 1
+        blocks = []
+        for depth, width in enumerate(widths):
+            # The first block is fed by input_fc; later ones by the preceding block.
+            fan_in = widths[depth - 1] if depth > 0 else hidden_dim
+            # Dilation doubles with depth.
+            blocks.append(ConvBlock(fan_in, width, kernel_size=kernel_size,
+                                    dilation=2 ** depth, final=(depth == last)))
+        self.net = torch.nn.Sequential(*blocks)
+        # Registered but not applied in forward().
+        self.repr_dropout = torch.nn.Dropout(p=0.1)
+
+    def forward(self, x):
+        """Map ``x`` to one ``n_emb``-sized vector per sample.
+
+        Assumes ``x`` is (batch, time, n_features) -- TODO confirm with callers.
+        """
+        h = self.input_fc(x).transpose(1, 2)  # B x Ch x T
+        h = self.net(h)
+        # Global max over the time axis, then drop the singleton axis: B x Ch.
+        return F.max_pool1d(h, kernel_size=h.size(2)).squeeze(2)
+
+
 class GinEncoderGraph(torch.nn.Module):
     def __init__(self, n_features, n_hidden, n_emb, n_layers,
                  pooling='sum', activation='relu'):

diff --git a/...keepFifthMARS/156_TkeepFifthMARS_test.csv → ...keepFifthMARS/156_TkeepFifthMARS_test.csv b/...keepFifthMARS/156_TkeepFifthMARS_test.csv → ...keepFifthMARS/156_TkeepFifthMARS_test.csv
diff --git a/...eepFifthMARS/156_TkeepFifthMARS_train.csv → ...eepFifthMARS/156_TkeepFifthMARS_train.csv b/...eepFifthMARS/156_TkeepFifthMARS_train.csv → ...eepFifthMARS/156_TkeepFifthMARS_train.csv
diff --git a/...keepFirstMARS/157_TkeepFirstMARS_test.csv → ...keepFirstMARS/157_TkeepFirstMARS_test.csv b/...keepFirstMARS/157_TkeepFirstMARS_test.csv → ...keepFirstMARS/157_TkeepFirstMARS_test.csv
diff --git a/...eepFirstMARS/157_TkeepFirstMARS_train.csv → ...eepFirstMARS/157_TkeepFirstMARS_train.csv b/...eepFirstMARS/157_TkeepFirstMARS_train.csv → ...eepFirstMARS/157_TkeepFirstMARS_train.csv
diff --git a/...keepForthMARS/158_TkeepForthMARS_test.csv → ...keepForthMARS/158_TkeepForthMARS_test.csv b/...keepForthMARS/158_TkeepForthMARS_test.csv → ...keepForthMARS/158_TkeepForthMARS_test.csv
diff --git a/...eepForthMARS/158_TkeepForthMARS_train.csv → ...eepForthMARS/158_TkeepForthMARS_train.csv b/...eepForthMARS/158_TkeepForthMARS_train.csv → ...eepForthMARS/158_TkeepForthMARS_train.csv
diff --git a/...epSecondMARS/159_TkeepSecondMARS_test.csv → ...epSecondMARS/159_TkeepSecondMARS_test.csv b/...epSecondMARS/159_TkeepSecondMARS_test.csv → ...epSecondMARS/159_TkeepSecondMARS_test.csv
diff --git a/...pSecondMARS/159_TkeepSecondMARS_train.csv → ...pSecondMARS/159_TkeepSecondMARS_train.csv b/...pSecondMARS/159_TkeepSecondMARS_train.csv → ...pSecondMARS/159_TkeepSecondMARS_train.csv
diff --git a/...keepThirdMARS/160_TkeepThirdMARS_test.csv → ...keepThirdMARS/160_TkeepThirdMARS_test.csv b/...keepThirdMARS/160_TkeepThirdMARS_test.csv → ...keepThirdMARS/160_TkeepThirdMARS_test.csv
diff --git a/...eepThirdMARS/160_TkeepThirdMARS_train.csv → ...eepThirdMARS/160_TkeepThirdMARS_train.csv b/...eepThirdMARS/160_TkeepThirdMARS_train.csv → ...eepThirdMARS/160_TkeepThirdMARS_train.csv