From 934e7c9c8ed2d1b885fdb9e68f44061ce998c115 Mon Sep 17 00:00:00 2001
From: Dzmitry Kamarouski <dzkamarouski@gmail.com>
Date: Sat, 29 Dec 2018 16:12:25 +0300
Subject: [PATCH 1/4] Update bottom_pool.cpp: pass sizes before options to at::zeros

---
 models/py_utils/_cpools/src/bottom_pool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/models/py_utils/_cpools/src/bottom_pool.cpp b/models/py_utils/_cpools/src/bottom_pool.cpp
index bd6c65a..7c7f3d1 100644
--- a/models/py_utils/_cpools/src/bottom_pool.cpp
+++ b/models/py_utils/_cpools/src/bottom_pool.cpp
@@ -41,8 +41,8 @@ std::vector<at::Tensor> pool_backward(
     int32_t height  = input.size(2);
     int32_t width   = input.size(3);
 
-    auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
-    auto max_ind = at::zeros(torch::CUDA(at::kLong),  {batch, channel, width});
+    auto max_val = at::zeros({batch, channel, width}, torch::CUDA(at::kFloat));
+    auto max_ind = at::zeros({batch, channel, width}, torch::CUDA(at::kLong));
 
     auto input_temp = input.select(2, 0);
     max_val.copy_(input_temp);
@@ -54,8 +54,8 @@ std::vector<at::Tensor> pool_backward(
     output_temp.copy_(grad_output_temp);
 
     auto un_max_ind = max_ind.unsqueeze(2);
-    auto gt_mask    = at::zeros(torch::CUDA(at::kByte),  {batch, channel, width});
-    auto max_temp   = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
+    auto gt_mask    = at::zeros({batch, channel, width}, torch::CUDA(at::kByte));
+    auto max_temp   = at::zeros({batch, channel, width}, torch::CUDA(at::kFloat));
     for (int32_t ind = 0; ind < height - 1; ++ind) {
         input_temp = input.select(2, ind + 1);
         at::gt_out(gt_mask, input_temp, max_val);

From 87abfdebcb419f8f2913e1e1140bef36a528515a Mon Sep 17 00:00:00 2001
From: Dzmitry Kamarouski <dzkamarouski@gmail.com>
Date: Sat, 29 Dec 2018 16:12:53 +0300
Subject: [PATCH 2/4] Update left_pool.cpp: pass sizes before options to at::zeros

---
 models/py_utils/_cpools/src/left_pool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/models/py_utils/_cpools/src/left_pool.cpp b/models/py_utils/_cpools/src/left_pool.cpp
index fbc5d98..3005d81 100644
--- a/models/py_utils/_cpools/src/left_pool.cpp
+++ b/models/py_utils/_cpools/src/left_pool.cpp
@@ -41,8 +41,8 @@ std::vector<at::Tensor> pool_backward(
     int32_t height  = input.size(2);
     int32_t width   = input.size(3);
 
-    auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
-    auto max_ind = at::zeros(torch::CUDA(at::kLong),  {batch, channel, height});
+    auto max_val = at::zeros({batch, channel, height}, torch::CUDA(at::kFloat));
+    auto max_ind = at::zeros({batch, channel, height}, torch::CUDA(at::kLong));
 
     auto input_temp = input.select(3, width - 1);
     max_val.copy_(input_temp);
@@ -54,8 +54,8 @@ std::vector<at::Tensor> pool_backward(
     output_temp.copy_(grad_output_temp);
 
     auto un_max_ind = max_ind.unsqueeze(3);
-    auto gt_mask    = at::zeros(torch::CUDA(at::kByte),  {batch, channel, height});
-    auto max_temp   = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
+    auto gt_mask    = at::zeros({batch, channel, height}, torch::CUDA(at::kByte));
+    auto max_temp   = at::zeros({batch, channel, height}, torch::CUDA(at::kFloat));
     for (int32_t ind = 1; ind < width; ++ind) {
         input_temp = input.select(3, width - ind - 1);
         at::gt_out(gt_mask, input_temp, max_val);

From a9b75d1a174e77d9d45a12185cdd260cdd33271a Mon Sep 17 00:00:00 2001
From: Dzmitry Kamarouski <dzkamarouski@gmail.com>
Date: Sat, 29 Dec 2018 16:13:13 +0300
Subject: [PATCH 3/4] Update right_pool.cpp: pass sizes before options to at::zeros

---
 models/py_utils/_cpools/src/right_pool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/models/py_utils/_cpools/src/right_pool.cpp b/models/py_utils/_cpools/src/right_pool.cpp
index 36c5c85..4896945 100644
--- a/models/py_utils/_cpools/src/right_pool.cpp
+++ b/models/py_utils/_cpools/src/right_pool.cpp
@@ -41,8 +41,8 @@ std::vector<at::Tensor> pool_backward(
     int32_t height  = input.size(2);
     int32_t width   = input.size(3);
 
-    auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
-    auto max_ind = at::zeros(torch::CUDA(at::kLong),  {batch, channel, height});
+    auto max_val = at::zeros({batch, channel, height}, torch::CUDA(at::kFloat));
+    auto max_ind = at::zeros({batch, channel, height}, torch::CUDA(at::kLong));
 
     auto input_temp = input.select(3, 0);
     max_val.copy_(input_temp);
@@ -54,8 +54,8 @@ std::vector<at::Tensor> pool_backward(
     output_temp.copy_(grad_output_temp);
 
     auto un_max_ind = max_ind.unsqueeze(3);
-    auto gt_mask    = at::zeros(torch::CUDA(at::kByte),  {batch, channel, height});
-    auto max_temp   = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
+    auto gt_mask    = at::zeros({batch, channel, height}, torch::CUDA(at::kByte));
+    auto max_temp   = at::zeros({batch, channel, height}, torch::CUDA(at::kFloat));
     for (int32_t ind = 0; ind < width - 1; ++ind) {
         input_temp = input.select(3, ind + 1);
         at::gt_out(gt_mask, input_temp, max_val);

From 11673c71974356adbe5a76db136838ee56212703 Mon Sep 17 00:00:00 2001
From: Dzmitry Kamarouski <dzkamarouski@gmail.com>
Date: Sat, 29 Dec 2018 16:13:37 +0300
Subject: [PATCH 4/4] Update top_pool.cpp: pass sizes before options to at::zeros

---
 models/py_utils/_cpools/src/top_pool.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/models/py_utils/_cpools/src/top_pool.cpp b/models/py_utils/_cpools/src/top_pool.cpp
index 4ac287f..6e7a863 100644
--- a/models/py_utils/_cpools/src/top_pool.cpp
+++ b/models/py_utils/_cpools/src/top_pool.cpp
@@ -40,9 +40,9 @@ std::vector<at::Tensor> top_pool_backward(
     int32_t channel = input.size(1);
     int32_t height  = input.size(2);
     int32_t width   = input.size(3);
 
-    auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
-    auto max_ind = at::zeros(torch::CUDA(at::kLong),  {batch, channel, width});
+    auto max_val = at::zeros({batch, channel, width}, torch::CUDA(at::kFloat));
+    auto max_ind = at::zeros({batch, channel, width}, torch::CUDA(at::kLong));
 
     auto input_temp = input.select(2, height - 1);
     max_val.copy_(input_temp);
@@ -54,8 +54,8 @@ std::vector<at::Tensor> top_pool_backward(
     output_temp.copy_(grad_output_temp);
 
     auto un_max_ind = max_ind.unsqueeze(2);
-    auto gt_mask    = at::zeros(torch::CUDA(at::kByte),  {batch, channel, width});
-    auto max_temp   = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
+    auto gt_mask    = at::zeros({batch, channel, width}, torch::CUDA(at::kByte));
+    auto max_temp   = at::zeros({batch, channel, width}, torch::CUDA(at::kFloat));
     for (int32_t ind = 1; ind < height; ++ind) {
         input_temp = input.select(2, height - ind - 1);
         at::gt_out(gt_mask, input_temp, max_val);