SYCL: Reduce most of the compiler warnings #10748

Merged · 24 commits · Dec 13, 2024

Changes from 9 commits

Commits (24)
3930184
Try to reduce some unused and typecast warnings
qnixsynapse Dec 9, 2024
a708dfc
Reduce compiler warnings step 2
qnixsynapse Dec 10, 2024
fb2e66e
add a newline at the end of the file
qnixsynapse Dec 10, 2024
32164aa
Initialize nreduce as size_t
qnixsynapse Dec 10, 2024
71d84a5
[SYCL] Remove pragma directives from mmq.cpp
qnixsynapse Dec 10, 2024
fe5afd4
SYCL: mmq add condition to prevent blocks_per_tile_x_row variable fro…
qnixsynapse Dec 10, 2024
9129362
SYCL softmax: Initialize nreduce as size_t
qnixsynapse Dec 10, 2024
4b5470f
ggml-sycl.cpp: fix some trailing whitespaces
qnixsynapse Dec 11, 2024
7dda9aa
SYCL: remove the unused variables instead of commenting it out
qnixsynapse Dec 11, 2024
cc7cd62
SYCL pool2d kernel: set NAN for invalid pooling op
qnixsynapse Dec 11, 2024
5a766c1
Merge branch 'master' into refactor
qnixsynapse Dec 11, 2024
274842d
SYCL gemm.hpp: remove pragma directives
qnixsynapse Dec 11, 2024
b0e27ad
SYCL gemm.hpp: use const cast to properly support dnnl::memory
qnixsynapse Dec 11, 2024
cb0daca
SYCL: wkv6 remove a comment
qnixsynapse Dec 11, 2024
8f123ae
SYCL: clean comments step 2
qnixsynapse Dec 11, 2024
39b4c47
SYCL: clean comments and variables step 3
qnixsynapse Dec 11, 2024
8dfac46
SYCL: Use GGML_UNUSED for unused variables
qnixsynapse Dec 12, 2024
90fe556
SYCL: remove extra empty lines and a comment
qnixsynapse Dec 12, 2024
46bcfe4
Remove TODO
qnixsynapse Dec 12, 2024
ffd7c1d
cleanup spaces
abhilash1910 Dec 12, 2024
ba661a4
add a stdout for unsupported op
abhilash1910 Dec 12, 2024
524acb4
use sycl printf over fprintf
abhilash1910 Dec 12, 2024
b828f4a
remove prints for CI
abhilash1910 Dec 12, 2024
6b0848c
SYCL ggml-sycl: pool2D use sycl::nan and remove if-else block
qnixsynapse Dec 12, 2024
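For reference, commit 6b0848c's pattern boils down to initializing the pool accumulator per op and returning a quiet NaN for anything unsupported. A minimal sketch, assuming sycl::nan's unsigned-int overload and using a local stand-in for ggml's pooling enum (not the PR's exact kernel):

#include <sycl/sycl.hpp>
#include <cfloat>    // FLT_MAX
#include <cstdint>   // uint32_t

// Stand-in for ggml's ggml_op_pool enum; illustration only.
enum pool_op { POOL_AVG, POOL_MAX };

// Pick the accumulator's start value; signal an invalid op with a quiet
// NaN from sycl::nan() instead of an if-else chain.
static float pool_init(int op) {
    switch (op) {
        case POOL_AVG: return 0.0f;
        case POOL_MAX: return -FLT_MAX;
        default:       return sycl::nan(uint32_t(0));  // invalid pooling op
    }
}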
13 changes: 2 additions & 11 deletions ggml/src/ggml-sycl/common.cpp
@@ -11,6 +11,7 @@
 //
 
 #include "common.hpp"
+#include "ggml-impl.h"
 
 int get_current_device_id() {
     return dpct::dev_mgr::instance().current_device_id();
@@ -28,11 +29,7 @@ void* ggml_sycl_host_malloc(size_t size) try {
 
     if (err != 0) {
         // clear the error
-        fprintf(
-            stderr,
-            "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
-            size / 1024.0 / 1024.0,
-            "syclGetErrorString is not supported");
+        GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory: %s\n", size / 1024.0 / 1024.0, "syclGetErrorString is not supported");
         return nullptr;
     }
 
@@ -66,18 +63,12 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
 void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                           const ggml_tensor *src1, ggml_tensor *dst,
                           const ggml_sycl_op_flatten_t op) try {
-    const int64_t nrows0 = ggml_nrows(src0);
-
     const bool use_src1 = src1 != nullptr;
-    const int64_t nrows1 = use_src1 ? ggml_nrows(src1) : 1;
 
     GGML_ASSERT(!use_src1 || src1->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
     GGML_ASSERT( dst->backend != GGML_BACKEND_TYPE_GPU_SPLIT);
 
-    ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
-    ggml_tensor_extra_gpu * src1_extra = use_src1 ? (ggml_tensor_extra_gpu *) src1->extra : nullptr;
-    ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
 
     // dd = data device
     float * src0_ddf = (float *) src0->data;
     float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
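The new ggml-impl.h include is what provides GGML_LOG_ERROR; unlike the raw fprintf it replaces, the macro routes through ggml's logging hooks, so an application-installed log callback also sees the warning. A minimal usage sketch (the printf-style signature is the only assumption):

#include <cstddef>
#include "ggml-impl.h"  // provides GGML_LOG_ERROR

// Same call shape as the hunk above; arguments follow printf formatting.
static void report_pinned_alloc_failure(std::size_t size) {
    GGML_LOG_ERROR("WARNING: failed to allocate %.2f MB of pinned memory\n",
                   size / 1024.0 / 1024.0);
}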
1 change: 1 addition & 0 deletions ggml/src/ggml-sycl/common.hpp
@@ -626,6 +626,7 @@ struct bin_bcast_sycl {
                 });
             }
         }
+        (void) ctx;
     }
 };
 
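The added (void) ctx; is the usual C++ idiom for marking a parameter as intentionally unused, silencing -Wunused-parameter without generating any code. A self-contained illustration (op is hypothetical):

// Compile with -Wall -Wextra: without the cast, the unused `ctx` warns.
static int op(int ctx, int x) {
    (void) ctx;     // expression statement that discards the value
    return x * 2;
}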
4 changes: 2 additions & 2 deletions ggml/src/ggml-sycl/concat.cpp
@@ -47,7 +47,7 @@ static void concat_f32_dim1(const float *x, const float *y, float *dst,
     // operation
     int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                      item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-    if (item_ct1.get_group(1) < ne01) { // src0
+    if (item_ct1.get_group(1) < (size_t) ne01) { // src0
         int offset_src =
             nidx + item_ct1.get_group(1) * ne0 + item_ct1.get_group(0) * ne0 * ne01;
         dst[offset_dst] = x[offset_src];
@@ -70,7 +70,7 @@ static void concat_f32_dim2(const float *x, const float *y, float *dst,
     // operation
     int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                      item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-    if (item_ct1.get_group(0) < ne02) { // src0
+    if (item_ct1.get_group(0) < (size_t) ne02) { // src0
         int offset_src = nidx + item_ct1.get_group(1) * ne0 +
                          item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
         dst[offset_dst] = x[offset_src];
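Both casts address -Wsign-compare: nd_item::get_group() returns size_t while the ne01/ne02 extents are signed ints, and mixing the two in a comparison warns. A hypothetical standalone reproduction (check is not part of the PR):

#include <cstddef>

static bool check(std::size_t group, int ne01) {
    // return group < ne01;             // -Wsign-compare: size_t vs int
    return group < (std::size_t) ne01;  // explicit cast, warning-free
}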
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl/convert.cpp
@@ -424,7 +424,7 @@ static void convert_unary(const void * __restrict__ vx, dst_t * __restrict__ y,
     const int64_t global_id = item_ct1.get_local_id(2) + work_group_size * item_ct1.get_group(2);
 
     // make each work-item deal with more elements since sycl global range can not exceed max int
-    const src_t * x = (src_t *) vx;
+    const src_t * x = (const src_t *) vx;
     for (int64_t i = global_id; i < k; i += work_group_size * item_ct1.get_group_range(2)) {
         y[i] = x[i];
     }
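The fix here is const-correctness: casting the const-qualified vx to plain (src_t *) discards the qualifier and trips -Wcast-qual. A minimal illustration of the same rule (names hypothetical):

static float first_element(const void * vx) {
    // float * bad = (float *) vx;          // -Wcast-qual: cast drops const
    const float * ok = (const float *) vx;  // qualifier preserved
    return ok[0];
}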
2 changes: 1 addition & 1 deletion ggml/src/ggml-sycl/dpct/helper.hpp
@@ -1237,7 +1237,7 @@ namespace dpct
 
     std::map<byte_t *, allocation>::iterator get_map_iterator(const void *ptr)
     {
-        auto it = m_map.upper_bound((byte_t *)ptr);
+        auto it = m_map.upper_bound(const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr)));
         if (it == m_map.end())
         {
             // Not a virtual pointer.
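upper_bound needs the map's key type, byte_t *, while ptr arrives as const void *; the old C-style cast reinterpreted the type and stripped const in one opaque step. Splitting it into reinterpret_cast plus an explicit const_cast keeps each conversion visible. A sketch, assuming byte_t is an unsigned 8-bit typedef as in dpct:

#include <cstdint>

using byte_t = std::uint8_t;  // assumption: matches dpct's typedef

static byte_t * to_map_key(const void * ptr) {
    // (byte_t *) ptr would reinterpret AND drop const in one step.
    return const_cast<byte_t *>(reinterpret_cast<const byte_t *>(ptr));
}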
25 changes: 22 additions & 3 deletions ggml/src/ggml-sycl/element_wise.cpp
@@ -237,7 +237,7 @@ void upscale_f32(const float *x, float *dst, const int nb00, const int nb01,
     int i02 = i12 / sf2;
     int i03 = i13 / sf3;
 
-    dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
+    dst[index] = *(const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
 }
 
 void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const int ne01, const int ne02,
@@ -251,8 +251,7 @@ void pad_f32(const float *x, float *dst, const int ne0, const int ne00, const i
     // operation
     int offset_dst = nidx + item_ct1.get_group(1) * ne0 +
                      item_ct1.get_group(0) * ne0 * item_ct1.get_group_range(1);
-    if (nidx < ne00 && item_ct1.get_group(1) < ne01 &&
-        item_ct1.get_group(0) < ne02) {
+    if (nidx < ne00 && item_ct1.get_group(1) < (size_t) ne01 && item_ct1.get_group(0) < (size_t) ne02) {
         int offset_src = nidx + item_ct1.get_group(1) * ne00 +
                          item_ct1.get_group(0) * ne00 * ne01;
         dst[offset_dst] = x[offset_src];
@@ -523,6 +522,7 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -538,6 +538,7 @@ inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                     const ggml_tensor *src1, ggml_tensor *dst,
@@ -553,6 +554,7 @@ inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -567,6 +569,7 @@ inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -582,6 +585,7 @@ inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -598,6 +602,7 @@ inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -613,6 +618,7 @@ inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_t
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -628,6 +634,7 @@ inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -643,6 +650,7 @@ inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -658,6 +666,7 @@ inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_ten
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -673,6 +682,7 @@ inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -688,6 +698,7 @@ inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -703,6 +714,7 @@ inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -718,6 +730,7 @@ inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -733,6 +746,7 @@ inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -752,6 +766,7 @@ inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -767,6 +782,7 @@ inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -790,6 +806,7 @@ inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_ten
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -808,6 +825,7 @@ inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor
     (void) src1;
     (void) dst;
     (void) src1_dd;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -828,6 +846,7 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor
     acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream);
 
     (void) dst;
+    (void) ctx;
 }
 
 inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
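Commit 8dfac46 later replaces these raw casts with GGML_UNUSED; the hunks above predate it because this view covers only the first 9 commits. Assuming the usual definition in ggml.h, the macro is the same idiom with a self-documenting name:

#define GGML_UNUSED(x) (void)(x)  // assumed definition, as in ggml.h

static void op(int ctx) {
    GGML_UNUSED(ctx);  // expands to (void)(ctx);
}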
5 changes: 5 additions & 0 deletions ggml/src/ggml-sycl/gemm.hpp
@@ -23,6 +23,10 @@
 #include "dnnl.hpp"
 #include "dnnl_sycl.hpp"
 
+
+// TODO: Remove this when needed
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wcast-qual"
 class DnnlGemmWrapper {
 public:
     using dt = dnnl::memory::data_type;
@@ -96,6 +100,7 @@ class DnnlGemmWrapper {
     }
 };
 
+# pragma clang diagnostic pop
 #endif
 
 #endif // GGML_SYCL_GEMM_HPP
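The push/pop pair keeps the suppression scoped: -Wcast-qual is muted only around DnnlGemmWrapper, where const buffers must be handed to the dnnl::memory API, and restored for the rest of the translation unit. The general clang pattern:

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
// ... code that must cast away const for a third-party API ...
#pragma clang diagnostic pop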