Added new translated scripts

Files in the finished_scripts folder have been tested for basic functionality; files in the translated_scripts folder have not been manually tested yet and were translated in bulk with a bash script.
daphne-eu · Dec 16, 2024 · 2c14e14 · 2c14e14
1 parent ab157d9
commit 2c14e14
Show file tree

Hide file tree

Showing 63 changed files with 2,358 additions and 0 deletions.
diff --git a/scripts/finished_scripts/confusionMatrix.daph b/scripts/finished_scripts/confusionMatrix.daph
@@ -0,0 +1,68 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Computes the confusion matrix for input vectors of predictions
+# and actual labels. We return both the counts and relative frequency
+# (normalized by sum of true labels)
+#
+# .. code-block::
+#
+#                   True Labels
+#                     1    2
+#                 1   TP | FP
+#   Predictions      ----+----
+#                 2   FN | TN
+#
+# INPUT:
+# ------------------------------------------------------------------------------
+# P              vector of predictions (1-based, recoded)
+# Y              vector of actual labels (1-based, recoded)
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------------
+# confusionSum   the confusion matrix as absolute counts
+# confusionAvg   the confusion matrix as relative frequencies
+# ------------------------------------------------------------------------------
+
+def m_confusionMatrix(P:matrix<f64>, Y:matrix<f64>) -> matrix<f64>, matrix<f64> {
+  # Number of classes: the largest label occurring in either vector.
+  dim = max(aggMax(Y), aggMax(P));
+  # Both inputs must be single-column vectors ...
+  if (ncol(P) > 1 || ncol(Y) > 1) {
+    stop("confusionMatrix: Invalid input number of cols should be 1 in both P [" + as.si64(ncol(P)) + "] and Y [" + as.si64(ncol(Y)) + "]");
+  }
+  # ... with one prediction per label ...
+  if (nrow(P) != nrow(Y)) {
+    stop("confusionMatrix: The number of rows have to be equal in both P [" + as.si64(nrow(P)) + "] and Y [" + as.si64(nrow(Y)) + "]");
+  }
+  # ... and may only contain 1-based class labels.
+  if (aggMin(P) < 1 || aggMin(Y) < 1) {
+    stop("confusionMatrix: All Values in P and Y should be abore or equal to 1, min(P):" + aggMin(P) + " min(Y):" + aggMin(Y));
+  }
+
+  # ctable addresses result cells 0-based, so the 1-based labels are shifted
+  # by one; label k is counted in row/column k - 1 of the dim x dim table.
+  confusionSum = ctable(P - 1, Y - 1, dim, dim);
+  # max to avoid division by 0, in case a column contains no entries.
+  confusionAvg = confusionSum / max(1, sum(confusionSum, 1));
+  return as.matrix<f64>(confusionSum), as.matrix<f64>(confusionAvg);
+}
+
diff --git a/scripts/finished_scripts/dist.daph b/scripts/finished_scripts/dist.daph
@@ -0,0 +1,41 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Returns Euclidean distance matrix (distances between N n-dimensional points)
+#
+# INPUT:
+# --------------------------------------------------------------------------------
+# X       Matrix to calculate the distance inside
+# --------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -----------------------------------------------------------------------------------------------
+# Y      Euclidean distance matrix
+# -----------------------------------------------------------------------------------------------
+
+def m_dist(X:matrix<f64>) -> matrix<f64> {
+  # Sum of squares of every row (point) of X.
+  s = sum(X ^ 2, 0);
+  # |a-b|^2 = |a|^2 + |b|^2 - 2*a.b for all pairs of rows, then the root.
+  Y = sqrt((-2.0) * X @ t(X) + s + t(s));
+  # Any NaN in the result (e.g. sqrt of a negative rounding error) becomes 0.
+  return as.matrix<f64>(replace(Y, nan, 0));
+}
+
diff --git a/scripts/finished_scripts/getAccuracy.daph b/scripts/finished_scripts/getAccuracy.daph
@@ -0,0 +1,55 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This builtin function computes the simple or the weighted accuracy for given predictions
+#
+# INPUT:
+# --------------------------------------------------------------------------------------
+# y           Ground truth (Actual Labels)
+# yhat        Predictions (Predicted labels)
+# isWeighted  Flag for weighted or non-weighted accuracy calculation
+# --------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# --------------------------------------------------------------------------------------------
+# accuracy  accuracy of the predicted labels
+# --------------------------------------------------------------------------------------------
+
+def m_getAccuracy(y:matrix<f64>, yhat:matrix<f64>, isWeighted:bool /*= false*/) -> f64 {
+  accuracy = 0.0;
+  if ((isWeighted == false)) {
+    # Simple accuracy: percentage of rows where prediction and label agree.
+    numCorrect = sum(y == yhat);
+    accuracy = (numCorrect / as.si64(nrow(y))) * 100;
+  } else {
+    # Weighted accuracy: mean over classes 1..K of (correct hits / class size).
+    numClasses = as.si64(aggMax(y));
+    resp = fill(as.f64(0), as.si64(nrow(y)), numClasses);
+    resp = as.matrix<f64>(resp + t(seq(as.f64(1), numClasses, 1 <= numClasses ? 1 : -1)));
+    respY = resp == y;
+    respYhat = resp == yhat;
+    # Class sizes are the column sums of the label indicators; 0 -> 1 avoids 0/0.
+    classes = replace(sum(respY, 1), 0, 1);
+    accuracy = as.f64(mean(sum(respY * respYhat, 1) / classes) * 100);
+  }
+  return as.f64(accuracy);
+}
+
diff --git a/scripts/finished_scripts/normalize.daph b/scripts/finished_scripts/normalize.daph
@@ -0,0 +1,48 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Min-max normalization (a.k.a. min-max scaling) to range [0,1]. For matrices 
+# of positive values, this normalization preserves the input sparsity.
+#
+# INPUT:
+# ---------------------------------------------------------------------------------------
+# X     Input feature matrix of shape n-by-m
+# ---------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------------------
+# Y     Modified output feature matrix of shape n-by-m
+# cmin  Column minima of shape 1-by-m
+# cmax  Column maxima of shape 1-by-m
+# ---------------------------------------------------------------------------------------
+
+import "normalizeApply.daph";
+
+def m_normalize(X:matrix<f64>) -> matrix<f64>, matrix<f64>, matrix<f64> {
+  # Column-wise extrema of X; these define the range of every feature.
+  colMin = aggMin(X, 1);
+  colMax = aggMax(X, 1);
+
+  # Rescale the features with the shared min-max helper.
+  scaled = normalizeApply.m_normalizeApply(X, colMin, colMax);
+
+  return scaled, colMin, colMax;
+}
diff --git a/scripts/finished_scripts/normalizeApply.daph b/scripts/finished_scripts/normalizeApply.daph
@@ -0,0 +1,47 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Min-max normalization (a.k.a. min-max scaling) to range [0,1], given 
+# existing min-max ranges. For matrices of positive values, this normalization 
+# preserves the input sparsity. The validity of the provided min-max range
+# and post-processing is under control of the caller. 
+#
+# INPUT:
+# ------------------------------------------------
+# X     Input feature matrix of shape n-by-m
+# cmin  Column min of shape 1-by-m
+# cmax  Column max of shape 1-by-m
+# ------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------
+# Y     Modified output feature matrix of shape n-by-m
+# ------------------------------------------------
+
+def m_normalizeApply(X:matrix<f64>, cmin:matrix<f64>, cmax:matrix<f64>) -> matrix<f64> {
+  # Width of every column range; a width of 0 is swapped for 1 so that the
+  # division below never divides by zero.
+  span = replace(cmax - cmin, 0, 1);
+  # Shift by the minimum, then divide by the width ([0,1] if indeed min/max).
+  shifted = X - cmin;
+  return as.matrix<f64>(shifted / span);
+}
+
diff --git a/scripts/finished_scripts/raSelection.daph b/scripts/finished_scripts/raSelection.daph
@@ -0,0 +1,6 @@
+def m_raSelection(X:matrix<f64>, col:si64, op:str, val:f64) -> matrix<f64> {
+  c = X[, col - 1];  # column the predicate is evaluated on (position col - 1)
+  keep = op == "==" ? c == val : op == "!=" ? c != val : op == "<" ? c < val : op == ">" ? c > val : op == "<=" ? c <= val : c >= val;  # any other op is treated as ">="
+  return as.matrix<f64>(X[[keep, ]]);  # retain only the rows flagged by the predicate
+}
+
diff --git a/scripts/finished_scripts/scale.daph b/scripts/finished_scripts/scale.daph
@@ -0,0 +1,74 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This function scales and centers individual features in the input
+# matrix (column-wise) using z-score to scale the values.
+# The transformation is sometimes also called scale and shift,
+# but it is shifted first and then subsequently scaled.
+#
+# The method is not resistant to inputs containing NaN or to overflows
+# of doubles, but handles them by guaranteeing that no extra NaN values
+# are introduced and that columns containing NaN are not scaled or shifted.
+#
+# INPUT:
+# --------------------------------------------------------------------------------------
+# X       Input feature matrix
+# center  Indicates to center the feature matrix
+# scale   Indicates to scale the feature matrix according to z-score
+# --------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -------------------------------------------------------------------------------------------
+# Out          Output feature matrix scaled and shifted
+# Centering    The column means of the input, subtracted if Center was TRUE
+# ScaleFactor  The scaling of the values, to make each dimension have similar value ranges
+# -------------------------------------------------------------------------------------------
+
+def m_scale(X:matrix<f64>, center:bool /*= true*/, scale:bool /*= true*/) -> matrix<f64>, matrix<f64>, matrix<f64> {
+  # Placeholder outputs, returned unchanged when the matching step is skipped.
+  # NOTE(review): these are 1x1 zero matrices rather than empty (0x0) ones.
+  Centering = fill(as.f64(0), 1, 1);
+  ScaleFactor = fill(as.f64(0), 1, 1);
+  Out = X;
+
+  # Step 1 (optional): subtract the column means.
+  if (center) {
+    # A NaN mean is swapped for 0 so the subtraction introduces no new NaN.
+    Centering = replace(mean(X, 1), nan, 0);
+    Out = as.matrix<f64>(Out - Centering);
+  }
+
+  # Step 2 (optional): divide each column by sqrt(sum of squares / (rows - 1)),
+  # computed on the data as it stands after step 1.
+  if (scale) {
+    rows = as.si64(nrow(Out));
+    ScaleFactor = sqrt(sum(Out ^ 2, 1) / (rows - 1));
+
+    # Factors that are NaN or 0 become 1, so the division below cannot
+    # introduce NaN into the output.
+    ScaleFactor = replace(ScaleFactor, nan, 1);
+    ScaleFactor = replace(ScaleFactor, 0, 1);
+    Out = as.matrix<f64>(Out / ScaleFactor);
+  }
+
+  return as.matrix<f64>(Out), as.matrix<f64>(Centering), as.matrix<f64>(ScaleFactor);
+}
+
diff --git a/scripts/finished_scripts/scaleApply.daph b/scripts/finished_scripts/scaleApply.daph
@@ -0,0 +1,51 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This function scales and centers individual features in the input matrix (column-wise) using the input matrices.
+#
+# INPUT:
+# ------------------------------------------------------------------------------------------
+# X            Input feature matrix
+# Centering    The column means to subtract from X (not done if empty)
+# ScaleFactor  The column scaling to divide X by (not done if empty)
+# ------------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------------------
+# Y     Output feature matrix with K columns
+# ------------------------------------------------------------------------------------
+
+def m_scaleApply(X:matrix<f64>, Centering:matrix<f64>, ScaleFactor:matrix<f64>) -> matrix<f64> {
+  # Start from the unchanged input; each of the two steps below is optional.
+  Y = as.matrix<f64>(X);
+
+  # Shift only if Centering has at least one row and one column.
+  if (as.si64(nrow(Centering)) > 0 && as.si64(ncol(Centering)) > 0) {
+    Y = as.matrix<f64>(X - Centering);
+  }
+
+  # Divide only if ScaleFactor has at least one row and one column.
+  if (as.si64(nrow(ScaleFactor)) > 0 && as.si64(ncol(ScaleFactor)) > 0) {
+    Y = as.matrix<f64>(Y / ScaleFactor);
+  }
+  return as.matrix<f64>(Y);
+}
+