Skip to content

Commit

Permalink
Added new translated scripts
Browse files Browse the repository at this point in the history
Files in the finished_scripts have been tested for basic functionality,
files in the translated_scripts folder have not been manually tested yet
and have been translated in bulk with a bash script
  • Loading branch information
Garic152 committed Dec 16, 2024
1 parent ab157d9 commit 2c14e14
Show file tree
Hide file tree
Showing 63 changed files with 2,358 additions and 0 deletions.
68 changes: 68 additions & 0 deletions scripts/finished_scripts/confusionMatrix.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# Computes the confusion matrix for input vectors of predictions
# and actual labels. We return both the counts and relative frequency
# (normalized by sum of true labels)
#
# .. code-block::
#
#                   True Labels
#                     1    2
#                 1   TP | FP
#   Predictions      ----+----
#                 2   FN | TN
#
# INPUT:
# ------------------------------------------------------------------------------
# P vector of predictions (1-based, recoded)
# Y vector of actual labels (1-based, recoded)
# ------------------------------------------------------------------------------
#
# OUTPUT:
# ------------------------------------------------------------------------------
# confusionSum the confusion matrix as absolute counts
# confusionAvg the confusion matrix as relative frequencies
# ------------------------------------------------------------------------------

# Computes the confusion matrix of predictions P against actual labels Y.
#
# P and Y must each be a single column with the same number of rows, and all
# of their values must be >= 1; otherwise execution is aborted via stop().
#
# Returns two matrices:
#   1. the contingency table of (P, Y) as absolute counts
#   2. the same table divided by max(1, sum(counts, 1))
def m_confusionMatrix(P:matrix<f64>, Y:matrix<f64>) -> matrix<f64>, matrix<f64> {
    # Validate the inputs first so that no work is done on malformed data.
    if (ncol(P) > 1 || ncol(Y) > 1) {
        stop("confusionMatrix: Invalid input number of cols should be 1 in both P [" + as.si64(ncol(P)) + "] and Y [" + as.si64(ncol(Y)) + "]");
    }

    if (nrow(P) != nrow(Y)) {
        stop("confusionMatrix: The number of rows have to be equal in both P [" + as.si64(nrow(P)) + "] and Y [" + as.si64(nrow(Y)) + "]");
    }

    # Compute each minimum once and reuse it for both the check and the message.
    minP = aggMin(P);
    minY = aggMin(Y);
    if (minP < 1 || minY < 1) {
        stop("confusionMatrix: All Values in P and Y should be above or equal to 1, min(P):" + minP + " min(Y):" + minY);
    }

    # The table is square; its side is the largest label seen in either vector.
    dim = max(aggMax(Y), aggMax(P));

    # NOTE(review): the labels are documented as 1-based. Confirm that ctable
    # maps label k to the intended row/column (and does not crop label `dim`)
    # when the result is limited to dim x dim.
    confusionSum = ctable(P, Y, dim, dim);

    # max(1, ...) avoids a division by 0 in case a column contains no entries.
    confusionAvg = confusionSum / max(1, sum(confusionSum, 1));
    return as.matrix<f64>(confusionSum), as.matrix<f64>(confusionAvg);
}

41 changes: 41 additions & 0 deletions scripts/finished_scripts/dist.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# Returns Euclidean distance matrix (distances between N n-dimensional points)
#
# INPUT:
# --------------------------------------------------------------------------------
# X Matrix to calculate the distance inside
# --------------------------------------------------------------------------------
#
# OUTPUT:
# -----------------------------------------------------------------------------------------------
# Y Euclidean distance matrix
# -----------------------------------------------------------------------------------------------

# Returns the matrix of pairwise Euclidean distances between the rows of X.
#
# Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * <a, b>, evaluated
# for all pairs at once through the product X @ t(X).
def m_dist(X:matrix<f64>) -> matrix<f64> {
    # Squared norm of every point, aggregated along axis 0.
    s = sum(X ^ 2, 0);
    # s and t(s) are broadcast against the square product X @ t(X).
    Y = sqrt((-2.0) * X @ t(X) + s + t(s));
    # Any NaN in the result is reported as distance 0 (presumably these come
    # from sqrt of slightly negative values caused by rounding).
    Y = replace(Y, nan, 0);
    return as.matrix<f64>(Y);
}

55 changes: 55 additions & 0 deletions scripts/finished_scripts/getAccuracy.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This builtin function computes the weighted and simple accuracy for given predictions
#
# INPUT:
# --------------------------------------------------------------------------------------
# y Ground truth (Actual Labels)
# yhat Predictions (Predicted labels)
# isWeighted Flag for weighted or non-weighted accuracy calculation
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# --------------------------------------------------------------------------------------------
# accuracy accuracy of the predicted labels
# --------------------------------------------------------------------------------------------

# Computes the accuracy (in percent) of predictions yhat against labels y.
#
# isWeighted == false: share of rows where y equals yhat, times 100.
# isWeighted == true:  for every class, the number of rows where both y and
#                      yhat equal that class is divided by the class count
#                      taken from ctable; the mean of these ratios times 100
#                      is returned.
def m_getAccuracy(y:matrix<f64>, yhat:matrix<f64>, isWeighted:bool /*= false*/) -> f64 {
    accuracy = 0.0;
    if ((isWeighted == false)) {
        # Count the matching rows; named so it does not shadow the sum() builtin.
        numCorrect = sum(y == yhat);
        accuracy = (numCorrect / as.si64(nrow(y))) * 100;
    } else {
        # Per-class counts of the true labels.
        # NOTE(review): ctable is called with a scalar second argument and
        # 1-based labels here; confirm this matches Daphne's ctable contract.
        classes = ctable(y, 1, aggMax(y), 1);
        # resp has one column per class; column j is filled with the value j + 1.
        resp = fill(as.f64(0), as.si64(nrow(y)), as.si64(nrow(classes)));
        resp = as.matrix<f64>(resp + t(seq(as.f64(1), as.si64(nrow(classes)), 1 <= as.si64(nrow(classes)) ? 1 : -1)));
        # Indicator matrices: which rows carry which class in y and in yhat.
        respY = resp == y;
        respYhat = resp == yhat;
        # 1 only where label and prediction agree on the class.
        pred = respY * respYhat;
        # Classes without any instance get count 1 to avoid a division by 0.
        classes = replace(classes, 0, 1);
        accuracy = as.f64(mean(sum(pred, 1) / t(classes)) * 100);
    }

    return as.f64(accuracy);
}

48 changes: 48 additions & 0 deletions scripts/finished_scripts/normalize.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# Min-max normalization (a.k.a. min-max scaling) to range [0,1]. For matrices
# of positive values, this normalization preserves the input sparsity.
#
# INPUT:
# ---------------------------------------------------------------------------------------
# X Input feature matrix of shape n-by-m
# ---------------------------------------------------------------------------------------
#
# OUTPUT:
# ---------------------------------------------------------------------------------------
# Y Modified output feature matrix of shape n-by-m
# cmin Column minima of shape 1-by-m
# cmax Column maxima of shape 1-by-m
# ---------------------------------------------------------------------------------------

import "normalizeApply.daph";

# Min-max normalizes X column by column.
#
# Returns the normalized matrix together with the column minima and column
# maxima that were used, so the same transformation can be applied again via
# normalizeApply.m_normalizeApply.
def m_normalize(X:matrix<f64>) -> matrix<f64>, matrix<f64>, matrix<f64> {
    # feature ranges: upper and lower bound, aggregated along axis 1
    hi = aggMax(X, 1);
    lo = aggMin(X, 1);

    # delegate the actual rescaling to the shared apply routine
    scaled = normalizeApply.m_normalizeApply(X, lo, hi);

    return scaled, lo, hi;
}
47 changes: 47 additions & 0 deletions scripts/finished_scripts/normalizeApply.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# Min-max normalization (a.k.a. min-max scaling) to range [0,1], given
# existing min-max ranges. For matrices of positive values, this normalization
# preserves the input sparsity. The validity of the provided min-max range
# and post-processing is under control of the caller.
#
# INPUT:
# ------------------------------------------------
# X Input feature matrix of shape n-by-m
# cmin Column min of shape 1-by-m
# cmax Column max of shape 1-by-m
# ------------------------------------------------
#
# OUTPUT:
# ------------------------------------------------
# Y Modified output feature matrix of shape n-by-m
# ------------------------------------------------

# Rescales X with the given column minima and maxima: (X - cmin) / (cmax - cmin).
#
# Where cmax equals cmin the divisor is replaced by 1, so such a column
# yields X - cmin instead of a division by zero.
# The result lies in [0,1] only if cmin/cmax really are the bounds of X;
# this is not checked here.
def m_normalizeApply(X:matrix<f64>, cmin:matrix<f64>, cmax:matrix<f64>) -> matrix<f64> {
    # width of every column range, with zero widths mapped to 1
    span = replace(cmax - cmin, 0, 1);
    # shift to the lower bound first, then divide by the range width
    shifted = X - cmin;
    return as.matrix<f64>(shifted / span);
}

6 changes: 6 additions & 0 deletions scripts/finished_scripts/raSelection.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Relational selection: keeps the rows of X whose value in column `col`
# satisfies the comparison `op` against `val`.
#
# col  column position; the code reads X[, col - 1], i.e. the argument is
#      one higher than the index actually used
# op   one of "==", "!=", "<", ">", "<=" ; any other string (including
#      ">=") is evaluated as >=
# val  scalar to compare against
def m_raSelection(X:matrix<f64>, col:si64, op:str, val:f64) -> matrix<f64> {
    # the column that is compared, extracted once
    key = X[, col - 1];

    # row mask for the requested comparison operator
    mask = op == "==" ? (key == val)
         : (op == "!=" ? (key != val)
         : (op == "<"  ? (key <  val)
         : (op == ">"  ? (key >  val)
         : (op == "<=" ? (key <= val)
         :               (key >= val)))));

    # keep only the rows selected by the mask
    return as.matrix<f64>(X[[mask, ]]);
}

74 changes: 74 additions & 0 deletions scripts/finished_scripts/scale.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This function scales and centers individual features in the input
# matrix (column-wise) using z-score to scale the values.
# The transformation is sometimes also called scale and shift,
# but it is shifted first and then subsequently scaled.
#
# The method is not resistant to inputs containing NaN or overflows
# of doubles, but handles them by guaranteeing that no extra NaN values
# are introduced and columns that contain NaN will not be scaled or shifted.
#
# INPUT:
# --------------------------------------------------------------------------------------
# X Input feature matrix
# center Indicates to center the feature matrix
# scale Indicates to scale the feature matrix according to z-score
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# -------------------------------------------------------------------------------------------
# Out Output feature matrix scaled and shifted
# Centering The column means of the input, subtracted if Center was TRUE
# ScaleFactor The scaling of the values, to make each dimension have similar value ranges
# -------------------------------------------------------------------------------------------

# Centers and/or scales the columns of X (shift first, then scale).
#
# Returns the transformed matrix, the subtracted column means and the
# applied column scale factors.
# If `center` (resp. `scale`) is false, the corresponding output is a 1x1
# matrix holding 0 that merely serves as a placeholder.
# NOTE(review): such a placeholder is non-empty, so scaleApply.daph would
# not skip it and would divide by a scale factor of 0 - confirm that callers
# never forward the placeholders.
def m_scale(X:matrix<f64>, center:bool /*= true*/, scale:bool /*= true*/) -> matrix<f64>, matrix<f64>, matrix<f64> {
    # placeholders so that there is always something to return
    colShift = fill(0.0, 1, 1);
    colScale = fill(0.0, 1, 1);

    # working copy that is transformed step by step
    Out = X;

    if (center) {
        colShift = mean(Out, 1);
        # A NaN mean is replaced by 0 so that the subtraction does not
        # introduce additional NaN values.
        colShift = replace(colShift, nan, 0);
        Out = as.matrix<f64>(Out - colShift);
    }

    if (scale) {
        numRows = as.si64(nrow(Out));
        # sum of squares divided by (numRows - 1), then the square root
        meanSquares = sum(Out ^ 2, 1) / (numRows - 1);
        colScale = sqrt(meanSquares);

        # Factors that are NaN or 0 become 1, so that the division below
        # neither divides by 0 nor introduces NaN into the output.
        colScale = replace(colScale, nan, 1);
        colScale = replace(colScale, 0, 1);
        Out = as.matrix<f64>(Out / colScale);
    }

    return as.matrix<f64>(Out), as.matrix<f64>(colShift), as.matrix<f64>(colScale);
}

51 changes: 51 additions & 0 deletions scripts/finished_scripts/scaleApply.daph
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This function scales and centers individual features in the input matrix (column-wise) using the input matrices.
#
# INPUT:
# ------------------------------------------------------------------------------------------
# X Input feature matrix
# Centering The column means to subtract from X (not done if empty)
# ScaleFactor The column scaling to divide X by (not done if empty)
# ------------------------------------------------------------------------------------------
#
# OUTPUT:
# ------------------------------------------------------------------------------------
# Y Output feature matrix with K columns
# ------------------------------------------------------------------------------------

# Applies a previously computed centering and scaling to X.
#
# Centering is subtracted from X and the result is divided by ScaleFactor.
# Each step is skipped when the respective matrix has zero rows or zero
# columns.
def m_scaleApply(X:matrix<f64>, Centering:matrix<f64>, ScaleFactor:matrix<f64>) -> matrix<f64> {
    hasCentering = as.si64(nrow(Centering)) > 0 && as.si64(ncol(Centering)) > 0;
    hasScaling = as.si64(nrow(ScaleFactor)) > 0 && as.si64(ncol(ScaleFactor)) > 0;

    # start from the unmodified input and apply only the steps that are present
    Y = as.matrix<f64>(X);
    if (hasCentering) {
        Y = X - Centering;
    }

    if (hasScaling) {
        Y = as.matrix<f64>(Y / ScaleFactor);
    }

    return as.matrix<f64>(Y);
}

Loading

0 comments on commit 2c14e14

Please sign in to comment.