-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Files in the finished_scripts folder have been tested for basic functionality. Files in the translated_scripts folder have not been manually tested yet; they were translated in bulk with a bash script.
- Loading branch information
Showing
63 changed files
with
2,358 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Computes the confusion matrix for input vectors of predictions | ||
# and actual labels. We return both the counts and relative frequency | ||
# (normalized by sum of true labels) | ||
# | ||
# .. code-block:: | ||
# | ||
# True Labels | ||
# 1 2 | ||
# 1 TP | FP | ||
# Predictions ----+---- | ||
# 2 FN | TN | ||
# | ||
# INPUT: | ||
# ------------------------------------------------------------------------------ | ||
# P vector of predictions (1-based, recoded) | ||
# Y vector of actual labels (1-based, recoded) | ||
# ------------------------------------------------------------------------------ | ||
# | ||
# OUTPUT: | ||
# ------------------------------------------------------------------------------ | ||
# confusionSum the confusion matrix as absolute counts | ||
# confusionAvg the confusion matrix as relative frequencies | ||
# ------------------------------------------------------------------------------ | ||
|
||
def m_confusionMatrix(P:matrix<f64>, Y:matrix<f64>) -> matrix<f64>, matrix<f64> {
    # Builds the confusion matrix of predictions P against actual labels Y.
    # Returns two matrices: the absolute counts and the counts divided by
    # sum(confusionSum, 1). Calls stop() if an input has more than one column,
    # if the row counts differ, or if any value is below 1.

    # Both inputs must be single-column vectors.
    if (ncol(P) > 1 || ncol(Y) > 1) {
        stop("confusionMatrix: Invalid input number of cols should be 1 in both P [" + as.si64(ncol(P)) + "] and Y [" + as.si64(ncol(Y)) + "]");
    }

    # Predictions and labels are paired row by row, so the lengths must match.
    if (nrow(P) != nrow(Y)) {
        stop("confusionMatrix: The number of rows have to be equal in both P [" + as.si64(nrow(P)) + "] and Y [" + as.si64(nrow(Y)) + "]");
    }

    # Labels are expected to be recoded to start at 1.
    if (aggMin(P) < 1 || aggMin(Y) < 1) {
        stop("confusionMatrix: All Values in P and Y should be above or equal to 1, min(P):" + aggMin(P) + " min(Y):" + aggMin(Y));
    }

    # Side length of the square result: the largest label in either vector.
    dim = max(aggMax(Y), aggMax(P));

    # Contingency table with predictions as first and labels as second coordinate.
    # NOTE(review): the labels are validated as 1-based and passed unchanged into a
    # dim x dim table. Confirm the index base of ctable; if it is 0-based, entries of
    # the largest class would fall outside the result and P - 1 / Y - 1 are needed.
    confusionSum = ctable(P, Y, dim, dim);

    # max(1, .) avoids division by 0 in case a column contains no entries.
    confusionAvg = confusionSum / max(1, sum(confusionSum, 1));
    return as.matrix<f64>(confusionSum), as.matrix<f64>(confusionAvg);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Returns Euclidean distance matrix (distances between N n-dimensional points) | ||
# | ||
# INPUT: | ||
# -------------------------------------------------------------------------------- | ||
# X Matrix to calculate the distance inside | ||
# -------------------------------------------------------------------------------- | ||
# | ||
# OUTPUT: | ||
# ----------------------------------------------------------------------------------------------- | ||
# Y Euclidean distance matrix | ||
# ----------------------------------------------------------------------------------------------- | ||
|
||
def m_dist(X:matrix<f64>) -> matrix<f64> {
    # Pairwise Euclidean distances between the rows of X, computed through
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * <a, b> for all pairs at once.

    # Squared norm of every point (aggregation of X ^ 2 along axis 0).
    sqNorms = sum(X ^ 2, 0);

    # X @ t(X) holds all inner products; the norms are added once as-is and
    # once transposed so every (i, j) cell receives both ||x_i||^2 and ||x_j||^2.
    Y = sqrt((-2.0) * X @ t(X) + sqNorms + t(sqNorms));

    # NaN results are mapped to distance 0 (presumably they stem from round-off
    # making the value under the square root slightly negative).
    Y = replace(Y, nan, 0);
    return as.matrix<f64>(Y);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# This builtin function computes the weighted or simple accuracy for given predictions | ||
# | ||
# INPUT: | ||
# -------------------------------------------------------------------------------------- | ||
# y Ground truth (Actual Labels) | ||
# yhat Predictions (Predicted labels) | ||
# isWeighted Flag for weighted or non-weighted accuracy calculation | ||
# -------------------------------------------------------------------------------------- | ||
# | ||
# OUTPUT: | ||
# -------------------------------------------------------------------------------------------- | ||
# accuracy accuracy of the predicted labels | ||
# -------------------------------------------------------------------------------------------- | ||
|
||
def m_getAccuracy(y:matrix<f64>, yhat:matrix<f64>, isWeighted:bool /*= false*/) -> f64 {
    # Accuracy of predictions yhat against labels y, in percent.
    # isWeighted == false: share of rows where yhat equals y.
    # isWeighted == true:  mean over classes of (correct rows of the class / rows of the class).
    accuracy = 0.0;
    if (isWeighted == false) {
        # Named numCorrect (not "sum") so the builtin sum() is not shadowed.
        # Both operands are cast to f64 so the division can never truncate.
        numCorrect = as.f64(sum(y == yhat));
        accuracy = (numCorrect / as.f64(nrow(y))) * 100.0;
    } else {
        # Number of rows per class of the true labels.
        # NOTE(review): confirm the index base of ctable and that a scalar second
        # argument is accepted; the labels are passed as-is.
        classes = ctable(y, 1, aggMax(y), 1);
        numClasses = as.si64(nrow(classes));

        # Every row of resp holds the class ids 1..numClasses.
        resp = fill(as.f64(0), as.si64(nrow(y)), numClasses);
        resp = as.matrix<f64>(resp + t(seq(as.f64(1), numClasses, 1 <= numClasses ? 1 : -1)));

        # Indicator matrices: which class id matches the label / the prediction in each row.
        respY = resp == y;
        respYhat = resp == yhat;

        # A cell is 1 only where label and prediction both equal that class id.
        pred = respY * respYhat;

        # Classes without any row get count 1 to avoid division by zero.
        classes = replace(classes, 0, 1);
        accuracy = as.f64(mean(sum(pred, 1) / t(classes)) * 100);
    }

    return as.f64(accuracy);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Min-max normalization (a.k.a. min-max scaling) to range [0,1]. For matrices | ||
# of positive values, this normalization preserves the input sparsity. | ||
# | ||
# INPUT: | ||
# --------------------------------------------------------------------------------------- | ||
# X Input feature matrix of shape n-by-m | ||
# --------------------------------------------------------------------------------------- | ||
# | ||
# OUTPUT: | ||
# --------------------------------------------------------------------------------------- | ||
# Y Modified output feature matrix of shape n-by-m | ||
# cmin Column minima of shape 1-by-m | ||
# cmax Column maxima of shape 1-by-m | ||
# --------------------------------------------------------------------------------------- | ||
|
||
import "normalizeApply.daph"; | ||
|
||
def m_normalize(X:matrix<f64>) -> matrix<f64>, matrix<f64>, matrix<f64> {
    # Min-max normalization of X; returns the rescaled matrix together with
    # the minima and maxima that were used.

    # feature ranges, aggregated along axis 1
    colMin = aggMin(X, 1);
    colMax = aggMax(X, 1);

    # the rescaling itself is delegated to normalizeApply
    scaled = normalizeApply.m_normalizeApply(X, colMin, colMax);

    return scaled, colMin, colMax;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# Min-max normalization (a.k.a. min-max scaling) to range [0,1], given | ||
# existing min-max ranges. For matrices of positive values, this normalization | ||
# preserves the input sparsity. The validity of the provided min-max range | ||
# and post-processing is under control of the caller. | ||
# | ||
# INPUT: | ||
# ------------------------------------------------ | ||
# X Input feature matrix of shape n-by-m | ||
# cmin Column min of shape 1-by-m | ||
# cmax Column max of shape 1-by-m | ||
# ------------------------------------------------ | ||
# | ||
# OUTPUT: | ||
# ------------------------------------------------ | ||
# Y Modified output feature matrix of shape n-by-m | ||
# ------------------------------------------------ | ||
|
||
def m_normalizeApply(X:matrix<f64>, cmin:matrix<f64>, cmax:matrix<f64>) -> matrix<f64> {
    # Rescales X with the given minima and maxima: (X - cmin) / (cmax - cmin).

    # Width of every range; a width of 0 is replaced by 1 so the division
    # below never divides by zero.
    span = replace(cmax - cmin, 0, 1);

    # Shift by the minimum, then divide by the width
    # (yields [0,1] if cmin/cmax really are the min/max of X).
    shifted = X - cmin;
    return as.matrix<f64>(shifted / span);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
def m_raSelection(X:matrix<f64>, col:si64, op:str, val:f64) -> matrix<f64> {
    # Relational selection: returns the rows of X for which the comparison
    # "<value in column col> op val" holds.
    # op is one of "==", "!=", "<", ">", "<="; any other string is treated as ">=".

    # The column to compare; col - 1 is used as the slice index.
    column = X[, col - 1];

    # Row indicator for the requested comparison.
    mask = op == "==" ? column == val
         : op == "!=" ? column != val
         : op == "<"  ? column < val
         : op == ">"  ? column > val
         : op == "<=" ? column <= val
         : column >= val;

    # Keep only the rows selected by the indicator.
    selected = X[[mask, ]];
    return as.matrix<f64>(selected);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# This function scales and centers individual features in the input | ||
# matrix (column-wise), using the z-score to scale the values. | ||
# The transformation is sometimes also called scale and shift, | ||
# but it is shifted first and then subsequently scaled. | ||
# | ||
# The method is not resistant to inputs containing NaN or to overflows | ||
# of doubles, but it handles them by guaranteeing that no extra NaN values | ||
# are introduced; columns that contain NaN will not be scaled or shifted. | ||
# | ||
# INPUT: | ||
# -------------------------------------------------------------------------------------- | ||
# X Input feature matrix | ||
# center Indicates to center the feature matrix | ||
# scale Indicates to scale the feature matrix according to z-score | ||
# -------------------------------------------------------------------------------------- | ||
# | ||
# OUTPUT: | ||
# ------------------------------------------------------------------------------------------- | ||
# Out Output feature matrix scaled and shifted | ||
# Centering The column means of the input, subtracted if Center was TRUE | ||
# ScaleFactor The scaling of the values, to make each dimension have similar value ranges | ||
# ------------------------------------------------------------------------------------------- | ||
|
||
def m_scale(X:matrix<f64>, center:bool /*= true*/, scale:bool /*= true*/) -> matrix<f64>, matrix<f64>, matrix<f64> {
    # Optionally centers and scales X and returns the transformed matrix
    # together with the centering values and scale factors that were applied.

    # 1x1 zero placeholders; they are returned unchanged for a step that is
    # switched off, so the function always has three matrices to return.
    Centering = fill(as.f64(0), 1, 1);
    ScaleFactor = fill(as.f64(0), 1, 1);

    if (center) {
        means = mean(X, 1);
        # A NaN mean becomes 0 so the subtraction introduces no further NaN values.
        Centering = replace(means, nan, 0);
        X = as.matrix<f64>(X - Centering);
    }

    if (scale) {
        # Root of the sum of squares over (number of rows - 1).
        denom = as.si64(nrow(X)) - 1;
        ScaleFactor = sqrt(sum(X ^ 2, 1) / denom);

        # Factors that are NaN or 0 become 1, so the division below cannot
        # introduce NaN values or divide by zero.
        ScaleFactor = replace(ScaleFactor, nan, 1);
        ScaleFactor = replace(ScaleFactor, 0, 1);
        X = as.matrix<f64>(X / ScaleFactor);
    }

    # X now holds the shifted and/or scaled data.
    return as.matrix<f64>(X), as.matrix<f64>(Centering), as.matrix<f64>(ScaleFactor);
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#------------------------------------------------------------- | ||
# | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
# This function scales and centers individual features in the input matrix (column-wise) using the input matrices. | ||
# | ||
# INPUT: | ||
# ------------------------------------------------------------------------------------------ | ||
# X Input feature matrix | ||
# Centering The column means to subtract from X (not done if empty) | ||
# ScaleFactor The column scaling to divide X by (not done if empty) | ||
# ------------------------------------------------------------------------------------------ | ||
# | ||
# OUTPUT: | ||
# ------------------------------------------------------------------------------------ | ||
# Y Output feature matrix with K columns | ||
# ------------------------------------------------------------------------------------ | ||
|
||
def m_scaleApply(X:matrix<f64>, Centering:matrix<f64>, ScaleFactor:matrix<f64>) -> matrix<f64> {
    # Applies a previously computed centering and scaling to X:
    # Y = (X - Centering) / ScaleFactor. A step is skipped when its matrix
    # has no rows or no columns.

    # Declared up front so Y exists with matrix type in both branches below.
    Y = [0.0];
    if (as.si64(nrow(Centering)) > 0 && as.si64(ncol(Centering)) > 0) {
        Y = X - Centering;
    } else {
        Y = as.matrix<f64>(X);
    }

    if (as.si64(nrow(ScaleFactor)) > 0 && as.si64(ncol(ScaleFactor)) > 0) {
        # A factor of 0 is replaced by 1 (i.e. no scaling) instead of dividing by zero.
        # This covers a zero-valued placeholder factor passed in place of an empty
        # matrix, and mirrors the zero handling done when factors are computed.
        safeScale = replace(ScaleFactor, 0, 1);
        Y = as.matrix<f64>(Y / safeScale);
    }

    return as.matrix<f64>(Y);
}
|
Oops, something went wrong.