Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding code for SVM using Python and C++ #514

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions SVM_Python_Cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build script for the libsvm classification/regression examples.
# Fixes: (1) the original referenced ${svm_test_SOURCE_DIR}, which is never
# defined because the project is named svm_cpp, so the runtime output dir was
# silently empty; use PROJECT_SOURCE_DIR instead. (2) CMake < 3.5 minimums are
# rejected by modern CMake releases.
cmake_minimum_required(VERSION 3.5)
project(svm_cpp)

# The library prefix
set(LIB_PREFIX _svm_test)

set(CMAKE_CXX_FLAGS "-g -Wall")

# Put the built executables into <project-root>/bin, as documented in README
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)

# libsvm support library shared by both example executables
add_library(svm_lib
    svm.h svm.cpp
)
add_executable(svm_classification SVM_Classification.cpp)
add_executable(svm_regression SVM_Regression.cpp)

set_target_properties(svm_lib PROPERTIES OUTPUT_NAME ${LIB_PREFIX}_svm_lib)
set_target_properties(svm_classification PROPERTIES OUTPUT_NAME svm_classification)
set_target_properties(svm_regression PROPERTIES OUTPUT_NAME svm_regression)

# link the library to the executables
target_link_libraries(svm_classification svm_lib)
target_link_libraries(svm_regression svm_lib)
28 changes: 28 additions & 0 deletions SVM_Python_Cpp/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SVM using Python and C++

## Requirements

In order to run the Python scripts, you will need to install the `scikit-learn` module using `pip install scikit-learn`.

For running the C++ code, make sure you download and place all the files provided in the repository following the same folder structure.

## Compilation Instructions

To run the Python scripts, use:

```
python SVM_Regression.py
python SVM_Classification.py
```

To build the C++ code, use:

```
mkdir build
cd build
cmake ..
cmake --build . --config Release
cd ..
```

The executable files will be created in the folder `bin/`. You can run the regression example using the `svm_regression` binary and the classification example using the `svm_classification` binary.
158 changes: 158 additions & 0 deletions SVM_Python_Cpp/SVM_Classification.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Classification using SVM

#include "svm.h"
#include <ctype.h>
#include <stdlib.h>
#include <vector>
#include <iostream>
using namespace std;

// create data
// Generate a random dataset for the classification example.
//
// Produces `problemSize` samples of `featureNum` features each. Features are
// drawn in pairs: each pair (value, value_2) is resampled from [0, 1000)
// until the two values differ by at least 40, leaving a margin between the
// two classes that generateLabels() later derives from the pair ordering.
//
// Fix: the original inner loop ran featureNum-1 times while pushing two
// values per iteration, yielding 2*(featureNum-1) features per sample —
// correct only for featureNum == 2. Using featureNum/2 pair iterations
// yields exactly featureNum features (featureNum is expected to be even).
vector<vector<double>> generateData(int problemSize, int featureNum) {
    vector<vector<double>> data;
    data.reserve(problemSize);
    for(int i = 0; i < problemSize; i++) {
        // create feature vector
        vector<double> featureSet;
        featureSet.reserve(featureNum);
        for(int j = 0; j < featureNum / 2; j++) {
            int value = 0;
            int value_2 = 0;
            // resample until there is a gap of at least 40 between the pair,
            // which keeps the two classes separable
            while(abs(value_2 - value) < 40){
                value = rand() % 1000;
                value_2 = rand() % 1000;
            }
            featureSet.push_back(value);
            featureSet.push_back(value_2);
        }
        data.push_back(featureSet);
    }
    return data;
}

// create labels
// Derive a class label for each sample: +1 when the first feature exceeds
// the second, otherwise -1. Afterwards, a small random fraction of labels
// (rand() % 1000 > 980, i.e. roughly 2%) is flipped to simulate label noise.
vector<int> generateLabels(int labelsSize, vector<vector<double>> data) {
    vector<int> labels;
    labels.reserve(labelsSize);
    // assign the clean label for every sample
    for(int idx = 0; idx < labelsSize; ++idx) {
        labels.push_back(data[idx][0] > data[idx][1] ? 1 : -1);
    }
    // flip a few labels at random to introduce noise
    for(int idx = 0; idx < labelsSize; ++idx) {
        if(rand() % 1000 > 980) {
            labels[idx] = -labels[idx];
        }
    }
    return labels;
}

// utility function to scale data
// Linearly rescale every feature from the raw range [0, 999] into [-1, 1].
//
// Each raw value v maps to 2 * v / 999 - 1, matching the fixed output range
// of the rand() % 1000 draws in generateData(). Fix: the input is now taken
// by const reference instead of by value, avoiding a deep copy of the whole
// dataset on every call.
vector<vector<double>> scale_data(const vector<vector<double>>& data) {
    vector<vector<double>> scaled_data;
    scaled_data.reserve(data.size());
    for(const auto& sample : data) {
        vector<double> featureSet;
        featureSet.reserve(sample.size());
        for(double raw : sample) {
            // map [0, 999] -> [-1, 1]
            featureSet.push_back(2 * (raw - 0) / (999.0 - 0) - 1);
        }
        scaled_data.push_back(featureSet);
    }
    return scaled_data;
}

int main(){
// Training and testing data
int test_size = 300;
int train_size = 700;
int featureNum = 2;

vector<vector<double>> test_data = generateData(test_size, featureNum);
vector<int> test_labels = generateLabels(test_size, test_data);
vector<vector<double>> train_data = generateData(train_size, featureNum);
vector<int> train_labels = generateLabels(train_size, train_data);

// Scale data
//train_data = scale_data(train_data);
//test_data = scale_data(test_data);

// Train model on the dataset
struct svm_parameter param; // parameters of svm
struct svm_problem prob; // contains the training data in svm_node format
// set parameters
param.svm_type = C_SVC;
param.kernel_type = RBF;
param.degree = 3;
param.gamma = 0.5;
param.coef0 = 0;
param.nu = 0.5;
param.cache_size = 100;
param.eps = 1e-3;
param.p = 0.1;
param.shrinking = 1;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;
param.C = 10;

// Number of training examples
prob.l = train_size;

// training dataset in svm_node matrix format
svm_node** svm_x_train = (svm_node**)malloc((prob.l) * sizeof(svm_node*));

// iterate over each sample
for (int sample=0; sample < prob.l; sample++){
svm_node* x_space = (svm_node*)malloc((featureNum+1) * sizeof(svm_node));
for (int feature=0; feature < featureNum; feature++){
// feature value
x_space[feature].value= train_data[sample][feature];
// feature index
x_space[feature].index = feature+1;
}
// each sample's last feature should be -1 in libSVM
x_space[featureNum].index = -1;
svm_x_train[sample] = x_space;
}

// store training data in prob
prob.x = svm_x_train;

// store labels
prob.y = (double *)malloc(prob.l * sizeof(double));
for (int sample = 0; sample < prob.l; sample++){
prob.y[sample] = train_labels[sample];
}

// train the model
struct svm_model *model;
model = svm_train(&prob, &param);

// Evaluating the trained model on test dataset
// svm_predict returns the predicted value in C++
int prediction;

// iterate over each test sample
for (int sample=0; sample < test_data.size(); sample++){
svm_node* x_space = (svm_node*)malloc((featureNum+1) * sizeof(svm_node));
for (int feature=0; feature < featureNum; feature++){
// feature value
x_space[feature].value= test_data[sample][feature];
// feature index
x_space[feature].index = feature+1;
}
// each sample's last feature should be -1 in libSVM
x_space[featureNum].index = -1;
prediction = svm_predict(model, x_space);
std::cout << "Prediction: " << prediction << ", Groundtruth: " << test_labels[sample] << std::endl;
}
}
30 changes: 30 additions & 0 deletions SVM_Python_Cpp/SVM_Classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Classification using SVM

## Import required modules
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm

## Build a synthetic binary-classification dataset
# 1000 samples, 2 features (1 informative, 0 redundant), 1 cluster per class
X, Y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=1,
    n_clusters_per_class=1,
    n_redundant=0,
)

## Split the data: 70% for training, 30% for testing
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

## Standardize features using statistics computed on the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Fit a support-vector classifier on the training data
clf = svm.SVC()
clf.fit(X_train, y_train)

## Predict labels for the held-out test data
clf_predictions = clf.predict(X_test)
138 changes: 138 additions & 0 deletions SVM_Python_Cpp/SVM_Regression.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Regression using SVM

#include "svm.h"
#include <ctype.h>
#include <stdlib.h>
#include <vector>
#include <iostream>
using namespace std;

// generate data for regression task
// Generate random samples for the regression task; every feature is an
// integer drawn uniformly from [0, 1000).
//
// Fix: the original inner loop ran featureNum-1 times while pushing two
// values per iteration, yielding 2*(featureNum-1) features per sample —
// correct only for featureNum == 2. Using featureNum/2 pair iterations
// yields exactly featureNum features (featureNum is expected to be even).
vector<vector<double>> generateData(int problemSize, int featureNum) {
    vector<vector<double>> data;
    data.reserve(problemSize);
    for(int i = 0; i < problemSize; i++) {
        vector<double> featureSet;
        featureSet.reserve(featureNum);
        for(int j = 0; j < featureNum / 2; j++) {
            int value = rand() % 1000;
            int value_2 = rand() % 1000;
            featureSet.push_back(value);
            featureSet.push_back(value_2);
        }
        data.push_back(featureSet);
    }
    return data;
}

// generate labels for the data provided
// Regression target for each sample: the mean of its two features. The mean
// is computed in double and truncated toward zero when stored as an int.
vector<int> generateLabels(int labelsSize, vector<vector<double>> data) {
    vector<int> labels;
    labels.reserve(labelsSize);
    for(int idx = 0; idx < labelsSize; ++idx) {
        double mean = (data[idx][0] + data[idx][1]) / 2;
        labels.push_back(mean); // narrowing double -> int, truncates
    }
    return labels;
}

// utility function to scale data
// Linearly rescale every feature from the raw range [0, 999] into [-1, 1].
//
// Each raw value v maps to 2 * v / 999 - 1, matching the fixed output range
// of the rand() % 1000 draws in generateData(). Fix: the input is now taken
// by const reference instead of by value, avoiding a deep copy of the whole
// dataset on every call.
vector<vector<double>> scale_data(const vector<vector<double>>& data) {
    vector<vector<double>> scaled_data;
    scaled_data.reserve(data.size());
    for(const auto& sample : data) {
        vector<double> featureSet;
        featureSet.reserve(sample.size());
        for(double raw : sample) {
            // map [0, 999] -> [-1, 1]
            featureSet.push_back(2 * (raw - 0) / (999.0 - 0) - 1);
        }
        scaled_data.push_back(featureSet);
    }
    return scaled_data;
}

int main(){
// Training and testing data
int test_size = 300;
int featureNum = 2;
int train_size = 700;

vector<vector<double>> test_data = generateData(test_size, featureNum);
vector<int> test_labels = generateLabels(test_size, test_data);
vector<vector<double>> train_data = generateData(train_size, featureNum);
vector<int> train_labels = generateLabels(train_size, train_data);

// Scale data
train_data = scale_data(train_data);
test_data = scale_data(test_data);

// Train model on the dataset
struct svm_parameter param; // parameters of svm
struct svm_problem prob; // contains the training data in svm_node format
// set parameters
param.svm_type = EPSILON_SVR;
param.kernel_type = RBF;
param.gamma = 0.5;
param.degree = 3;
param.coef0 = 0;
param.nu = 0.5;
param.C = 10;
param.eps = 1e-3;
param.p = 0.1;
param.shrinking = 1;
param.probability = 0;
param.nr_weight = 0;
param.weight_label = NULL;
param.weight = NULL;

// Number of training examples
prob.l = train_size;

// training dataset in svm_node matrix format
svm_node** svm_x_train = (svm_node**)malloc((prob.l) * sizeof(svm_node*));

// iterate over each sample
for (int sample=0; sample < prob.l; sample++){
svm_node* x_space = (svm_node*)malloc((featureNum+1) * sizeof(svm_node));
for (int feature=0; feature < featureNum; feature++){
// feature value
x_space[feature].value= train_data[sample][feature];
// feature index
x_space[feature].index = feature+1;
}
// each sample's last feature should be -1 in libSVM
x_space[featureNum].index = -1;
svm_x_train[sample] = x_space;
}

// store training data in prob
prob.x = svm_x_train;

// store labels
prob.y = (double *)malloc(prob.l * sizeof(double));
for (int sample = 0; sample < prob.l; sample++){
prob.y[sample] = train_labels[sample];
}

// train the model
struct svm_model *model;
model = svm_train(&prob, &param);

// Evaluating the trained model on test dataset
// svm_predict returns the predicted value in C++
int prediction;

// iterate over each test sample
for (int sample=0; sample < test_data.size(); sample++){
svm_node* x_space = (svm_node*)malloc((featureNum+1) * sizeof(svm_node));
for (int feature=0; feature < featureNum; feature++){
// feature value
x_space[feature].value= train_data[sample][feature];
// feature index
x_space[feature].index = feature+1;
}
// each sample's last feature should be -1 in libSVM
x_space[featureNum].index = -1;
prediction = svm_predict(model, x_space);
std::cout << "Prediction: " << prediction << ", Groundtruth: " << test_labels[sample] << std::endl;
}
}
Loading