diff --git a/Prediction Models/House Selling Prediction/Model.ipynb b/Prediction Models/House Selling Prediction/Model.ipynb new file mode 100644 index 000000000..11d04205c --- /dev/null +++ b/Prediction Models/House Selling Prediction/Model.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "house = pd.read_csv(\"https://github.com/YBIFoundation/Dataset/raw/main/Boston.csv\")  # load the Boston housing CSV over HTTPS\n", + "house" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "house.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "house.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "house.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "house.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "y = house['MEDV']  # regression target\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "X = house[['CRIM', 'ZN', 'INDUS', 'CHAS', 'NX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',\n", + " 'PTRATIO', 'B', 'LSTAT']]  # the 13 predictor columns" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)  # fixed seed keeps the split, coefficients and metric reproducible" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "model = LinearRegression()\n", + "model.fit(X_train, y_train)  # ordinary least squares fit on the training split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)  # predictions for the held-out rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_percentage_error\n", + "mean_absolute_percentage_error(y_test, y_pred)  # returned as a fraction (0.2 means 20%), not scaled to 0-100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "model.intercept_\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "# Example feature values for one hypothetical house (replace with the values you want to score)\n", + "test_values = pd.DataFrame({\n", + " 'CRIM': [0.1], # Crime rate\n", + " 'ZN': [20], # Proportion of residential land zoned\n", + " 'INDUS': [5.0], # Proportion of non-retail business acres\n", + " 'CHAS': [0], # Charles River dummy variable\n", + " 'NX': [0.5], # Nitric oxide concentration\n", + " 'RM': [6], # Average number of rooms\n", + " 'AGE': [65], # Proportion of 
owner-occupied units built before 1940\n", + " 'DIS': [4], # Weighted distance to five Boston employment centers\n", + " 'RAD': [2], # Index of accessibility to radial highways\n", + " 'TAX': [300], # Full-value property tax rate per $10,000\n", + " 'PTRATIO': [15],# Pupil-teacher ratio\n", + " 'B': [395], # 1000 * (Bk - 0.63)^2, where Bk is the proportion of Black residents by town\n", + " 'LSTAT': [12] # % of the population considered lower status\n", + "})\n", + "\n", + "# Use the trained model to predict MEDV (in $1000s) for the example values\n", + "y_test_pred = model.predict(test_values)\n", + "print(f\"Predicted House Price: {y_test_pred[0]}\")\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Prediction Models/House Selling Prediction/README.md b/Prediction Models/House Selling Prediction/README.md new file mode 100644 index 000000000..f276b8ce9 --- /dev/null +++ b/Prediction Models/House Selling Prediction/README.md @@ -0,0 +1,41 @@ +### Boston Housing Price Prediction +This project builds a linear regression model to predict house prices from features such as crime rate, average number of rooms, distance to employment centers, property tax rate, and more. The model is trained on the Boston Housing Dataset. + +### Table of Contents +- Introduction +- Dataset +- Requirements +- Conclusion + +### Introduction +The Boston Housing Price Prediction project uses a linear regression model to predict the median value of owner-occupied homes (in $1000s) in various suburbs of Boston. This prediction is based on a set of 13 features related to socioeconomic, geographical, and property-specific factors. + +### Dataset +The dataset used is the Boston Housing Dataset, which contains 506 samples and 14 attributes (13 features plus the target). The target variable is MEDV, the median house price. The features include: + +- CRIM: Per capita crime rate by town. +- ZN: Proportion of residential land zoned for large lots. +- INDUS: Proportion of non-retail business acres per town. 
+- CHAS: Charles River dummy variable (1 if tract bounds river; 0 otherwise). +- NX: Nitric oxides concentration (parts per 10 million). +- RM: Average number of rooms per dwelling. +- AGE: Proportion of owner-occupied units built prior to 1940. +- DIS: Weighted distances to five Boston employment centers. +- RAD: Index of accessibility to radial highways. +- TAX: Full-value property tax rate per $10,000. +- PTRATIO: Pupil-teacher ratio by town. +- B: 1000(Bk - 0.63)^2, where Bk is the proportion of Black residents by town. +- LSTAT: Percentage of the population considered lower status. +- MEDV: Median value of owner-occupied homes in $1000s (target variable). + +### Requirements +Ensure you have the following libraries installed: + +- pandas +- numpy +- matplotlib +- seaborn +- scikit-learn + +### Conclusion +This project provides a basic linear regression baseline for predicting house prices in Boston. While linear regression is useful for many tasks, more advanced techniques (e.g., decision trees, random forests) are likely to yield better results on this dataset. Future improvements may include trying these models and evaluating them with cross-validation. \ No newline at end of file