diff --git a/opensource_analysis/OpenSourceEda.ipynb b/opensource_analysis/OpenSourceEda.ipynb
new file mode 100644
index 0000000..2168c13
--- /dev/null
+++ b/opensource_analysis/OpenSourceEda.ipynb
@@ -0,0 +1,596 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "authorship_tag": "ABX9TyNup9TirO5QiDqQeUkcbri+",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## EDA for Open Source Analysis"
+ ],
+ "metadata": {
+ "id": "c80GbPckrARJ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5_o2eklhmybL",
+ "outputId": "f436c24e-407d-4caa-806f-6aeb89c9bd91"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 99 entries, 0 to 98\n",
+ "Columns: 129 entries, Respondent to SurveyEasy\n",
+ "dtypes: float64(42), int64(1), object(86)\n",
+ "memory usage: 99.9+ KB\n",
+ "None\n",
+ " Respondent AssessJob1 AssessJob2 AssessJob3 AssessJob4 AssessJob5 \\\n",
+ "count 99.000000 64.000000 64.000000 64.000000 64.000000 64.000000 \n",
+ "mean 74.252525 5.750000 6.437500 6.312500 4.359375 3.218750 \n",
+ "std 42.168598 2.817181 2.695528 2.641999 2.674792 2.458989 \n",
+ "min 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 \n",
+ "25% 40.000000 4.000000 4.750000 4.000000 2.000000 1.000000 \n",
+ "50% 77.000000 6.000000 7.000000 7.000000 4.000000 2.000000 \n",
+ "75% 111.500000 8.000000 9.000000 8.000000 6.000000 4.000000 \n",
+ "max 143.000000 10.000000 10.000000 10.000000 10.000000 10.000000 \n",
+ "\n",
+ " AssessJob6 AssessJob7 AssessJob8 AssessJob9 ... \\\n",
+ "count 64.00000 64.000000 64.000000 64.000000 ... \n",
+ "mean 5.03125 5.500000 4.343750 7.781250 ... \n",
+ "std 2.41009 2.949307 2.533701 2.446045 ... \n",
+ "min 1.00000 1.000000 1.000000 1.000000 ... \n",
+ "25% 3.00000 3.000000 2.000000 6.750000 ... \n",
+ "50% 5.00000 5.500000 4.000000 9.000000 ... \n",
+ "75% 6.25000 8.000000 6.000000 10.000000 ... \n",
+ "max 10.00000 10.000000 10.000000 10.000000 ... \n",
+ "\n",
+ " JobEmailPriorities7 Salary ConvertedSalary AdsPriorities1 \\\n",
+ "count 42.000000 4.900000e+01 46.000000 56.000000 \n",
+ "mean 4.904762 4.666699e+05 80993.739130 2.803571 \n",
+ "std 1.736404 2.175638e+06 131734.984542 1.891797 \n",
+ "min 1.000000 0.000000e+00 0.000000 1.000000 \n",
+ "25% 3.250000 1.600000e+04 30603.000000 1.000000 \n",
+ "50% 5.000000 7.000000e+04 56313.500000 2.500000 \n",
+ "75% 6.000000 1.200000e+05 84943.000000 4.000000 \n",
+ "max 7.000000 1.520000e+07 900000.000000 7.000000 \n",
+ "\n",
+ " AdsPriorities2 AdsPriorities3 AdsPriorities4 AdsPriorities5 \\\n",
+ "count 56.000000 56.000000 56.000000 56.000000 \n",
+ "mean 4.285714 2.964286 3.928571 4.392857 \n",
+ "std 1.691844 1.628879 1.895997 2.154729 \n",
+ "min 1.000000 1.000000 1.000000 1.000000 \n",
+ "25% 3.000000 2.000000 2.000000 2.000000 \n",
+ "50% 4.000000 2.500000 4.000000 5.000000 \n",
+ "75% 6.000000 4.000000 5.000000 6.000000 \n",
+ "max 7.000000 7.000000 7.000000 7.000000 \n",
+ "\n",
+ " AdsPriorities6 AdsPriorities7 \n",
+ "count 56.000000 56.000000 \n",
+ "mean 4.964286 4.660714 \n",
+ "std 1.953684 1.781489 \n",
+ "min 1.000000 1.000000 \n",
+ "25% 4.000000 3.000000 \n",
+ "50% 5.000000 5.000000 \n",
+ "75% 7.000000 6.000000 \n",
+ "max 7.000000 7.000000 \n",
+ "\n",
+ "[8 rows x 43 columns]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# Load the survey sample from the path used in this Colab session\n",
+ "df = pd.read_csv('/content/survey_results_sample_2018 (1).csv')\n",
+ "\n",
+ "# Overview: df.info() prints its report itself and returns None, so it is not wrapped in print()\n",
+ "df.info()\n",
+ "print(df.describe())\n",
+ "\n",
+ "# Label missing answers in non-numeric columns only, so numeric columns keep NaN and their dtype\n",
+ "df = df.fillna({col: 'Not Specified' for col in df.select_dtypes(exclude='number').columns})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Share of respondents per category for a few key categorical columns.\n",
+ "# Missing values were already replaced with 'Not Specified' in the loading cell.\n",
+ "# NOTE(review): DevType and LanguageWorkedWith hold ';'-separated multi-select answers,\n",
+ "# so the shares below are per exact combination, not per individual item.\n",
+ "categorical_columns = ['Country', 'Employment', 'DevType', 'LanguageWorkedWith']\n",
+ "\n",
+ "for col in categorical_columns:\n",
+ "    print(f\"\\nValue counts for {col}:\")\n",
+ "    # normalize=True yields proportions; head() keeps the five most frequent values\n",
+ "    print(df[col].value_counts(normalize=True).head())"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "XvJHXLDNnTVG",
+ "outputId": "e208b6cc-f42f-42b5-bf1e-5e20a37c6747"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "Value counts for Country:\n",
+ "Country\n",
+ "United States 0.282828\n",
+ "India 0.161616\n",
+ "United Kingdom 0.070707\n",
+ "Germany 0.050505\n",
+ "Russian Federation 0.040404\n",
+ "Name: proportion, dtype: float64\n",
+ "\n",
+ "Value counts for Employment:\n",
+ "Employment\n",
+ "Employed full-time 0.919192\n",
+ "Employed part-time 0.080808\n",
+ "Name: proportion, dtype: float64\n",
+ "\n",
+ "Value counts for DevType:\n",
+ "DevType\n",
+ "Full-stack developer 0.080808\n",
+ "Back-end developer;Front-end developer;Full-stack developer 0.060606\n",
+ "Back-end developer;Full-stack developer 0.050505\n",
+ "Mobile developer 0.040404\n",
+ "Back-end developer;DevOps specialist 0.020202\n",
+ "Name: proportion, dtype: float64\n",
+ "\n",
+ "Value counts for LanguageWorkedWith:\n",
+ "LanguageWorkedWith\n",
+ "Not Specified 0.252525\n",
+ "Java;JavaScript;PHP;SQL;TypeScript;HTML;CSS 0.020202\n",
+ "JavaScript;PHP;HTML;CSS 0.020202\n",
+ "JavaScript;Python;HTML;CSS 0.010101\n",
+ "JavaScript;PHP;Python;SQL;TypeScript;HTML;CSS 0.010101\n",
+ "Name: proportion, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "# Make sure the salary column is numeric: non-numeric entries become NaN and are dropped before plotting\n",
+ "df['ConvertedSalary'] = pd.to_numeric(df['ConvertedSalary'], errors='coerce')\n",
+ "fig, ax = plt.subplots(figsize=(10, 6))\n",
+ "sns.histplot(df['ConvertedSalary'].dropna(), kde=True, ax=ax)\n",
+ "ax.set_title('Distribution of Salaries')\n",
+ "ax.set_xlabel('Salary (USD)')\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 564
+ },
+ "id": "XLBoZV8nnTTO",
+ "outputId": "2a787cab-0305-4f7d-a728-c26d8cc10982"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "