diff --git a/movierecommendation1.ipynb b/movierecommendation1.ipynb new file mode 100644 index 000000000..60beee520 --- /dev/null +++ b/movierecommendation1.ipynb @@ -0,0 +1,3760 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Movie Recommendation System" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "eRn6QKqOqLJS" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\srija\\AppData\\Local\\Temp\\ipykernel_24728\\1662815981.py:2: DeprecationWarning: \n", + "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n", + "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n", + "but was not found to be installed on your system.\n", + "If this would cause problems for you,\n", + "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n", + " \n", + " import pandas as pd\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 217 + }, + "id": "Zppld355rFRM", + "outputId": "0e0e9934-526b-412c-b471-b21a37187308" + }, + "outputs": [], + "source": [ + "movies = pd.read_csv('tmdb_5000_movies.csv')\n", + "credits = pd.read_csv('tmdb_5000_credits.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "s2aA_IWx703n" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(4803, 4)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "credits.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "kNTnWQqUrFPD" + }, + "outputs": [ + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
budgetgenreshomepageidkeywordsoriginal_languageoriginal_titleoverviewpopularityproduction_companiesproduction_countriesrelease_daterevenueruntimespoken_languagesstatustaglinetitlevote_averagevote_count
0237000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://www.avatarmovie.com/19995[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...enAvatarIn the 22nd century, a paraplegic Marine is di...150.437577[{\"name\": \"Ingenious Film Partners\", \"id\": 289...[{\"iso_3166_1\": \"US\", \"name\": \"United States o...2009-12-102787965087162.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...ReleasedEnter the World of Pandora.Avatar7.211800
\n", + "
" + ], + "text/plain": [ + " budget genres \\\n", + "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", + "\n", + " homepage id \\\n", + "0 http://www.avatarmovie.com/ 19995 \n", + "\n", + " keywords original_language \\\n", + "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n", + "\n", + " original_title overview \\\n", + "0 Avatar In the 22nd century, a paraplegic Marine is di... \n", + "\n", + " popularity production_companies \\\n", + "0 150.437577 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... \n", + "\n", + " production_countries release_date revenue \\\n", + "0 [{\"iso_3166_1\": \"US\", \"name\": \"United States o... 2009-12-10 2787965087 \n", + "\n", + " runtime spoken_languages status \\\n", + "0 162.0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n", + "\n", + " tagline title vote_average vote_count \n", + "0 Enter the World of Pandora. Avatar 7.2 11800 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "V2mvZ9UqrFMi" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitlecastcrew
019995Avatar[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
\n", + "
" + ], + "text/plain": [ + " movie_id title cast \\\n", + "0 19995 Avatar [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", + "\n", + " crew \n", + "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "credits.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "31aIi2eIrFJ_" + }, + "outputs": [], + "source": [ + "#merging two dataframes movies and columns on the basis of the title of the movie\n", + "movies = movies.merge(credits, on='title')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "JisS5HPE7WGX" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(4809, 23)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "JNWKKhVQ7WD_" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
budgetgenreshomepageidkeywordsoriginal_languageoriginal_titleoverviewpopularityproduction_companies...runtimespoken_languagesstatustaglinetitlevote_averagevote_countmovie_idcastcrew
0237000000[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...http://www.avatarmovie.com/19995[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...enAvatarIn the 22nd century, a paraplegic Marine is di...150.437577[{\"name\": \"Ingenious Film Partners\", \"id\": 289......162.0[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...ReleasedEnter the World of Pandora.Avatar7.21180019995[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
\n", + "

1 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " budget genres \\\n", + "0 237000000 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", + "\n", + " homepage id \\\n", + "0 http://www.avatarmovie.com/ 19995 \n", + "\n", + " keywords original_language \\\n", + "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... en \n", + "\n", + " original_title overview \\\n", + "0 Avatar In the 22nd century, a paraplegic Marine is di... \n", + "\n", + " popularity production_companies ... runtime \\\n", + "0 150.437577 [{\"name\": \"Ingenious Film Partners\", \"id\": 289... ... 162.0 \n", + "\n", + " spoken_languages status \\\n", + "0 [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso... Released \n", + "\n", + " tagline title vote_average vote_count movie_id \\\n", + "0 Enter the World of Pandora. Avatar 7.2 11800 19995 \n", + "\n", + " cast \\\n", + "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", + "\n", + " crew \n", + "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... 
\n", + "\n", + "[1 rows x 23 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "72wx-xOw981F" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "original_language\n", + "en 4510\n", + "fr 70\n", + "es 32\n", + "zh 27\n", + "de 27\n", + "hi 19\n", + "ja 16\n", + "it 14\n", + "ko 12\n", + "cn 12\n", + "ru 11\n", + "pt 9\n", + "da 7\n", + "sv 5\n", + "nl 4\n", + "fa 4\n", + "th 3\n", + "he 3\n", + "ta 2\n", + "cs 2\n", + "ro 2\n", + "id 2\n", + "ar 2\n", + "vi 1\n", + "sl 1\n", + "ps 1\n", + "no 1\n", + "ky 1\n", + "hu 1\n", + "pl 1\n", + "af 1\n", + "nb 1\n", + "tr 1\n", + "is 1\n", + "xx 1\n", + "te 1\n", + "el 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies['original_language'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "5-CulFck7WBd" + }, + "outputs": [], + "source": [ + "#now we will select which columns or features are important for recommendation\n", + "#budget does not decide the criteria for recommendation\n", + "#genre is a very important feature for recommendation\n", + "#homepage does not decide the criteria for recommendation\n", + "#we will keep the id column for the website part(frontend)\n", + "#keywords are important\n", + "#as we can see 90 percent of the movies are in english so this column does not matter\n", + "#we will keep the title column(remove original_title as it can contain the names in regional language)\n", + "#overview is very important\n", + "#we will not keep popularity\n", + "#we will not keep production_companies\n", + "#we wont keep production_countries\n", + "#we will not keep release dates\n", + "#we will keep cast and crew" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + 
"metadata": { + "id": "z336rfNu7V_G" + }, + "outputs": [], + "source": [ + "#columns which we will keep\n", + "#genre\n", + "#id\n", + "#keywords\n", + "#title\n", + "#overview\n", + "#cast\n", + "#crew\n", + "\n", + "movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "ihvMynEi7V8m" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995AvatarIn the 22nd century, a paraplegic Marine is di...[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2206647SpectreA cryptic message from Bond’s past sends him o...[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
449529John CarterJohn Carter is a war-weary, former military ca...[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 In the 22nd century, a paraplegic Marine is di... \n", + "1 Captain Barbossa, long believed to be dead, ha... \n", + "2 A cryptic message from Bond’s past sends him o... \n", + "3 Following the death of District Attorney Harve... \n", + "4 John Carter is a war-weary, former military ca... \n", + "\n", + " genres \\\n", + "0 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", + "1 [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"... \n", + "2 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", + "3 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam... \n", + "4 [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam... \n", + "\n", + " keywords \\\n", + "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n", + "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n", + "2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n", + "3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n", + "4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n", + "\n", + " cast \\\n", + "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", + "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n", + "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n", + "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n", + "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n", + "\n", + " crew \n", + "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", + "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", + "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", + "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", + "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "VoxrFaLW_uCh" + }, + "outputs": [], + "source": [ + "#we will create a new dataframe from the above dataframe in the following format\n", + "#movie| title | tags\n", + "#tags columns will contain the merge of overview, genre, keywords, cast, crew\n", + "#we will merge the genre at the back of the overview, some keywords, top three actor's name and directors name" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "y-YYNs1Q_uAh" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "movie_id 0\n", + "title 0\n", + "overview 3\n", + "genres 0\n", + "keywords 0\n", + "cast 0\n", + "crew 0\n", + "dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "STVqNT1E_t9q" + }, + "outputs": [], + "source": [ + "movies.dropna(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "bIeEvKKf_t7E" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.duplicated().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Feature Scaling" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "iXlGgXlk_t4t" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"movies.iloc[0].genres #the data is in the form of list of dictionaries" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "s5MfnJ_y_t2T" + }, + "outputs": [], + "source": [ + "#1.'[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]'\n", + "#2.we have to convert the format above in the format below\n", + "#3.['Action', 'Adventure', 'Fantasy', 'SciFi']" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "HmjVtSiCpqM7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 28, 'name': 'Action'},\n", + " {'id': 12, 'name': 'Adventure'},\n", + " {'id': 14, 'name': 'Fantasy'},\n", + " {'id': 878, 'name': 'Science Fiction'}]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#We can see that in the above code block line 1 is enclosed in string format, so we have to remove the string using a python function called ast\n", + "#this library ast will convert the string into list of dictionary\n", + "import ast\n", + "ast.literal_eval('[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "P4liiuW9_tzz" + }, + "outputs": [], + "source": [ + "def convert(obj):\n", + " L = []\n", + " for i in ast.literal_eval(obj):\n", + " L.append(i['name'])\n", + " return L" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "ED6dnUcf_txI" + }, + "outputs": [], + "source": [ + "movies['genres'] = movies['genres'].apply(convert)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "E7ShLbyNqnSg" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995AvatarIn the 22nd century, a paraplegic Marine is di...[Action, Adventure, Fantasy, Science Fiction][{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...[Adventure, Fantasy, Action][{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2206647SpectreA cryptic message from Bond’s past sends him o...[Action, Adventure, Crime][{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...[Action, Crime, Drama, Thriller][{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
449529John CarterJohn Carter is a war-weary, former military ca...[Action, Adventure, Science Fiction][{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 In the 22nd century, a paraplegic Marine is di... \n", + "1 Captain Barbossa, long believed to be dead, ha... \n", + "2 A cryptic message from Bond’s past sends him o... \n", + "3 Following the death of District Attorney Harve... \n", + "4 John Carter is a war-weary, former military ca... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, Science Fiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, Science Fiction] \n", + "\n", + " keywords \\\n", + "0 [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":... \n", + "1 [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na... \n", + "2 [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name... \n", + "3 [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,... \n", + "4 [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":... \n", + "\n", + " cast \\\n", + "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", + "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n", + "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n", + "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n", + "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n", + "\n", + " crew \n", + "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", + "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", + "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", + "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", + "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "V7Qjx3wwqnPD" + }, + "outputs": [], + "source": [ + "#now you can see that the genres column is sorted out" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "ovEEy_LAqnNQ" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\": 2964, \"name\": \"future\"}, {\"id\": 3386, \"name\": \"space war\"}, {\"id\": 3388, \"name\": \"space colony\"}, {\"id\": 3679, \"name\": \"society\"}, {\"id\": 3801, \"name\": \"space travel\"}, {\"id\": 9685, \"name\": \"futuristic\"}, {\"id\": 9840, \"name\": \"romance\"}, {\"id\": 9882, \"name\": \"space\"}, {\"id\": 9951, \"name\": \"alien\"}, {\"id\": 10148, \"name\": \"tribe\"}, {\"id\": 10158, \"name\": \"alien planet\"}, {\"id\": 10987, \"name\": \"cgi\"}, {\"id\": 11399, \"name\": \"marine\"}, {\"id\": 13065, \"name\": \"soldier\"}, {\"id\": 14643, \"name\": \"battle\"}, {\"id\": 14720, \"name\": \"love affair\"}, {\"id\": 165431, \"name\": \"anti war\"}, {\"id\": 193554, \"name\": \"power relations\"}, {\"id\": 206690, \"name\": \"mind and soul\"}, {\"id\": 209714, \"name\": \"3d\"}]'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.iloc[0].keywords" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "M9EDFHm2qnKC" + }, + "outputs": [], + "source": [ + "#we will do the same thing on the keywords column" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "mUC7Bd0yqnH9" + }, + "outputs": [], + "source": [ + "movies['keywords'] = movies['keywords'].apply(convert)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "DWQ1WHl6qnEy" + }, + "outputs": [ + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995AvatarIn the 22nd century, a paraplegic Marine is di...[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colon...[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...[Adventure, Fantasy, Action][ocean, drug abuse, exotic island, east india ...[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2206647SpectreA cryptic message from Bond’s past sends him o...[Action, Adventure, Crime][spy, based on novel, secret agent, sequel, mi...[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...[Action, Crime, Drama, Thriller][dc comics, crime fighter, terrorist, secret i...[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
449529John CarterJohn Carter is a war-weary, former military ca...[Action, Adventure, Science Fiction][based on novel, mars, medallion, space travel...[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 In the 22nd century, a paraplegic Marine is di... \n", + "1 Captain Barbossa, long believed to be dead, ha... \n", + "2 A cryptic message from Bond’s past sends him o... \n", + "3 Following the death of District Attorney Harve... \n", + "4 John Carter is a war-weary, former military ca... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, Science Fiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, Science Fiction] \n", + "\n", + " keywords \\\n", + "0 [culture clash, future, space war, space colon... \n", + "1 [ocean, drug abuse, exotic island, east india ... \n", + "2 [spy, based on novel, secret agent, sequel, mi... \n", + "3 [dc comics, crime fighter, terrorist, secret i... \n", + "4 [based on novel, mars, medallion, space travel... \n", + "\n", + " cast \\\n", + "0 [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"... \n", + "1 [{\"cast_id\": 4, \"character\": \"Captain Jack Spa... \n", + "2 [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr... \n", + "3 [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba... \n", + "4 [{\"cast_id\": 5, \"character\": \"John Carter\", \"c... \n", + "\n", + " crew \n", + "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", + "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", + "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", + "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", + "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "WsWPLZo-qnC2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"credit_id\": \"5602a8a7c3a3685532001c9a\", \"gender\": 2, \"id\": 65731, \"name\": \"Sam Worthington\", \"order\": 0}, {\"cast_id\": 3, \"character\": \"Neytiri\", \"credit_id\": \"52fe48009251416c750ac9cb\", \"gender\": 1, \"id\": 8691, \"name\": \"Zoe Saldana\", \"order\": 1}, {\"cast_id\": 25, \"character\": \"Dr. Grace Augustine\", \"credit_id\": \"52fe48009251416c750aca39\", \"gender\": 1, \"id\": 10205, \"name\": \"Sigourney Weaver\", \"order\": 2}, {\"cast_id\": 4, \"character\": \"Col. Quaritch\", \"credit_id\": \"52fe48009251416c750ac9cf\", \"gender\": 2, \"id\": 32747, \"name\": \"Stephen Lang\", \"order\": 3}, {\"cast_id\": 5, \"character\": \"Trudy Chacon\", \"credit_id\": \"52fe48009251416c750ac9d3\", \"gender\": 1, \"id\": 17647, \"name\": \"Michelle Rodriguez\", \"order\": 4}, {\"cast_id\": 8, \"character\": \"Selfridge\", \"credit_id\": \"52fe48009251416c750ac9e1\", \"gender\": 2, \"id\": 1771, \"name\": \"Giovanni Ribisi\", \"order\": 5}, {\"cast_id\": 7, \"character\": \"Norm Spellman\", \"credit_id\": \"52fe48009251416c750ac9dd\", \"gender\": 2, \"id\": 59231, \"name\": \"Joel David Moore\", \"order\": 6}, {\"cast_id\": 9, \"character\": \"Moat\", \"credit_id\": \"52fe48009251416c750ac9e5\", \"gender\": 1, \"id\": 30485, \"name\": \"CCH Pounder\", \"order\": 7}, {\"cast_id\": 11, \"character\": \"Eytukan\", \"credit_id\": \"52fe48009251416c750ac9ed\", \"gender\": 2, \"id\": 15853, \"name\": \"Wes Studi\", \"order\": 8}, {\"cast_id\": 10, \"character\": \"Tsu\\'Tey\", \"credit_id\": \"52fe48009251416c750ac9e9\", \"gender\": 2, \"id\": 10964, \"name\": \"Laz Alonso\", \"order\": 9}, {\"cast_id\": 12, \"character\": 
\"Dr. Max Patel\", \"credit_id\": \"52fe48009251416c750ac9f1\", \"gender\": 2, \"id\": 95697, \"name\": \"Dileep Rao\", \"order\": 10}, {\"cast_id\": 13, \"character\": \"Lyle Wainfleet\", \"credit_id\": \"52fe48009251416c750ac9f5\", \"gender\": 2, \"id\": 98215, \"name\": \"Matt Gerald\", \"order\": 11}, {\"cast_id\": 32, \"character\": \"Private Fike\", \"credit_id\": \"52fe48009251416c750aca5b\", \"gender\": 2, \"id\": 154153, \"name\": \"Sean Anthony Moran\", \"order\": 12}, {\"cast_id\": 33, \"character\": \"Cryo Vault Med Tech\", \"credit_id\": \"52fe48009251416c750aca5f\", \"gender\": 2, \"id\": 397312, \"name\": \"Jason Whyte\", \"order\": 13}, {\"cast_id\": 34, \"character\": \"Venture Star Crew Chief\", \"credit_id\": \"52fe48009251416c750aca63\", \"gender\": 2, \"id\": 42317, \"name\": \"Scott Lawrence\", \"order\": 14}, {\"cast_id\": 35, \"character\": \"Lock Up Trooper\", \"credit_id\": \"52fe48009251416c750aca67\", \"gender\": 2, \"id\": 986734, \"name\": \"Kelly Kilgour\", \"order\": 15}, {\"cast_id\": 36, \"character\": \"Shuttle Pilot\", \"credit_id\": \"52fe48009251416c750aca6b\", \"gender\": 0, \"id\": 1207227, \"name\": \"James Patrick Pitt\", \"order\": 16}, {\"cast_id\": 37, \"character\": \"Shuttle Co-Pilot\", \"credit_id\": \"52fe48009251416c750aca6f\", \"gender\": 0, \"id\": 1180936, \"name\": \"Sean Patrick Murphy\", \"order\": 17}, {\"cast_id\": 38, \"character\": \"Shuttle Crew Chief\", \"credit_id\": \"52fe48009251416c750aca73\", \"gender\": 2, \"id\": 1019578, \"name\": \"Peter Dillon\", \"order\": 18}, {\"cast_id\": 39, \"character\": \"Tractor Operator / Troupe\", \"credit_id\": \"52fe48009251416c750aca77\", \"gender\": 0, \"id\": 91443, \"name\": \"Kevin Dorman\", \"order\": 19}, {\"cast_id\": 40, \"character\": \"Dragon Gunship Pilot\", \"credit_id\": \"52fe48009251416c750aca7b\", \"gender\": 2, \"id\": 173391, \"name\": \"Kelson Henderson\", \"order\": 20}, {\"cast_id\": 41, \"character\": \"Dragon Gunship Gunner\", \"credit_id\": 
\"52fe48009251416c750aca7f\", \"gender\": 0, \"id\": 1207236, \"name\": \"David Van Horn\", \"order\": 21}, {\"cast_id\": 42, \"character\": \"Dragon Gunship Navigator\", \"credit_id\": \"52fe48009251416c750aca83\", \"gender\": 0, \"id\": 215913, \"name\": \"Jacob Tomuri\", \"order\": 22}, {\"cast_id\": 43, \"character\": \"Suit #1\", \"credit_id\": \"52fe48009251416c750aca87\", \"gender\": 0, \"id\": 143206, \"name\": \"Michael Blain-Rozgay\", \"order\": 23}, {\"cast_id\": 44, \"character\": \"Suit #2\", \"credit_id\": \"52fe48009251416c750aca8b\", \"gender\": 2, \"id\": 169676, \"name\": \"Jon Curry\", \"order\": 24}, {\"cast_id\": 46, \"character\": \"Ambient Room Tech\", \"credit_id\": \"52fe48009251416c750aca8f\", \"gender\": 0, \"id\": 1048610, \"name\": \"Luke Hawker\", \"order\": 25}, {\"cast_id\": 47, \"character\": \"Ambient Room Tech / Troupe\", \"credit_id\": \"52fe48009251416c750aca93\", \"gender\": 0, \"id\": 42288, \"name\": \"Woody Schultz\", \"order\": 26}, {\"cast_id\": 48, \"character\": \"Horse Clan Leader\", \"credit_id\": \"52fe48009251416c750aca97\", \"gender\": 2, \"id\": 68278, \"name\": \"Peter Mensah\", \"order\": 27}, {\"cast_id\": 49, \"character\": \"Link Room Tech\", \"credit_id\": \"52fe48009251416c750aca9b\", \"gender\": 0, \"id\": 1207247, \"name\": \"Sonia Yee\", \"order\": 28}, {\"cast_id\": 50, \"character\": \"Basketball Avatar / Troupe\", \"credit_id\": \"52fe48009251416c750aca9f\", \"gender\": 1, \"id\": 1207248, \"name\": \"Jahnel Curfman\", \"order\": 29}, {\"cast_id\": 51, \"character\": \"Basketball Avatar\", \"credit_id\": \"52fe48009251416c750acaa3\", \"gender\": 0, \"id\": 89714, \"name\": \"Ilram Choi\", \"order\": 30}, {\"cast_id\": 52, \"character\": \"Na\\'vi Child\", \"credit_id\": \"52fe48009251416c750acaa7\", \"gender\": 0, \"id\": 1207249, \"name\": \"Kyla Warren\", \"order\": 31}, {\"cast_id\": 53, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acaab\", \"gender\": 0, \"id\": 1207250, \"name\": 
\"Lisa Roumain\", \"order\": 32}, {\"cast_id\": 54, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acaaf\", \"gender\": 1, \"id\": 83105, \"name\": \"Debra Wilson\", \"order\": 33}, {\"cast_id\": 57, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acabb\", \"gender\": 0, \"id\": 1207253, \"name\": \"Chris Mala\", \"order\": 34}, {\"cast_id\": 55, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acab3\", \"gender\": 0, \"id\": 1207251, \"name\": \"Taylor Kibby\", \"order\": 35}, {\"cast_id\": 56, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acab7\", \"gender\": 0, \"id\": 1207252, \"name\": \"Jodie Landau\", \"order\": 36}, {\"cast_id\": 58, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acabf\", \"gender\": 0, \"id\": 1207254, \"name\": \"Julie Lamm\", \"order\": 37}, {\"cast_id\": 59, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acac3\", \"gender\": 0, \"id\": 1207257, \"name\": \"Cullen B. 
Madden\", \"order\": 38}, {\"cast_id\": 60, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acac7\", \"gender\": 0, \"id\": 1207259, \"name\": \"Joseph Brady Madden\", \"order\": 39}, {\"cast_id\": 61, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acacb\", \"gender\": 0, \"id\": 1207262, \"name\": \"Frankie Torres\", \"order\": 40}, {\"cast_id\": 62, \"character\": \"Troupe\", \"credit_id\": \"52fe48009251416c750acacf\", \"gender\": 1, \"id\": 1158600, \"name\": \"Austin Wilson\", \"order\": 41}, {\"cast_id\": 63, \"character\": \"Troupe\", \"credit_id\": \"52fe48019251416c750acad3\", \"gender\": 1, \"id\": 983705, \"name\": \"Sara Wilson\", \"order\": 42}, {\"cast_id\": 64, \"character\": \"Troupe\", \"credit_id\": \"52fe48019251416c750acad7\", \"gender\": 0, \"id\": 1207263, \"name\": \"Tamica Washington-Miller\", \"order\": 43}, {\"cast_id\": 65, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acadb\", \"gender\": 1, \"id\": 1145098, \"name\": \"Lucy Briant\", \"order\": 44}, {\"cast_id\": 66, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acadf\", \"gender\": 2, \"id\": 33305, \"name\": \"Nathan Meister\", \"order\": 45}, {\"cast_id\": 67, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acae3\", \"gender\": 0, \"id\": 1207264, \"name\": \"Gerry Blair\", \"order\": 46}, {\"cast_id\": 68, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acae7\", \"gender\": 2, \"id\": 33311, \"name\": \"Matthew Chamberlain\", \"order\": 47}, {\"cast_id\": 69, \"character\": \"Op Center Staff\", \"credit_id\": \"52fe48019251416c750acaeb\", \"gender\": 0, \"id\": 1207265, \"name\": \"Paul Yates\", \"order\": 48}, {\"cast_id\": 70, \"character\": \"Op Center Duty Officer\", \"credit_id\": \"52fe48019251416c750acaef\", \"gender\": 0, \"id\": 1207266, \"name\": \"Wray Wilson\", \"order\": 49}, {\"cast_id\": 71, \"character\": \"Op Center Staff\", 
\"credit_id\": \"52fe48019251416c750acaf3\", \"gender\": 2, \"id\": 54492, \"name\": \"James Gaylyn\", \"order\": 50}, {\"cast_id\": 72, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acaf7\", \"gender\": 0, \"id\": 1207267, \"name\": \"Melvin Leno Clark III\", \"order\": 51}, {\"cast_id\": 73, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acafb\", \"gender\": 0, \"id\": 1207268, \"name\": \"Carvon Futrell\", \"order\": 52}, {\"cast_id\": 74, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acaff\", \"gender\": 0, \"id\": 1207269, \"name\": \"Brandon Jelkes\", \"order\": 53}, {\"cast_id\": 75, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb03\", \"gender\": 0, \"id\": 1207270, \"name\": \"Micah Moch\", \"order\": 54}, {\"cast_id\": 76, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb07\", \"gender\": 0, \"id\": 1207271, \"name\": \"Hanniyah Muhammad\", \"order\": 55}, {\"cast_id\": 77, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb0b\", \"gender\": 0, \"id\": 1207272, \"name\": \"Christopher Nolen\", \"order\": 56}, {\"cast_id\": 78, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb0f\", \"gender\": 0, \"id\": 1207273, \"name\": \"Christa Oliver\", \"order\": 57}, {\"cast_id\": 79, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb13\", \"gender\": 0, \"id\": 1207274, \"name\": \"April Marie Thomas\", \"order\": 58}, {\"cast_id\": 80, \"character\": \"Dancer\", \"credit_id\": \"52fe48019251416c750acb17\", \"gender\": 0, \"id\": 1207275, \"name\": \"Bravita A. 
Threatt\", \"order\": 59}, {\"cast_id\": 81, \"character\": \"Mining Chief (uncredited)\", \"credit_id\": \"52fe48019251416c750acb1b\", \"gender\": 0, \"id\": 1207276, \"name\": \"Colin Bleasdale\", \"order\": 60}, {\"cast_id\": 82, \"character\": \"Veteran Miner (uncredited)\", \"credit_id\": \"52fe48019251416c750acb1f\", \"gender\": 0, \"id\": 107969, \"name\": \"Mike Bodnar\", \"order\": 61}, {\"cast_id\": 83, \"character\": \"Richard (uncredited)\", \"credit_id\": \"52fe48019251416c750acb23\", \"gender\": 0, \"id\": 1207278, \"name\": \"Matt Clayton\", \"order\": 62}, {\"cast_id\": 84, \"character\": \"Nav\\'i (uncredited)\", \"credit_id\": \"52fe48019251416c750acb27\", \"gender\": 1, \"id\": 147898, \"name\": \"Nicole Dionne\", \"order\": 63}, {\"cast_id\": 85, \"character\": \"Trooper (uncredited)\", \"credit_id\": \"52fe48019251416c750acb2b\", \"gender\": 0, \"id\": 1207280, \"name\": \"Jamie Harrison\", \"order\": 64}, {\"cast_id\": 86, \"character\": \"Trooper (uncredited)\", \"credit_id\": \"52fe48019251416c750acb2f\", \"gender\": 0, \"id\": 1207281, \"name\": \"Allan Henry\", \"order\": 65}, {\"cast_id\": 87, \"character\": \"Ground Technician (uncredited)\", \"credit_id\": \"52fe48019251416c750acb33\", \"gender\": 2, \"id\": 1207282, \"name\": \"Anthony Ingruber\", \"order\": 66}, {\"cast_id\": 88, \"character\": \"Flight Crew Mechanic (uncredited)\", \"credit_id\": \"52fe48019251416c750acb37\", \"gender\": 0, \"id\": 1207283, \"name\": \"Ashley Jeffery\", \"order\": 67}, {\"cast_id\": 14, \"character\": \"Samson Pilot\", \"credit_id\": \"52fe48009251416c750ac9f9\", \"gender\": 0, \"id\": 98216, \"name\": \"Dean Knowsley\", \"order\": 68}, {\"cast_id\": 89, \"character\": \"Trooper (uncredited)\", \"credit_id\": \"52fe48019251416c750acb3b\", \"gender\": 0, \"id\": 1201399, \"name\": \"Joseph Mika-Hunt\", \"order\": 69}, {\"cast_id\": 90, \"character\": \"Banshee (uncredited)\", \"credit_id\": \"52fe48019251416c750acb3f\", \"gender\": 0, \"id\": 236696, 
\"name\": \"Terry Notary\", \"order\": 70}, {\"cast_id\": 91, \"character\": \"Soldier (uncredited)\", \"credit_id\": \"52fe48019251416c750acb43\", \"gender\": 0, \"id\": 1207287, \"name\": \"Kai Pantano\", \"order\": 71}, {\"cast_id\": 92, \"character\": \"Blast Technician (uncredited)\", \"credit_id\": \"52fe48019251416c750acb47\", \"gender\": 0, \"id\": 1207288, \"name\": \"Logan Pithyou\", \"order\": 72}, {\"cast_id\": 93, \"character\": \"Vindum Raah (uncredited)\", \"credit_id\": \"52fe48019251416c750acb4b\", \"gender\": 0, \"id\": 1207289, \"name\": \"Stuart Pollock\", \"order\": 73}, {\"cast_id\": 94, \"character\": \"Hero (uncredited)\", \"credit_id\": \"52fe48019251416c750acb4f\", \"gender\": 0, \"id\": 584868, \"name\": \"Raja\", \"order\": 74}, {\"cast_id\": 95, \"character\": \"Ops Centreworker (uncredited)\", \"credit_id\": \"52fe48019251416c750acb53\", \"gender\": 0, \"id\": 1207290, \"name\": \"Gareth Ruck\", \"order\": 75}, {\"cast_id\": 96, \"character\": \"Engineer (uncredited)\", \"credit_id\": \"52fe48019251416c750acb57\", \"gender\": 0, \"id\": 1062463, \"name\": \"Rhian Sheehan\", \"order\": 76}, {\"cast_id\": 97, \"character\": \"Col. Quaritch\\'s Mech Suit (uncredited)\", \"credit_id\": \"52fe48019251416c750acb5b\", \"gender\": 0, \"id\": 60656, \"name\": \"T. J. 
# We need the names of the first 3 actors listed for each movie.
def convert3(obj, limit=3):
    """Return the ``name`` of the first ``limit`` entries of a cast value.

    Args:
        obj: Either the raw cell value -- a string holding a literal list of
            dicts, each with a ``'name'`` key -- or a list/tuple that has
            already been parsed. Entries that are not dicts are passed
            through unchanged, so calling this function on its own output
            returns the same names.
        limit: Maximum number of names to return. Defaults to 3, the cut-off
            that was previously hard-coded. Expected to be non-negative.

    Returns:
        A list with at most ``limit`` names, in the order they appear in the
        input. Inputs with fewer entries give a shorter list; an empty input
        gives ``[]``.

    Raises:
        Errors from ``ast.literal_eval`` (e.g. ``ValueError``,
        ``SyntaxError``) propagate when ``obj`` is not a list/tuple and
        cannot be parsed as a literal.
    """
    # Parse only when needed: once `movies['cast'].apply(convert3)` has run,
    # the column holds lists, and ast.literal_eval raises ValueError on a
    # list object. Accepting parsed input keeps that cell safe to re-run.
    entries = obj if isinstance(obj, (list, tuple)) else ast.literal_eval(obj)
    return [
        entry['name'] if isinstance(entry, dict) else entry
        for entry in entries[:limit]
    ]
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "St_C0zsNqm6p" + }, + "outputs": [], + "source": [ + "movies['cast'] = movies['cast'].apply(convert3)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "4ueJccknqm5C" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995AvatarIn the 22nd century, a paraplegic Marine is di...[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colon...[Sam Worthington, Zoe Saldana, Sigourney Weaver][{\"credit_id\": \"52fe48009251416c750aca23\", \"de...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...[Adventure, Fantasy, Action][ocean, drug abuse, exotic island, east india ...[Johnny Depp, Orlando Bloom, Keira Knightley][{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...
2206647SpectreA cryptic message from Bond’s past sends him o...[Action, Adventure, Crime][spy, based on novel, secret agent, sequel, mi...[Daniel Craig, Christoph Waltz, Léa Seydoux][{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...[Action, Crime, Drama, Thriller][dc comics, crime fighter, terrorist, secret i...[Christian Bale, Michael Caine, Gary Oldman][{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...
449529John CarterJohn Carter is a war-weary, former military ca...[Action, Adventure, Science Fiction][based on novel, mars, medallion, space travel...[Taylor Kitsch, Lynn Collins, Samantha Morton][{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 In the 22nd century, a paraplegic Marine is di... \n", + "1 Captain Barbossa, long believed to be dead, ha... \n", + "2 A cryptic message from Bond’s past sends him o... \n", + "3 Following the death of District Attorney Harve... \n", + "4 John Carter is a war-weary, former military ca... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, Science Fiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, Science Fiction] \n", + "\n", + " keywords \\\n", + "0 [culture clash, future, space war, space colon... \n", + "1 [ocean, drug abuse, exotic island, east india ... \n", + "2 [spy, based on novel, secret agent, sequel, mi... \n", + "3 [dc comics, crime fighter, terrorist, secret i... \n", + "4 [based on novel, mars, medallion, space travel... \n", + "\n", + " cast \\\n", + "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] \n", + "1 [Johnny Depp, Orlando Bloom, Keira Knightley] \n", + "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] \n", + "3 [Christian Bale, Michael Caine, Gary Oldman] \n", + "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] \n", + "\n", + " crew \n", + "0 [{\"credit_id\": \"52fe48009251416c750aca23\", \"de... \n", + "1 [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de... \n", + "2 [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de... \n", + "3 [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de... \n", + "4 [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de... 
" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "i5Y6uqVZqm3G" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'[{\"credit_id\": \"52fe48009251416c750aca23\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1721, \"job\": \"Editor\", \"name\": \"Stephen E. Rivkin\"}, {\"credit_id\": \"539c47ecc3a36810e3001f87\", \"department\": \"Art\", \"gender\": 2, \"id\": 496, \"job\": \"Production Design\", \"name\": \"Rick Carter\"}, {\"credit_id\": \"54491c89c3a3680fb4001cf7\", \"department\": \"Sound\", \"gender\": 0, \"id\": 900, \"job\": \"Sound Designer\", \"name\": \"Christopher Boyes\"}, {\"credit_id\": \"54491cb70e0a267480001bd0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 900, \"job\": \"Supervising Sound Editor\", \"name\": \"Christopher Boyes\"}, {\"credit_id\": \"539c4a4cc3a36810c9002101\", \"department\": \"Production\", \"gender\": 1, \"id\": 1262, \"job\": \"Casting\", \"name\": \"Mali Finn\"}, {\"credit_id\": \"5544ee3b925141499f0008fc\", \"department\": \"Sound\", \"gender\": 2, \"id\": 1729, \"job\": \"Original Music Composer\", \"name\": \"James Horner\"}, {\"credit_id\": \"52fe48009251416c750ac9c3\", \"department\": \"Directing\", \"gender\": 2, \"id\": 2710, \"job\": \"Director\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750ac9d9\", \"department\": \"Writing\", \"gender\": 2, \"id\": 2710, \"job\": \"Writer\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750aca17\", \"department\": \"Editing\", \"gender\": 2, \"id\": 2710, \"job\": \"Editor\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750aca29\", \"department\": \"Production\", \"gender\": 2, \"id\": 2710, \"job\": \"Producer\", \"name\": \"James Cameron\"}, {\"credit_id\": \"52fe48009251416c750aca3f\", \"department\": \"Writing\", \"gender\": 2, \"id\": 
2710, \"job\": \"Screenplay\", \"name\": \"James Cameron\"}, {\"credit_id\": \"539c4987c3a36810ba0021a4\", \"department\": \"Art\", \"gender\": 2, \"id\": 7236, \"job\": \"Art Direction\", \"name\": \"Andrew Menzies\"}, {\"credit_id\": \"549598c3c3a3686ae9004383\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 6690, \"job\": \"Visual Effects Producer\", \"name\": \"Jill Brooks\"}, {\"credit_id\": \"52fe48009251416c750aca4b\", \"department\": \"Production\", \"gender\": 1, \"id\": 6347, \"job\": \"Casting\", \"name\": \"Margery Simkin\"}, {\"credit_id\": \"570b6f419251417da70032fe\", \"department\": \"Art\", \"gender\": 2, \"id\": 6878, \"job\": \"Supervising Art Director\", \"name\": \"Kevin Ishioka\"}, {\"credit_id\": \"5495a0fac3a3686ae9004468\", \"department\": \"Sound\", \"gender\": 0, \"id\": 6883, \"job\": \"Music Editor\", \"name\": \"Dick Bernstein\"}, {\"credit_id\": \"54959706c3a3686af3003e81\", \"department\": \"Sound\", \"gender\": 0, \"id\": 8159, \"job\": \"Sound Effects Editor\", \"name\": \"Shannon Mills\"}, {\"credit_id\": \"54491d58c3a3680fb1001ccb\", \"department\": \"Sound\", \"gender\": 0, \"id\": 8160, \"job\": \"Foley\", \"name\": \"Dennie Thorpe\"}, {\"credit_id\": \"54491d6cc3a3680fa5001b2c\", \"department\": \"Sound\", \"gender\": 0, \"id\": 8163, \"job\": \"Foley\", \"name\": \"Jana Vance\"}, {\"credit_id\": \"52fe48009251416c750aca57\", \"department\": \"Costume & Make-Up\", \"gender\": 1, \"id\": 8527, \"job\": \"Costume Design\", \"name\": \"Deborah Lynn Scott\"}, {\"credit_id\": \"52fe48009251416c750aca2f\", \"department\": \"Production\", \"gender\": 2, \"id\": 8529, \"job\": \"Producer\", \"name\": \"Jon Landau\"}, {\"credit_id\": \"539c4937c3a36810ba002194\", \"department\": \"Art\", \"gender\": 0, \"id\": 9618, \"job\": \"Art Direction\", \"name\": \"Sean Haworth\"}, {\"credit_id\": \"539c49b6c3a36810c10020e6\", \"department\": \"Art\", \"gender\": 1, \"id\": 12653, \"job\": \"Set Decoration\", \"name\": \"Kim 
Sinclair\"}, {\"credit_id\": \"570b6f2f9251413a0e00020d\", \"department\": \"Art\", \"gender\": 1, \"id\": 12653, \"job\": \"Supervising Art Director\", \"name\": \"Kim Sinclair\"}, {\"credit_id\": \"54491a6c0e0a26748c001b19\", \"department\": \"Art\", \"gender\": 2, \"id\": 14350, \"job\": \"Set Designer\", \"name\": \"Richard F. Mays\"}, {\"credit_id\": \"56928cf4c3a3684cff0025c4\", \"department\": \"Production\", \"gender\": 1, \"id\": 20294, \"job\": \"Executive Producer\", \"name\": \"Laeta Kalogridis\"}, {\"credit_id\": \"52fe48009251416c750aca51\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 17675, \"job\": \"Costume Design\", \"name\": \"Mayes C. Rubeo\"}, {\"credit_id\": \"52fe48009251416c750aca11\", \"department\": \"Camera\", \"gender\": 2, \"id\": 18265, \"job\": \"Director of Photography\", \"name\": \"Mauro Fiore\"}, {\"credit_id\": \"5449194d0e0a26748f001b39\", \"department\": \"Art\", \"gender\": 0, \"id\": 42281, \"job\": \"Set Designer\", \"name\": \"Scott Herbertson\"}, {\"credit_id\": \"52fe48009251416c750aca05\", \"department\": \"Crew\", \"gender\": 0, \"id\": 42288, \"job\": \"Stunts\", \"name\": \"Woody Schultz\"}, {\"credit_id\": \"5592aefb92514152de0010f5\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 29067, \"job\": \"Makeup Artist\", \"name\": \"Linda DeVetta\"}, {\"credit_id\": \"5592afa492514152de00112c\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 29067, \"job\": \"Hairstylist\", \"name\": \"Linda DeVetta\"}, {\"credit_id\": \"54959ed592514130fc002e5d\", \"department\": \"Camera\", \"gender\": 2, \"id\": 33302, \"job\": \"Camera Operator\", \"name\": \"Richard Bluck\"}, {\"credit_id\": \"539c4891c3a36810ba002147\", \"department\": \"Art\", \"gender\": 2, \"id\": 33303, \"job\": \"Art Direction\", \"name\": \"Simon Bright\"}, {\"credit_id\": \"54959c069251417a81001f3a\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 113145, \"job\": \"Visual Effects Supervisor\", 
\"name\": \"Richard Martin\"}, {\"credit_id\": \"54959a0dc3a3680ff5002c8d\", \"department\": \"Crew\", \"gender\": 2, \"id\": 58188, \"job\": \"Visual Effects Editor\", \"name\": \"Steve R. Moore\"}, {\"credit_id\": \"52fe48009251416c750aca1d\", \"department\": \"Editing\", \"gender\": 2, \"id\": 58871, \"job\": \"Editor\", \"name\": \"John Refoua\"}, {\"credit_id\": \"54491a4dc3a3680fc30018ca\", \"department\": \"Art\", \"gender\": 0, \"id\": 92359, \"job\": \"Set Designer\", \"name\": \"Karl J. Martin\"}, {\"credit_id\": \"52fe48009251416c750aca35\", \"department\": \"Camera\", \"gender\": 1, \"id\": 72201, \"job\": \"Director of Photography\", \"name\": \"Chiling Lin\"}, {\"credit_id\": \"52fe48009251416c750ac9ff\", \"department\": \"Crew\", \"gender\": 0, \"id\": 89714, \"job\": \"Stunts\", \"name\": \"Ilram Choi\"}, {\"credit_id\": \"54959c529251416e2b004394\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 93214, \"job\": \"Visual Effects Supervisor\", \"name\": \"Steven Quale\"}, {\"credit_id\": \"54491edf0e0a267489001c37\", \"department\": \"Crew\", \"gender\": 1, \"id\": 122607, \"job\": \"Dialect Coach\", \"name\": \"Carla Meyer\"}, {\"credit_id\": \"539c485bc3a368653d001a3a\", \"department\": \"Art\", \"gender\": 2, \"id\": 132585, \"job\": \"Art Direction\", \"name\": \"Nick Bassett\"}, {\"credit_id\": \"539c4903c3a368653d001a74\", \"department\": \"Art\", \"gender\": 0, \"id\": 132596, \"job\": \"Art Direction\", \"name\": \"Jill Cormack\"}, {\"credit_id\": \"539c4967c3a368653d001a94\", \"department\": \"Art\", \"gender\": 0, \"id\": 132604, \"job\": \"Art Direction\", \"name\": \"Andy McLaren\"}, {\"credit_id\": \"52fe48009251416c750aca45\", \"department\": \"Crew\", \"gender\": 0, \"id\": 236696, \"job\": \"Motion Capture Artist\", \"name\": \"Terry Notary\"}, {\"credit_id\": \"54959e02c3a3680fc60027d2\", \"department\": \"Crew\", \"gender\": 2, \"id\": 956198, \"job\": \"Stunt Coordinator\", \"name\": \"Garrett Warren\"}, {\"credit_id\": 
\"54959ca3c3a3686ae300438c\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 957874, \"job\": \"Visual Effects Supervisor\", \"name\": \"Jonathan Rothbart\"}, {\"credit_id\": \"570b6f519251412c74001b2f\", \"department\": \"Art\", \"gender\": 0, \"id\": 957889, \"job\": \"Supervising Art Director\", \"name\": \"Stefan Dechant\"}, {\"credit_id\": \"570b6f62c3a3680b77007460\", \"department\": \"Art\", \"gender\": 2, \"id\": 959555, \"job\": \"Supervising Art Director\", \"name\": \"Todd Cherniawsky\"}, {\"credit_id\": \"539c4a3ac3a36810da0021cc\", \"department\": \"Production\", \"gender\": 0, \"id\": 1016177, \"job\": \"Casting\", \"name\": \"Miranda Rivers\"}, {\"credit_id\": \"539c482cc3a36810c1002062\", \"department\": \"Art\", \"gender\": 0, \"id\": 1032536, \"job\": \"Production Design\", \"name\": \"Robert Stromberg\"}, {\"credit_id\": \"539c4b65c3a36810c9002125\", \"department\": \"Costume & Make-Up\", \"gender\": 2, \"id\": 1071680, \"job\": \"Costume Design\", \"name\": \"John Harding\"}, {\"credit_id\": \"54959e6692514130fc002e4e\", \"department\": \"Camera\", \"gender\": 0, \"id\": 1177364, \"job\": \"Steadicam Operator\", \"name\": \"Roberto De Angelis\"}, {\"credit_id\": \"539c49f1c3a368653d001aac\", \"department\": \"Costume & Make-Up\", \"gender\": 2, \"id\": 1202850, \"job\": \"Makeup Department Head\", \"name\": \"Mike Smithson\"}, {\"credit_id\": \"5495999ec3a3686ae100460c\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1204668, \"job\": \"Visual Effects Producer\", \"name\": \"Alain Lalanne\"}, {\"credit_id\": \"54959cdfc3a3681153002729\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1206410, \"job\": \"Visual Effects Supervisor\", \"name\": \"Lucas Salton\"}, {\"credit_id\": \"549596239251417a81001eae\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1234266, \"job\": \"Post Production Supervisor\", \"name\": \"Janace Tashjian\"}, {\"credit_id\": \"54959c859251416e1e003efe\", \"department\": \"Visual 
Effects\", \"gender\": 0, \"id\": 1271932, \"job\": \"Visual Effects Supervisor\", \"name\": \"Stephen Rosenbaum\"}, {\"credit_id\": \"5592af28c3a368775a00105f\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1310064, \"job\": \"Makeup Artist\", \"name\": \"Frankie Karena\"}, {\"credit_id\": \"539c4adfc3a36810e300203b\", \"department\": \"Costume & Make-Up\", \"gender\": 1, \"id\": 1319844, \"job\": \"Costume Supervisor\", \"name\": \"Lisa Lovaas\"}, {\"credit_id\": \"54959b579251416e2b004371\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1327028, \"job\": \"Visual Effects Supervisor\", \"name\": \"Jonathan Fawkner\"}, {\"credit_id\": \"539c48a7c3a36810b5001fa7\", \"department\": \"Art\", \"gender\": 0, \"id\": 1330561, \"job\": \"Art Direction\", \"name\": \"Robert Bavin\"}, {\"credit_id\": \"539c4a71c3a36810da0021e0\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1330567, \"job\": \"Costume Supervisor\", \"name\": \"Anthony Almaraz\"}, {\"credit_id\": \"539c4a8ac3a36810ba0021e4\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1330570, \"job\": \"Costume Supervisor\", \"name\": \"Carolyn M. Fenton\"}, {\"credit_id\": \"539c4ab6c3a36810da0021f0\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1330574, \"job\": \"Costume Supervisor\", \"name\": \"Beth Koenigsberg\"}, {\"credit_id\": \"54491ab70e0a267480001ba2\", \"department\": \"Art\", \"gender\": 0, \"id\": 1336191, \"job\": \"Set Designer\", \"name\": \"Sam Page\"}, {\"credit_id\": \"544919d9c3a3680fc30018bd\", \"department\": \"Art\", \"gender\": 0, \"id\": 1339441, \"job\": \"Set Designer\", \"name\": \"Tex Kadonaga\"}, {\"credit_id\": \"54491cf50e0a267483001b0c\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1352422, \"job\": \"Dialogue Editor\", \"name\": \"Kim Foscato\"}, {\"credit_id\": \"544919f40e0a26748c001b09\", \"department\": \"Art\", \"gender\": 0, \"id\": 1352962, \"job\": \"Set Designer\", \"name\": \"Tammy S. 
Lee\"}, {\"credit_id\": \"5495a115c3a3680ff5002d71\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1357070, \"job\": \"Transportation Coordinator\", \"name\": \"Denny Caira\"}, {\"credit_id\": \"5495a12f92514130fc002e94\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1357071, \"job\": \"Transportation Coordinator\", \"name\": \"James Waitkus\"}, {\"credit_id\": \"5495976fc3a36811530026b0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1360103, \"job\": \"Supervising Sound Editor\", \"name\": \"Addison Teague\"}, {\"credit_id\": \"54491837c3a3680fb1001c5a\", \"department\": \"Art\", \"gender\": 2, \"id\": 1376887, \"job\": \"Set Designer\", \"name\": \"C. Scott Baker\"}, {\"credit_id\": \"54491878c3a3680fb4001c9d\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376888, \"job\": \"Set Designer\", \"name\": \"Luke Caska\"}, {\"credit_id\": \"544918dac3a3680fa5001ae0\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376889, \"job\": \"Set Designer\", \"name\": \"David Chow\"}, {\"credit_id\": \"544919110e0a267486001b68\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376890, \"job\": \"Set Designer\", \"name\": \"Jonathan Dyer\"}, {\"credit_id\": \"54491967c3a3680faa001b5e\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376891, \"job\": \"Set Designer\", \"name\": \"Joseph Hiura\"}, {\"credit_id\": \"54491997c3a3680fb1001c8a\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376892, \"job\": \"Art Department Coordinator\", \"name\": \"Rebecca Jellie\"}, {\"credit_id\": \"544919ba0e0a26748f001b42\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376893, \"job\": \"Set Designer\", \"name\": \"Robert Andrew Johnson\"}, {\"credit_id\": \"54491b1dc3a3680faa001b8c\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376895, \"job\": \"Assistant Art Director\", \"name\": \"Mike Stassi\"}, {\"credit_id\": \"54491b79c3a3680fbb001826\", \"department\": \"Art\", \"gender\": 0, \"id\": 1376897, \"job\": \"Construction Coordinator\", \"name\": \"John 
Villarino\"}, {\"credit_id\": \"54491baec3a3680fb4001ce6\", \"department\": \"Art\", \"gender\": 2, \"id\": 1376898, \"job\": \"Assistant Art Director\", \"name\": \"Jeffrey Wisniewski\"}, {\"credit_id\": \"54491d2fc3a3680fb4001d07\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1376899, \"job\": \"Dialogue Editor\", \"name\": \"Cheryl Nardi\"}, {\"credit_id\": \"54491d86c3a3680fa5001b2f\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1376901, \"job\": \"Dialogue Editor\", \"name\": \"Marshall Winn\"}, {\"credit_id\": \"54491d9dc3a3680faa001bb0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1376902, \"job\": \"Supervising Sound Editor\", \"name\": \"Gwendolyn Yates Whittle\"}, {\"credit_id\": \"54491dc10e0a267486001bce\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1376903, \"job\": \"Sound Re-Recording Mixer\", \"name\": \"William Stein\"}, {\"credit_id\": \"54491f500e0a26747c001c07\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1376909, \"job\": \"Choreographer\", \"name\": \"Lula Washington\"}, {\"credit_id\": \"549599239251412c4e002a2e\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1391692, \"job\": \"Visual Effects Producer\", \"name\": \"Chris Del Conte\"}, {\"credit_id\": \"54959d54c3a36831b8001d9a\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 1391695, \"job\": \"Visual Effects Supervisor\", \"name\": \"R. 
Christopher White\"}, {\"credit_id\": \"54959bdf9251412c4e002a66\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1394070, \"job\": \"Visual Effects Supervisor\", \"name\": \"Dan Lemmon\"}, {\"credit_id\": \"5495971d92514132ed002922\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1394129, \"job\": \"Sound Effects Editor\", \"name\": \"Tim Nielsen\"}, {\"credit_id\": \"5592b25792514152cc0011aa\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1394286, \"job\": \"CG Supervisor\", \"name\": \"Michael Mulholland\"}, {\"credit_id\": \"54959a329251416e2b004355\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1394750, \"job\": \"Visual Effects Editor\", \"name\": \"Thomas Nittmann\"}, {\"credit_id\": \"54959d6dc3a3686ae9004401\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1394755, \"job\": \"Visual Effects Supervisor\", \"name\": \"Edson Williams\"}, {\"credit_id\": \"5495a08fc3a3686ae300441c\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1394953, \"job\": \"Digital Intermediate\", \"name\": \"Christine Carr\"}, {\"credit_id\": \"55402d659251413d6d000249\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1395269, \"job\": \"Visual Effects Supervisor\", \"name\": \"John Bruno\"}, {\"credit_id\": \"54959e7b9251416e1e003f3e\", \"department\": \"Camera\", \"gender\": 0, \"id\": 1398970, \"job\": \"Steadicam Operator\", \"name\": \"David Emmerichs\"}, {\"credit_id\": \"54959734c3a3686ae10045e0\", \"department\": \"Sound\", \"gender\": 0, \"id\": 1400906, \"job\": \"Sound Effects Editor\", \"name\": \"Christopher Scarabosio\"}, {\"credit_id\": \"549595dd92514130fc002d79\", \"department\": \"Production\", \"gender\": 0, \"id\": 1401784, \"job\": \"Production Supervisor\", \"name\": \"Jennifer Teves\"}, {\"credit_id\": \"549596009251413af70028cc\", \"department\": \"Production\", \"gender\": 0, \"id\": 1401785, \"job\": \"Production Manager\", \"name\": \"Brigitte Yorke\"}, {\"credit_id\": \"549596e892514130fc002d99\", 
\"department\": \"Sound\", \"gender\": 0, \"id\": 1401786, \"job\": \"Sound Effects Editor\", \"name\": \"Ken Fischer\"}, {\"credit_id\": \"549598229251412c4e002a1c\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401787, \"job\": \"Special Effects Coordinator\", \"name\": \"Iain Hutton\"}, {\"credit_id\": \"549598349251416e2b00432b\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401788, \"job\": \"Special Effects Coordinator\", \"name\": \"Steve Ingram\"}, {\"credit_id\": \"54959905c3a3686ae3004324\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401789, \"job\": \"Visual Effects Producer\", \"name\": \"Joyce Cox\"}, {\"credit_id\": \"5495994b92514132ed002951\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401790, \"job\": \"Visual Effects Producer\", \"name\": \"Jenny Foster\"}, {\"credit_id\": \"549599cbc3a3686ae1004613\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401791, \"job\": \"Visual Effects Editor\", \"name\": \"Christopher Marino\"}, {\"credit_id\": \"549599f2c3a3686ae100461e\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401792, \"job\": \"Visual Effects Editor\", \"name\": \"Jim Milton\"}, {\"credit_id\": \"54959a51c3a3686af3003eb5\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401793, \"job\": \"Visual Effects Producer\", \"name\": \"Cyndi Ochs\"}, {\"credit_id\": \"54959a7cc3a36811530026f4\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401794, \"job\": \"Visual Effects Editor\", \"name\": \"Lucas Putnam\"}, {\"credit_id\": \"54959b91c3a3680ff5002cb4\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401795, \"job\": \"Visual Effects Supervisor\", \"name\": \"Anthony \\'Max\\' Ivins\"}, {\"credit_id\": \"54959bb69251412c4e002a5f\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401796, \"job\": \"Visual Effects Supervisor\", \"name\": \"John Knoll\"}, {\"credit_id\": \"54959cbbc3a3686ae3004391\", \"department\": \"Visual Effects\", \"gender\": 2, \"id\": 
1401799, \"job\": \"Visual Effects Supervisor\", \"name\": \"Eric Saindon\"}, {\"credit_id\": \"54959d06c3a3686ae90043f6\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401800, \"job\": \"Visual Effects Supervisor\", \"name\": \"Wayne Stables\"}, {\"credit_id\": \"54959d259251416e1e003f11\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401801, \"job\": \"Visual Effects Supervisor\", \"name\": \"David Stinnett\"}, {\"credit_id\": \"54959db49251413af7002975\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401803, \"job\": \"Visual Effects Supervisor\", \"name\": \"Guy Williams\"}, {\"credit_id\": \"54959de4c3a3681153002750\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1401804, \"job\": \"Stunt Coordinator\", \"name\": \"Stuart Thorp\"}, {\"credit_id\": \"54959ef2c3a3680fc60027f2\", \"department\": \"Lighting\", \"gender\": 0, \"id\": 1401805, \"job\": \"Best Boy Electric\", \"name\": \"Giles Coburn\"}, {\"credit_id\": \"54959f07c3a3680fc60027f9\", \"department\": \"Camera\", \"gender\": 2, \"id\": 1401806, \"job\": \"Still Photographer\", \"name\": \"Mark Fellman\"}, {\"credit_id\": \"54959f47c3a3681153002774\", \"department\": \"Lighting\", \"gender\": 0, \"id\": 1401807, \"job\": \"Lighting Technician\", \"name\": \"Scott Sprague\"}, {\"credit_id\": \"54959f8cc3a36831b8001df2\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401808, \"job\": \"Animation Director\", \"name\": \"Jeremy Hollobon\"}, {\"credit_id\": \"54959fa0c3a36831b8001dfb\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401809, \"job\": \"Animation Director\", \"name\": \"Orlando Meunier\"}, {\"credit_id\": \"54959fb6c3a3686af3003f54\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1401810, \"job\": \"Animation Director\", \"name\": \"Taisuke Tanimura\"}, {\"credit_id\": \"54959fd2c3a36831b8001e02\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1401812, \"job\": \"Set Costumer\", \"name\": 
\"Lilia Mishel Acevedo\"}, {\"credit_id\": \"54959ff9c3a3686ae300440c\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1401814, \"job\": \"Set Costumer\", \"name\": \"Alejandro M. Hernandez\"}, {\"credit_id\": \"5495a0ddc3a3686ae10046fe\", \"department\": \"Editing\", \"gender\": 0, \"id\": 1401815, \"job\": \"Digital Intermediate\", \"name\": \"Marvin Hall\"}, {\"credit_id\": \"5495a1f7c3a3686ae3004443\", \"department\": \"Production\", \"gender\": 0, \"id\": 1401816, \"job\": \"Publicist\", \"name\": \"Judy Alley\"}, {\"credit_id\": \"5592b29fc3a36869d100002f\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1418381, \"job\": \"CG Supervisor\", \"name\": \"Mike Perry\"}, {\"credit_id\": \"5592b23a9251415df8001081\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1426854, \"job\": \"CG Supervisor\", \"name\": \"Andrew Morley\"}, {\"credit_id\": \"55491e1192514104c40002d8\", \"department\": \"Art\", \"gender\": 0, \"id\": 1438901, \"job\": \"Conceptual Design\", \"name\": \"Seth Engstrom\"}, {\"credit_id\": \"5525d5809251417276002b06\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1447362, \"job\": \"Visual Effects Art Director\", \"name\": \"Eric Oliver\"}, {\"credit_id\": \"554427ca925141586500312a\", \"department\": \"Visual Effects\", \"gender\": 0, \"id\": 1447503, \"job\": \"Modeling\", \"name\": \"Matsune Suzuki\"}, {\"credit_id\": \"551906889251415aab001c88\", \"department\": \"Art\", \"gender\": 0, \"id\": 1447524, \"job\": \"Art Department Manager\", \"name\": \"Paul Tobin\"}, {\"credit_id\": \"5592af8492514152cc0010de\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1452643, \"job\": \"Hairstylist\", \"name\": \"Roxane Griffin\"}, {\"credit_id\": \"553d3c109251415852001318\", \"department\": \"Lighting\", \"gender\": 0, \"id\": 1453938, \"job\": \"Lighting Artist\", \"name\": \"Arun Ram-Mohan\"}, {\"credit_id\": \"5592af4692514152d5001355\", \"department\": \"Costume & Make-Up\", \"gender\": 0, \"id\": 1457305, 
\"job\": \"Makeup Artist\", \"name\": \"Georgia Lockhart-Adams\"}, {\"credit_id\": \"5592b2eac3a36877470012a5\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1466035, \"job\": \"CG Supervisor\", \"name\": \"Thrain Shadbolt\"}, {\"credit_id\": \"5592b032c3a36877450015f1\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483220, \"job\": \"CG Supervisor\", \"name\": \"Brad Alexander\"}, {\"credit_id\": \"5592b05592514152d80012f6\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483221, \"job\": \"CG Supervisor\", \"name\": \"Shadi Almassizadeh\"}, {\"credit_id\": \"5592b090c3a36877570010b5\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483222, \"job\": \"CG Supervisor\", \"name\": \"Simon Clutterbuck\"}, {\"credit_id\": \"5592b0dbc3a368774b00112c\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483223, \"job\": \"CG Supervisor\", \"name\": \"Graeme Demmocks\"}, {\"credit_id\": \"5592b0fe92514152db0010c1\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483224, \"job\": \"CG Supervisor\", \"name\": \"Adrian Fernandes\"}, {\"credit_id\": \"5592b11f9251415df8001059\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483225, \"job\": \"CG Supervisor\", \"name\": \"Mitch Gates\"}, {\"credit_id\": \"5592b15dc3a3687745001645\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483226, \"job\": \"CG Supervisor\", \"name\": \"Jerry Kung\"}, {\"credit_id\": \"5592b18e925141645a0004ae\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483227, \"job\": \"CG Supervisor\", \"name\": \"Andy Lomas\"}, {\"credit_id\": \"5592b1bfc3a368775d0010e7\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483228, \"job\": \"CG Supervisor\", \"name\": \"Sebastian Marino\"}, {\"credit_id\": \"5592b2049251415df8001078\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483229, \"job\": \"CG Supervisor\", \"name\": \"Matthias Menz\"}, {\"credit_id\": \"5592b27b92514152d800136a\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483230, \"job\": \"CG Supervisor\", 
\"name\": \"Sergei Nevshupov\"}, {\"credit_id\": \"5592b2c3c3a36869e800003c\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483231, \"job\": \"CG Supervisor\", \"name\": \"Philippe Rebours\"}, {\"credit_id\": \"5592b317c3a36877470012af\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483232, \"job\": \"CG Supervisor\", \"name\": \"Michael Takarangi\"}, {\"credit_id\": \"5592b345c3a36877470012bb\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483233, \"job\": \"CG Supervisor\", \"name\": \"David Weitzberg\"}, {\"credit_id\": \"5592b37cc3a368775100113b\", \"department\": \"Crew\", \"gender\": 0, \"id\": 1483234, \"job\": \"CG Supervisor\", \"name\": \"Ben White\"}, {\"credit_id\": \"573c8e2f9251413f5d000094\", \"department\": \"Crew\", \"gender\": 1, \"id\": 1621932, \"job\": \"Stunts\", \"name\": \"Min Windle\"}]'" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies['crew'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "7ihXgjy1qm1K" + }, + "outputs": [], + "source": [ + "def fetch_director(obj):\n", + " L = []\n", + " for i in ast.literal_eval(obj):\n", + " if i['job'] == 'Director':\n", + " L.append(i['name'])\n", + " break\n", + " return L" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "73e5ipCkqmzP" + }, + "outputs": [], + "source": [ + "movies['crew'] = movies['crew'].apply(fetch_director)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "VWaUGWeFqmxx" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995AvatarIn the 22nd century, a paraplegic Marine is di...[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colon...[Sam Worthington, Zoe Saldana, Sigourney Weaver][James Cameron]
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...[Adventure, Fantasy, Action][ocean, drug abuse, exotic island, east india ...[Johnny Depp, Orlando Bloom, Keira Knightley][Gore Verbinski]
2206647SpectreA cryptic message from Bond’s past sends him o...[Action, Adventure, Crime][spy, based on novel, secret agent, sequel, mi...[Daniel Craig, Christoph Waltz, Léa Seydoux][Sam Mendes]
349026The Dark Knight RisesFollowing the death of District Attorney Harve...[Action, Crime, Drama, Thriller][dc comics, crime fighter, terrorist, secret i...[Christian Bale, Michael Caine, Gary Oldman][Christopher Nolan]
449529John CarterJohn Carter is a war-weary, former military ca...[Action, Adventure, Science Fiction][based on novel, mars, medallion, space travel...[Taylor Kitsch, Lynn Collins, Samantha Morton][Andrew Stanton]
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 In the 22nd century, a paraplegic Marine is di... \n", + "1 Captain Barbossa, long believed to be dead, ha... \n", + "2 A cryptic message from Bond’s past sends him o... \n", + "3 Following the death of District Attorney Harve... \n", + "4 John Carter is a war-weary, former military ca... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, Science Fiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, Science Fiction] \n", + "\n", + " keywords \\\n", + "0 [culture clash, future, space war, space colon... \n", + "1 [ocean, drug abuse, exotic island, east india ... \n", + "2 [spy, based on novel, secret agent, sequel, mi... \n", + "3 [dc comics, crime fighter, terrorist, secret i... \n", + "4 [based on novel, mars, medallion, space travel... 
\n", + "\n", + " cast crew \n", + "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] [James Cameron] \n", + "1 [Johnny Depp, Orlando Bloom, Keira Knightley] [Gore Verbinski] \n", + "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] [Sam Mendes] \n", + "3 [Christian Bale, Michael Caine, Gary Oldman] [Christopher Nolan] \n", + "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] [Andrew Stanton] " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "Q_XPFjxSqmvy" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.'" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies['overview'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "jqv29H4aqmtq" + }, + "outputs": [], + "source": [ + "#we have to convert the overview column from a string to a list of words\n", + "movies['overview'] = movies['overview'].apply(lambda x:x.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995Avatar[In, the, 22nd, century,, a, paraplegic, Marin...[Action, Adventure, Fantasy, Science Fiction][culture clash, future, space war, space colon...[Sam Worthington, Zoe Saldana, Sigourney Weaver][James Cameron]
1285Pirates of the Caribbean: At World's End[Captain, Barbossa,, long, believed, to, be, d...[Adventure, Fantasy, Action][ocean, drug abuse, exotic island, east india ...[Johnny Depp, Orlando Bloom, Keira Knightley][Gore Verbinski]
2206647Spectre[A, cryptic, message, from, Bond’s, past, send...[Action, Adventure, Crime][spy, based on novel, secret agent, sequel, mi...[Daniel Craig, Christoph Waltz, Léa Seydoux][Sam Mendes]
349026The Dark Knight Rises[Following, the, death, of, District, Attorney...[Action, Crime, Drama, Thriller][dc comics, crime fighter, terrorist, secret i...[Christian Bale, Michael Caine, Gary Oldman][Christopher Nolan]
449529John Carter[John, Carter, is, a, war-weary,, former, mili...[Action, Adventure, Science Fiction][based on novel, mars, medallion, space travel...[Taylor Kitsch, Lynn Collins, Samantha Morton][Andrew Stanton]
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", + "1 [Captain, Barbossa,, long, believed, to, be, d... \n", + "2 [A, cryptic, message, from, Bond’s, past, send... \n", + "3 [Following, the, death, of, District, Attorney... \n", + "4 [John, Carter, is, a, war-weary,, former, mili... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, Science Fiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, Science Fiction] \n", + "\n", + " keywords \\\n", + "0 [culture clash, future, space war, space colon... \n", + "1 [ocean, drug abuse, exotic island, east india ... \n", + "2 [spy, based on novel, secret agent, sequel, mi... \n", + "3 [dc comics, crime fighter, terrorist, secret i... \n", + "4 [based on novel, mars, medallion, space travel... 
\n", + "\n", + " cast crew \n", + "0 [Sam Worthington, Zoe Saldana, Sigourney Weaver] [James Cameron] \n", + "1 [Johnny Depp, Orlando Bloom, Keira Knightley] [Gore Verbinski] \n", + "2 [Daniel Craig, Christoph Waltz, Léa Seydoux] [Sam Mendes] \n", + "3 [Christian Bale, Michael Caine, Gary Oldman] [Christopher Nolan] \n", + "4 [Taylor Kitsch, Lynn Collins, Samantha Morton] [Andrew Stanton] " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"we have to remove the spaces between words like 'Sam Worthington' to 'SamWorthington' because if we do not\\nremove the space the recommender system will take sam as a different entity and worthington as a different entity so, \\nwhenever we type sam, we can get recommendations of other sams as well.\\n\"" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"we have to remove the spaces between words like 'Sam Worthington' to 'SamWorthington' because if we do not\n", + "remove the space the recommender system will take sam as a different entity and worthington as a different entity so, \n", + "whenever we type sam, we can get recommendations of other sams as well.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [Action, Adventure, Fantasy, ScienceFiction]\n", + "1 [Adventure, Fantasy, Action]\n", + "2 [Action, Adventure, Crime]\n", + "3 [Action, Crime, Drama, Thriller]\n", + "4 [Action, Adventure, ScienceFiction]\n", + " ... 
\n", + "4804 [Action, Crime, Thriller]\n", + "4805 [Comedy, Romance]\n", + "4806 [Comedy, Drama, Romance, TVMovie]\n", + "4807 []\n", + "4808 [Documentary]\n", + "Name: genres, Length: 4806, dtype: object" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#removing spaces\n", + "movies['genres'].apply(lambda x:[i.replace(\" \", \"\") for i in x])\n", + "#as you can see in the first line of the output the space between Science and Fiction is removed" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "#strip the spaces inside every entry of each list-valued column so that a multi-word name becomes one token\n", + "for column in ['genres', 'keywords', 'cast', 'crew']:\n", + "    movies[column] = movies[column].apply(lambda entries: [entry.replace(\" \", \"\") for entry in entries])" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrew
019995Avatar[In, the, 22nd, century,, a, paraplegic, Marin...[Action, Adventure, Fantasy, ScienceFiction][cultureclash, future, spacewar, spacecolony, ...[SamWorthington, ZoeSaldana, SigourneyWeaver][JamesCameron]
1285Pirates of the Caribbean: At World's End[Captain, Barbossa,, long, believed, to, be, d...[Adventure, Fantasy, Action][ocean, drugabuse, exoticisland, eastindiatrad...[JohnnyDepp, OrlandoBloom, KeiraKnightley][GoreVerbinski]
2206647Spectre[A, cryptic, message, from, Bond’s, past, send...[Action, Adventure, Crime][spy, basedonnovel, secretagent, sequel, mi6, ...[DanielCraig, ChristophWaltz, LéaSeydoux][SamMendes]
349026The Dark Knight Rises[Following, the, death, of, District, Attorney...[Action, Crime, Drama, Thriller][dccomics, crimefighter, terrorist, secretiden...[ChristianBale, MichaelCaine, GaryOldman][ChristopherNolan]
449529John Carter[John, Carter, is, a, war-weary,, former, mili...[Action, Adventure, ScienceFiction][basedonnovel, mars, medallion, spacetravel, p...[TaylorKitsch, LynnCollins, SamanthaMorton][AndrewStanton]
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", + "1 [Captain, Barbossa,, long, believed, to, be, d... \n", + "2 [A, cryptic, message, from, Bond’s, past, send... \n", + "3 [Following, the, death, of, District, Attorney... \n", + "4 [John, Carter, is, a, war-weary,, former, mili... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, ScienceFiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, ScienceFiction] \n", + "\n", + " keywords \\\n", + "0 [cultureclash, future, spacewar, spacecolony, ... \n", + "1 [ocean, drugabuse, exoticisland, eastindiatrad... \n", + "2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n", + "3 [dccomics, crimefighter, terrorist, secretiden... \n", + "4 [basedonnovel, mars, medallion, spacetravel, p... 
\n", + "\n", + " cast crew \n", + "0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n", + "1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n", + "2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n", + "3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n", + "4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "#we will make a new column called tags which will be the concatenation of overview, genres, keywords, cast and crew\n", + "movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitleoverviewgenreskeywordscastcrewtags
019995Avatar[In, the, 22nd, century,, a, paraplegic, Marin...[Action, Adventure, Fantasy, ScienceFiction][cultureclash, future, spacewar, spacecolony, ...[SamWorthington, ZoeSaldana, SigourneyWeaver][JamesCameron][In, the, 22nd, century,, a, paraplegic, Marin...
1285Pirates of the Caribbean: At World's End[Captain, Barbossa,, long, believed, to, be, d...[Adventure, Fantasy, Action][ocean, drugabuse, exoticisland, eastindiatrad...[JohnnyDepp, OrlandoBloom, KeiraKnightley][GoreVerbinski][Captain, Barbossa,, long, believed, to, be, d...
2206647Spectre[A, cryptic, message, from, Bond’s, past, send...[Action, Adventure, Crime][spy, basedonnovel, secretagent, sequel, mi6, ...[DanielCraig, ChristophWaltz, LéaSeydoux][SamMendes][A, cryptic, message, from, Bond’s, past, send...
349026The Dark Knight Rises[Following, the, death, of, District, Attorney...[Action, Crime, Drama, Thriller][dccomics, crimefighter, terrorist, secretiden...[ChristianBale, MichaelCaine, GaryOldman][ChristopherNolan][Following, the, death, of, District, Attorney...
449529John Carter[John, Carter, is, a, war-weary,, former, mili...[Action, Adventure, ScienceFiction][basedonnovel, mars, medallion, spacetravel, p...[TaylorKitsch, LynnCollins, SamanthaMorton][AndrewStanton][John, Carter, is, a, war-weary,, former, mili...
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " overview \\\n", + "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", + "1 [Captain, Barbossa,, long, believed, to, be, d... \n", + "2 [A, cryptic, message, from, Bond’s, past, send... \n", + "3 [Following, the, death, of, District, Attorney... \n", + "4 [John, Carter, is, a, war-weary,, former, mili... \n", + "\n", + " genres \\\n", + "0 [Action, Adventure, Fantasy, ScienceFiction] \n", + "1 [Adventure, Fantasy, Action] \n", + "2 [Action, Adventure, Crime] \n", + "3 [Action, Crime, Drama, Thriller] \n", + "4 [Action, Adventure, ScienceFiction] \n", + "\n", + " keywords \\\n", + "0 [cultureclash, future, spacewar, spacecolony, ... \n", + "1 [ocean, drugabuse, exoticisland, eastindiatrad... \n", + "2 [spy, basedonnovel, secretagent, sequel, mi6, ... \n", + "3 [dccomics, crimefighter, terrorist, secretiden... \n", + "4 [basedonnovel, mars, medallion, spacetravel, p... \n", + "\n", + " cast crew \\\n", + "0 [SamWorthington, ZoeSaldana, SigourneyWeaver] [JamesCameron] \n", + "1 [JohnnyDepp, OrlandoBloom, KeiraKnightley] [GoreVerbinski] \n", + "2 [DanielCraig, ChristophWaltz, LéaSeydoux] [SamMendes] \n", + "3 [ChristianBale, MichaelCaine, GaryOldman] [ChristopherNolan] \n", + "4 [TaylorKitsch, LynnCollins, SamanthaMorton] [AndrewStanton] \n", + "\n", + " tags \n", + "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", + "1 [Captain, Barbossa,, long, believed, to, be, d... \n", + "2 [A, cryptic, message, from, Bond’s, past, send... \n", + "3 [Following, the, death, of, District, Attorney... \n", + "4 [John, Carter, is, a, war-weary,, former, mili... 
" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "new_df = movies[['movie_id', 'title', 'tags']]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitletags
019995Avatar[In, the, 22nd, century,, a, paraplegic, Marin...
1285Pirates of the Caribbean: At World's End[Captain, Barbossa,, long, believed, to, be, d...
2206647Spectre[A, cryptic, message, from, Bond’s, past, send...
349026The Dark Knight Rises[Following, the, death, of, District, Attorney...
449529John Carter[John, Carter, is, a, war-weary,, former, mili...
............
48049367El Mariachi[El, Mariachi, just, wants, to, play, his, gui...
480572766Newlyweds[A, newlywed, couple's, honeymoon, is, upended...
4806231617Signed, Sealed, Delivered[\"Signed,, Sealed,, Delivered\", introduces, a,...
4807126186Shanghai Calling[When, ambitious, New, York, attorney, Sam, is...
480825975My Date with Drew[Ever, since, the, second, grade, when, he, fi...
\n", + "

4806 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "... ... ... \n", + "4804 9367 El Mariachi \n", + "4805 72766 Newlyweds \n", + "4806 231617 Signed, Sealed, Delivered \n", + "4807 126186 Shanghai Calling \n", + "4808 25975 My Date with Drew \n", + "\n", + " tags \n", + "0 [In, the, 22nd, century,, a, paraplegic, Marin... \n", + "1 [Captain, Barbossa,, long, believed, to, be, d... \n", + "2 [A, cryptic, message, from, Bond’s, past, send... \n", + "3 [Following, the, death, of, District, Attorney... \n", + "4 [John, Carter, is, a, war-weary,, former, mili... \n", + "... ... \n", + "4804 [El, Mariachi, just, wants, to, play, his, gui... \n", + "4805 [A, newlywed, couple's, honeymoon, is, upended... \n", + "4806 [\"Signed,, Sealed,, Delivered\", introduces, a,... \n", + "4807 [When, ambitious, New, York, attorney, Sam, is... \n", + "4808 [Ever, since, the, second, grade, when, he, fi... 
\n", + "\n", + "[4806 rows x 3 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "#the tags column holds lists of tokens; join each list into one space-separated string\n", + "#assign() returns a new frame instead of writing into a slice of movies, so no SettingWithCopyWarning is raised\n", + "new_df = new_df.assign(tags=new_df['tags'].apply(lambda x:\" \".join(x)))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idtitletags
019995AvatarIn the 22nd century, a paraplegic Marine is di...
1285Pirates of the Caribbean: At World's EndCaptain Barbossa, long believed to be dead, ha...
2206647SpectreA cryptic message from Bond’s past sends him o...
349026The Dark Knight RisesFollowing the death of District Attorney Harve...
449529John CarterJohn Carter is a war-weary, former military ca...
\n", + "
" + ], + "text/plain": [ + " movie_id title \\\n", + "0 19995 Avatar \n", + "1 285 Pirates of the Caribbean: At World's End \n", + "2 206647 Spectre \n", + "3 49026 The Dark Knight Rises \n", + "4 49529 John Carter \n", + "\n", + " tags \n", + "0 In the 22nd century, a paraplegic Marine is di... \n", + "1 Captain Barbossa, long believed to be dead, ha... \n", + "2 A cryptic message from Bond’s past sends him o... \n", + "3 Following the death of District Attorney Harve... \n", + "4 John Carter is a war-weary, former military ca... " + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: nltk in c:\\users\\srija\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\\localcache\\local-packages\\python311\\site-packages (3.8.1)\n", + "Requirement already satisfied: click in c:\\users\\srija\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\\localcache\\local-packages\\python311\\site-packages (from nltk) (8.1.7)\n", + "Requirement already satisfied: joblib in c:\\users\\srija\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\\localcache\\local-packages\\python311\\site-packages (from nltk) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\srija\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\\localcache\\local-packages\\python311\\site-packages (from nltk) (2023.12.25)\n", + "Requirement already satisfied: tqdm in c:\\users\\srija\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\\localcache\\local-packages\\python311\\site-packages (from nltk) (4.66.2)\n", + "Requirement already satisfied: colorama in 
c:\\users\\srija\\appdata\\local\\packages\\pythonsoftwarefoundation.python.3.11_qbz5n2kfra8p0\\localcache\\local-packages\\python311\\site-packages (from click->nltk) (0.4.6)\n" + ] + } + ], + "source": [ + "%pip install nltk==3.8.1" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "# Porter stemmer: maps inflected forms of a word onto a common stem so they count as one token\n", + "import nltk\n", + "from nltk.stem.porter import PorterStemmer\n", + "ps = PorterStemmer()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "def stem(text):\n", + "    \"\"\"Return `text` with every whitespace-separated token replaced by its Porter stem.\n", + "\n", + "    Tokens are re-joined with single spaces, so runs of whitespace in the input collapse.\n", + "    \"\"\"\n", + "    return \" \".join(ps.stem(token) for token in text.split())" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "# Stem every tag string. .assign() returns a new frame, which avoids the\n", + "# SettingWithCopyWarning that the in-place column write raised here.\n", + "new_df = new_df.assign(tags=new_df['tags'].apply(stem))" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'in the 22nd century, a parapleg marin is dispatch to the moon pandora on a uniqu mission, but becom torn between follow order and protect an alien civilization. 
action adventur fantasi sciencefict cultureclash futur spacewar spacecoloni societi spacetravel futurist romanc space alien tribe alienplanet cgi marin soldier battl loveaffair antiwar powerrel mindandsoul 3d samworthington zoesaldana sigourneyweav jamescameron'" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df['tags'][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "# Normalise the tags to lowercase. The stemmed sample displayed above is already all\n", + "# lowercase, so this acts as a safety net before vectorizing.\n", + "# .assign() returns a new frame, avoiding the SettingWithCopyWarning raised by the in-place write.\n", + "new_df = new_df.assign(tags=new_df['tags'].apply(str.lower))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vectorizing the Text in the Tags Column" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0]], dtype=int64)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Bag-of-words encoding: turn each movie's tags into a vector of word counts\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "cv = CountVectorizer(max_features=5000, stop_words='english') #max_features=5000 means it takes the 
top 5000 most frequent words\n", + "#stop_words='english' drops common English stop words before counting\n", + "# Fit the vectorizer once and keep the dense count matrix; the cells below reuse `vectors`\n", + "# instead of re-fitting on the same column.\n", + "vectors = cv.fit_transform(new_df['tags']).toarray()\n", + "vectors" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4806, 5000)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vectors.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "# Every movie must have exactly one row in the count matrix.\n", + "assert vectors.shape[0] == len(new_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 0, 0, 0], dtype=int64)" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# count vector of the first movie (row 0)\n", + "vectors[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Computing Cosine Similarity with scikit-learn" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "# Pairwise cosine similarity between all movie vectors. Count vectors are non-negative, so scores\n", + "# lie in [0, 1] (up to floating-point rounding): closer to 1 means more similar, closer to 0 less similar.\n", + "from sklearn.metrics.pairwise import cosine_similarity\n", + "similarity = cosine_similarity(vectors)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1. , 0.08346223, 0.0860309 , ..., 0.04499213, 0. ,\n", + " 0. ],\n", + " [0.08346223, 1. , 0.06063391, ..., 0.02378257, 0. ,\n", + " 0.02615329],\n", + " [0.0860309 , 0.06063391, 1. , ..., 0.02451452, 0. ,\n", + " 0. ],\n", + " ...,\n", + " [0.04499213, 0.02378257, 0.02451452, ..., 1. , 0.03962144,\n", + " 0.04229549],\n", + " [0. , 0. , 0. , ..., 0.03962144, 1. ,\n", + " 0.08714204],\n", + " [0. , 0.02615329, 0. 
, ..., 0.04229549, 0.08714204,\n", + " 1. ]])" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "similarity" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1. , 0.08346223, 0.0860309 , ..., 0.04499213, 0. ,\n", + " 0. ])" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "similarity[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(0, 1.0000000000000002),\n", + " (1, 0.08346223261119858),\n", + " (2, 0.08603090020146065),\n", + " (3, 0.0734718358370645),\n", + " (4, 0.1892994097121204),\n", + " (5, 0.10838874619051501),\n", + " (6, 0.04024218182927669),\n", + " (7, 0.14673479641335554),\n", + " (8, 0.05923488777590923),\n", + " (9, 0.0967301666813349),\n", + " (10, 0.10259783520851541),\n", + " (11, 0.09464970485606021),\n", + " (12, 0.09037128496931669),\n", + " (13, 0.04499212706658476),\n", + " (14, 0.12824729401064427),\n", + " (15, 0.06282808624375433),\n", + " (16, 0.07894736842105264),\n", + " (17, 0.13977653617040256),\n", + " (18, 0.09493290614465533),\n", + " (19, 0.0830812984794528),\n", + " (20, 0.058038100008800934),\n", + " (21, 0.10968169942141635),\n", + " (22, 0.0662266178532522),\n", + " (23, 0.08740748201220976),\n", + " (24, 0.0533380747062665),\n", + " (25, 0.05101627678885769),\n", + " (26, 0.15389675281277312),\n", + " (27, 0.18693292157876878),\n", + " (28, 0.116543309349613),\n", + " (29, 0.065033247714309),\n", + " (30, 0.06684847767323797),\n", + " (31, 0.15907119074394446),\n", + " (32, 0.08520286456846099),\n", + " (33, 0.09733285267845754),\n", + " (34, 0.0),\n", + " (35, 0.09933992677987831),\n", + " (36, 0.17316974359835272),\n", + " (37, 0.07894736842105264),\n", + " (38, 0.08111071056538127),\n", + " (39, 0.08226127456606226),\n", + " 
(40, 0.07694837640638656),\n", + " (41, 0.16563698349810535),\n", + " (42, 0.0),\n", + " (43, 0.09086217008485092),\n", + " (44, 0.03382550457458692),\n", + " (45, 0.08240856434303293),\n", + " (46, 0.1391037210186643),\n", + " (47, 0.19672236884115843),\n", + " (48, 0.08447772061910234),\n", + " (49, 0.0582716546748065),\n", + " (50, 0.11295649894498103),\n", + " (51, 0.08048436365855338),\n", + " (52, 0.14843120879858804),\n", + " (53, 0.042973504259354006),\n", + " (54, 0.031219527052723135),\n", + " (55, 0.07038153430777869),\n", + " (56, 0.13834289277321493),\n", + " (57, 0.024182541670333724),\n", + " (58, 0.06748819059987714),\n", + " (59, 0.07254762501100116),\n", + " (60, 0.018367958959266125),\n", + " (61, 0.23179316248638276),\n", + " (62, 0.0837707816583391),\n", + " (63, 0.08175191193132876),\n", + " (64, 0.029617443887954616),\n", + " (65, 0.019529164171612674),\n", + " (66, 0.02249606353329238),\n", + " (67, 0.16452254913212452),\n", + " (68, 0.1204950466257556),\n", + " (69, 0.02649064714130088),\n", + " (70, 0.076004188790721),\n", + " (71, 0.07336739820667779),\n", + " (72, 0.13710212427677043),\n", + " (73, 0.0331133089266261),\n", + " (74, 0.2024645717996314),\n", + " (75, 0.10390486669322622),\n", + " (76, 0.07175473098524099),\n", + " (77, 0.014394823400711486),\n", + " (78, 0.050669459193814),\n", + " (79, 0.0623109738595896),\n", + " (80, 0.050464733268936676),\n", + " (81, 0.11020775375559676),\n", + " (82, 0.076004188790721),\n", + " (83, 0.17996850826633903),\n", + " (84, 0.12361284651454937),\n", + " (85, 0.1141270638770543),\n", + " (86, 0.07098727864204515),\n", + " (87, 0.11128297681493143),\n", + " (88, 0.09037128496931669),\n", + " (89, 0.05757929360284594),\n", + " (90, 0.06131393394849658),\n", + " (91, 0.16692446522239718),\n", + " (92, 0.0623109738595896),\n", + " (93, 0.07098727864204515),\n", + " (94, 0.1873171623163388),\n", + " (95, 0.14048787173725413),\n", + " (96, 0.07694837640638656),\n", + " (97, 
0.06952346619889824),\n", + " (98, 0.07509392614826384),\n", + " (99, 0.02666903735313325),\n", + " (100, 0.06765100914917384),\n", + " (101, 0.07694837640638656),\n", + " (102, 0.12462194771917919),\n", + " (103, 0.07987230638308718),\n", + " (104, 0.05564148840746572),\n", + " (105, 0.1037571695799112),\n", + " (106, 0.0623109738595896),\n", + " (107, 0.061806423257274694),\n", + " (108, 0.14509525002200235),\n", + " (109, 0.06488856845230502),\n", + " (110, 0.04499212706658476),\n", + " (111, 0.15018785229652767),\n", + " (112, 0.07694837640638656),\n", + " (113, 0.043355498476206004),\n", + " (114, 0.04588314677411236),\n", + " (115, 0.043747863925980714),\n", + " (116, 0.04836508334066745),\n", + " (117, 0.05923488777590923),\n", + " (118, 0.03364315551262445),\n", + " (119, 0.043355498476206004),\n", + " (120, 0.0),\n", + " (121, 0.11128297681493143),\n", + " (122, 0.116543309349613),\n", + " (123, 0.1124803176664619),\n", + " (124, 0.06748819059987714),\n", + " (125, 0.07843304784240032),\n", + " (126, 0.13334518676566626),\n", + " (127, 0.14601068654819693),\n", + " (128, 0.0),\n", + " (129, 0.09269795493186431),\n", + " (130, 0.027036903521793755),\n", + " (131, 0.10526315789473685),\n", + " (132, 0.04499212706658476),\n", + " (133, 0.028239124736245257),\n", + " (134, 0.12977713690461004),\n", + " (135, 0.0),\n", + " (136, 0.023414645289542353),\n", + " (137, 0.0301237616564389),\n", + " (138, 0.12824729401064427),\n", + " (139, 0.12072654548783007),\n", + " (140, 0.04499212706658476),\n", + " (141, 0.08740748201220976),\n", + " (142, 0.08471737420873576),\n", + " (143, 0.08671099695241201),\n", + " (144, 0.05006261743217589),\n", + " (145, 0.019252140716412975),\n", + " (146, 0.016303866268150616),\n", + " (147, 0.05564148840746572),\n", + " (148, 0.09733285267845754),\n", + " (149, 0.15511334686589626),\n", + " (150, 0.16426202236672155),\n", + " (151, 0.2029530274475215),\n", + " (152, 0.08346223261119858),\n", + " (153, 0.07868894753646337),\n", + " 
(154, 0.06131393394849658),\n", + " (155, 0.02043797798283219),\n", + " (156, 0.05990422978731538),\n", + " (157, 0.09933992677987831),\n", + " (158, 0.13369695534647594),\n", + " (159, 0.06362847629757779),\n", + " (160, 0.11712303424679457),\n", + " (161, 0.03181423814878889),\n", + " (162, 0.0331133089266261),\n", + " (163, 0.050669459193814),\n", + " (164, 0.08000711205939975),\n", + " (165, 0.04588314677411236),\n", + " (166, 0.12515654358043973),\n", + " (167, 0.09037128496931669),\n", + " (168, 0.17521916101261562),\n", + " (169, 0.08830215713766959),\n", + " (170, 0.10650358071057624),\n", + " (171, 0.02271554252121273),\n", + " (172, 0.21239769762143662),\n", + " (173, 0.023174488732966077),\n", + " (174, 0.07792865001991967),\n", + " (175, 0.07098727864204515),\n", + " (176, 0.02913582733740325),\n", + " (177, 0.0),\n", + " (178, 0.024455799402225926),\n", + " (179, 0.07254762501100116),\n", + " (180, 0.022075539284417398),\n", + " (181, 0.04499212706658476),\n", + " (182, 0.11707322644771176),\n", + " (183, 0.07038153430777869),\n", + " (184, 0.177343107178349),\n", + " (185, 0.050669459193814),\n", + " (186, 0.05901671065234752),\n", + " (187, 0.15389675281277312),\n", + " (188, 0.022075539284417398),\n", + " (189, 0.1147078669352809),\n", + " (190, 0.08447772061910234),\n", + " (191, 0.04891159880445185),\n", + " (192, 0.047324852428030105),\n", + " (193, 0.07878168645790681),\n", + " (194, 0.0),\n", + " (195, 0.05923488777590923),\n", + " (196, 0.05195243334661311),\n", + " (197, 0.036973693332326786),\n", + " (198, 0.057353933467640436),\n", + " (199, 0.08671099695241201),\n", + " (200, 0.10882143751650175),\n", + " (201, 0.0),\n", + " (202, 0.031219527052723135),\n", + " (203, 0.08346223261119858),\n", + " (204, 0.04947706959952935),\n", + " (205, 0.07443229275647868),\n", + " (206, 0.11092107999698035),\n", + " (207, 0.07694837640638656),\n", + " (208, 0.12262786789699316),\n", + " (209, 0.06765100914917384),\n", + " (210, 0.07509392614826384),\n", 
+ " (211, 0.024455799402225926),\n", + " (212, 0.06622661785325219),\n", + " (213, 0.05901671065234752),\n", + " (214, 0.0),\n", + " (215, 0.06622661785325219),\n", + " (216, 0.06488856845230502),\n", + " (217, 0.08520286456846099),\n", + " (218, 0.03823595564509363),\n", + " (219, 0.09197090092274487),\n", + " (220, 0.1777046633277277),\n", + " (221, 0.047324852428030105),\n", + " (222, 0.07987230638308718),\n", + " (223, 0.10390486669322622),\n", + " (224, 0.060833032924035954),\n", + " (225, 0.046829290579084706),\n", + " (226, 0.0301237616564389),\n", + " (227, 0.08226127456606226),\n", + " (228, 0.11037769642208699),\n", + " (229, 0.1074861545850028),\n", + " (230, 0.09183979479633063),\n", + " (231, 0.0),\n", + " (232, 0.11128297681493143),\n", + " (233, 0.1037571695799112),\n", + " (234, 0.08594700851870801),\n", + " (235, 0.076004188790721),\n", + " (236, 0.11470786693528087),\n", + " (237, 0.0662266178532522),\n", + " (238, 0.11092107999698035),\n", + " (239, 0.09125667909262308),\n", + " (240, 0.046829290579084706),\n", + " (241, 0.07894736842105264),\n", + " (242, 0.08111071056538127),\n", + " (243, 0.09269795493186431),\n", + " (244, 0.09197090092274487),\n", + " (245, 0.08111071056538127),\n", + " (246, 0.07594632491572426),\n", + " (247, 0.0),\n", + " (248, 0.04456565178215865),\n", + " (249, 0.13834289277321493),\n", + " (250, 0.0),\n", + " (251, 0.04188539082916955),\n", + " (252, 0.020277677641345318),\n", + " (253, 0.06282808624375433),\n", + " (254, 0.06952346619889824),\n", + " (255, 0.0),\n", + " (256, 0.11164843913471803),\n", + " (257, 0.0533380747062665),\n", + " (258, 0.01769980813511972),\n", + " (259, 0.06131393394849658),\n", + " (260, 0.20395079136182276),\n", + " (261, 0.024182541670333724),\n", + " (262, 0.10650358071057624),\n", + " (263, 0.09733285267845754),\n", + " (264, 0.0),\n", + " (265, 0.02742042485535409),\n", + " (266, 0.0989541391990587),\n", + " (267, 0.06488856845230502),\n", + " (268, 0.052631578947368425),\n", + " 
(269, 0.029617443887954616),\n", + " (270, 0.08830215713766959),\n", + " (271, 0.07098727864204515),\n", + " (272, 0.02782074420373286),\n", + " (273, 0.021119430154775586),\n", + " (274, 0.08977310580745099),\n", + " (275, 0.1433848336691011),\n", + " (276, 0.06243905410544627),\n", + " (277, 0.053099424405359155),\n", + " (278, 0.11707322644771176),\n", + " (279, 0.09269795493186431),\n", + " (280, 0.0),\n", + " (281, 0.065033247714309),\n", + " (282, 0.04456565178215865),\n", + " (283, 0.02742042485535409),\n", + " (284, 0.018367958959266125),\n", + " (285, 0.019529164171612674),\n", + " (286, 0.0781166566864507),\n", + " (287, 0.025976216673306556),\n", + " (288, 0.02742042485535409),\n", + " (289, 0.04588314677411236),\n", + " (290, 0.06362847629757779),\n", + " (291, 0.05006261743217589),\n", + " (292, 0.08885233166386385),\n", + " (293, 0.029617443887954616),\n", + " (294, 0.1433848336691011),\n", + " (295, 0.10526315789473685),\n", + " (296, 0.038778336716474064),\n", + " (297, 0.044151078568834795),\n", + " (298, 0.0),\n", + " (299, 0.05945550264670635),\n", + " (300, 0.1711905958155815),\n", + " (301, 0.09365858115816939),\n", + " (302, 0.06952346619889824),\n", + " (303, 0.02782074420373286),\n", + " (304, 0.06578947368421054),\n", + " (305, 0.19007487139298027),\n", + " (306, 0.08111071056538127),\n", + " (307, 0.10876595882948199),\n", + " (308, 0.06765100914917384),\n", + " (309, 0.06243905410544627),\n", + " (310, 0.03065696697424829),\n", + " (311, 0.15018785229652765),\n", + " (312, 0.10012523486435178),\n", + " (313, 0.05484084971070818),\n", + " (314, 0.0),\n", + " (315, 0.08346223261119858),\n", + " (316, 0.0),\n", + " (317, 0.0),\n", + " (318, 0.0),\n", + " (319, 0.09933992677987831),\n", + " (320, 0.03065696697424829),\n", + " (321, 0.0),\n", + " (322, 0.2105263157894737),\n", + " (323, 0.029617443887954616),\n", + " (324, 0.024455799402225926),\n", + " (325, 0.09933992677987831),\n", + " (326, 0.05647824947249051),\n", + " (327, 
0.027036903521793755),\n", + " (328, 0.02742042485535409),\n", + " (329, 0.07894736842105264),\n", + " (330, 0.10012523486435178),\n", + " (331, 0.08998425413316952),\n", + " (332, 0.106676149412533),\n", + " (333, 0.024455799402225926),\n", + " (334, 0.0967301666813349),\n", + " (335, 0.046348977465932154),\n", + " (336, 0.06917144638660747),\n", + " (337, 0.04543108504242546),\n", + " (338, 0.0),\n", + " (339, 0.04891159880445185),\n", + " (340, 0.04947706959952935),\n", + " (341, 0.08226127456606226),\n", + " (342, 0.16122923187750418),\n", + " (343, 0.019252140716412975),\n", + " (344, 0.029617443887954616),\n", + " (345, 0.02294157338705618),\n", + " (346, 0.0),\n", + " (347, 0.0),\n", + " (348, 0.06131393394849658),\n", + " (349, 0.076004188790721),\n", + " (350, 0.04891159880445185),\n", + " (351, 0.0),\n", + " (352, 0.02742042485535409),\n", + " (353, 0.04120428217151646),\n", + " (354, 0.021677749238103002),\n", + " (355, 0.03650267163704923),\n", + " (356, 0.09037128496931669),\n", + " (357, 0.0),\n", + " (358, 0.0602475233128778),\n", + " (359, 0.031219527052723135),\n", + " (360, 0.020942695414584774),\n", + " (361, 0.07509392614826384),\n", + " (362, 0.09544271444636668),\n", + " (363, 0.09909250441117726),\n", + " (364, 0.023414645289542353),\n", + " (365, 0.07509392614826384),\n", + " (366, 0.05647824947249051),\n", + " (367, 0.0),\n", + " (368, 0.12977713690461004),\n", + " (369, 0.12824729401064427),\n", + " (370, 0.050669459193814),\n", + " (371, 0.09567297464698798),\n", + " (372, 0.047324852428030105),\n", + " (373, 0.17292861596651865),\n", + " (374, 0.03797316245786213),\n", + " (375, 0.061313933948496574),\n", + " (376, 0.046829290579084706),\n", + " (377, 0.0),\n", + " (378, 0.0),\n", + " (379, 0.05073825686188038),\n", + " (380, 0.05195243334661311),\n", + " (381, 0.0837707816583391),\n", + " (382, 0.0),\n", + " (383, 0.043355498476206004),\n", + " (384, 0.028676966733820218),\n", + " (385, 0.0),\n", + " (386, 0.021873931962990357),\n", + " 
(387, 0.023662426214015053),\n", + " (388, 0.0),\n", + " (389, 0.08740748201220976),\n", + " (390, 0.026315789473684213),\n", + " (391, 0.056573571480353566),\n", + " (392, 0.0301237616564389),\n", + " (393, 0.0),\n", + " (394, 0.024738534799764674),\n", + " (395, 0.0533380747062665),\n", + " (396, 0.043747863925980714),\n", + " (397, 0.05195243334661311),\n", + " (398, 0.0),\n", + " (399, 0.023662426214015053),\n", + " (400, 0.09037128496931669),\n", + " (401, 0.031219527052723135),\n", + " (402, 0.07792865001991967),\n", + " (403, 0.0830812984794528),\n", + " (404, 0.06131393394849658),\n", + " (405, 0.05564148840746572),\n", + " (406, 0.0207703246198632),\n", + " (407, 0.021300716142115247),\n", + " (408, 0.03244428422615251),\n", + " (409, 0.0),\n", + " (410, 0.0978231976089037),\n", + " (411, 0.052631578947368425),\n", + " (412, 0.09567297464698798),\n", + " (413, 0.1411956236812263),\n", + " (414, 0.029617443887954616),\n", + " (415, 0.053418342734682205),\n", + " (416, 0.050669459193814),\n", + " (417, 0.03244428422615251),\n", + " (418, 0.03539961627023944),\n", + " (419, 0.06882472016116853),\n", + " (420, 0.1124803176664619),\n", + " (421, 0.023174488732966077),\n", + " (422, 0.116543309349613),\n", + " (423, 0.04120428217151646),\n", + " (424, 0.03721614637823934),\n", + " (425, 0.06390214842634574),\n", + " (426, 0.05374307729250139),\n", + " (427, 0.026315789473684213),\n", + " (428, 0.055824219567359015),\n", + " (429, 0.0),\n", + " (430, 0.050669459193814),\n", + " (431, 0.024182541670333724),\n", + " (432, 0.11357771260606365),\n", + " (433, 0.031219527052723135),\n", + " (434, 0.0),\n", + " (435, 0.03539961627023944),\n", + " (436, 0.0),\n", + " (437, 0.025649458802128853),\n", + " (438, 0.12227899701112963),\n", + " (439, 0.026315789473684213),\n", + " (440, 0.024738534799764674),\n", + " (441, 0.057353933467640436),\n", + " (442, 0.04543108504242546),\n", + " (443, 0.06390214842634574),\n", + " (444, 0.0),\n", + " (445, 0.0989541391990587),\n", + 
" (446, 0.023174488732966077),\n", + " (447, 0.06446025638903101),\n", + " (448, 0.021300716142115247),\n", + " (449, 0.10968169942141635),\n", + " (450, 0.021300716142115247),\n", + " (451, 0.025031308716087945),\n", + " (452, 0.03065696697424829),\n", + " (453, 0.024455799402225926),\n", + " (454, 0.11707322644771176),\n", + " (455, 0.060833032924035954),\n", + " (456, 0.043355498476206004),\n", + " (457, 0.08603090020146065),\n", + " (458, 0.043355498476206004),\n", + " (459, 0.03458572319330373),\n", + " (460, 0.024182541670333724),\n", + " (461, 0.1411956236812263),\n", + " (462, 0.032283269414765066),\n", + " (463, 0.03244428422615251),\n", + " (464, 0.0),\n", + " (465, 0.0),\n", + " (466, 0.14509525002200233),\n", + " (467, 0.10559715077387793),\n", + " (468, 0.02782074420373286),\n", + " (469, 0.05945550264670635),\n", + " (470, 0.04543108504242546),\n", + " (471, 0.06390214842634574),\n", + " (472, 0.106676149412533),\n", + " (473, 0.09269795493186431),\n", + " (474, 0.0),\n", + " (475, 0.043747863925980714),\n", + " (476, 0.13530201829834768),\n", + " (477, 0.0),\n", + " (478, 0.05407380704358751),\n", + " (479, 0.08797691788472337),\n", + " (480, 0.0978231976089037),\n", + " (481, 0.0),\n", + " (482, 0.021873931962990357),\n", + " (483, 0.12487810821089254),\n", + " (484, 0.02913582733740325),\n", + " (485, 0.052631578947368425),\n", + " (486, 0.07992075789278004),\n", + " (487, 0.12725695259515557),\n", + " (488, 0.06657426652986062),\n", + " (489, 0.0),\n", + " (490, 0.14159846508095775),\n", + " (491, 0.09909250441117726),\n", + " (492, 0.076004188790721),\n", + " (493, 0.03065696697424829),\n", + " (494, 0.0),\n", + " (495, 0.19088542889273336),\n", + " (496, 0.0),\n", + " (497, 0.0),\n", + " (498, 0.06765100914917384),\n", + " (499, 0.0),\n", + " (500, 0.025976216673306556),\n", + " (501, 0.08175191193132876),\n", + " (502, 0.13369695534647594),\n", + " (503, 0.07509392614826384),\n", + " (504, 0.0),\n", + " (505, 0.13530201829834768),\n", + " (506, 
0.03458572319330373),\n", + " (507, 0.255608593705383),\n", + " (508, 0.101338918387628),\n", + " (509, 0.022075539284417398),\n", + " (510, 0.0989541391990587),\n", + " (511, 0.09197090092274487),\n", + " (512, 0.02742042485535409),\n", + " (513, 0.14843120879858804),\n", + " (514, 0.04947706959952935),\n", + " (515, 0.024738534799764674),\n", + " (516, 0.028239124736245257),\n", + " (517, 0.0),\n", + " (518, 0.043747863925980714),\n", + " (519, 0.0),\n", + " (520, 0.0),\n", + " (521, 0.025031308716087945),\n", + " (522, 0.024182541670333724),\n", + " (523, 0.04836508334066745),\n", + " (524, 0.0),\n", + " (525, 0.047324852428030105),\n", + " (526, 0.028239124736245257),\n", + " (527, 0.0331133089266261),\n", + " (528, 0.022282825891079324),\n", + " (529, 0.16742770563222897),\n", + " (530, 0.05564148840746572),\n", + " (531, 0.046348977465932154),\n", + " (532, 0.021300716142115247),\n", + " (533, 0.0602475233128778),\n", + " (534, 0.0837707816583391),\n", + " (535, 0.0978231976089037),\n", + " (536, 0.1124803176664619),\n", + " (537, 0.06765100914917384),\n", + " (538, 0.0),\n", + " (539, 0.25038669783359574),\n", + " (540, 0.07509392614826384),\n", + " (541, 0.10559715077387793),\n", + " (542, 0.036973693332326786),\n", + " (543, 0.07098727864204515),\n", + " (544, 0.05564148840746572),\n", + " (545, 0.024738534799764674),\n", + " (546, 0.06362847629757779),\n", + " (547, 0.08346223261119858),\n", + " (548, 0.0),\n", + " (549, 0.11959121830873498),\n", + " (550, 0.024738534799764674),\n", + " (551, 0.03771571432023571),\n", + " (552, 0.0),\n", + " (553, 0.028676966733820218),\n", + " (554, 0.0),\n", + " (555, 0.04499212706658475),\n", + " (556, 0.02666903735313325),\n", + " (557, 0.12227899701112963),\n", + " (558, 0.0),\n", + " (559, 0.0301237616564389),\n", + " (560, 0.03604920469572501),\n", + " (561, 0.03181423814878889),\n", + " (562, 0.03065696697424829),\n", + " (563, 0.025649458802128853),\n", + " (564, 0.08885233166386385),\n", + " (565, 
0.043355498476206004),\n", + " (566, 0.023918243661746996),\n", + " (567, 0.025031308716087945),\n", + " (568, 0.1411956236812263),\n", + " (569, 0.047324852428030105),\n", + " (570, 0.06765100914917384),\n", + " (571, 0.04223886030955117),\n", + " (572, 0.10743376064838502),\n", + " (573, 0.0207703246198632),\n", + " (574, 0.023414645289542353),\n", + " (575, 0.04543108504242546),\n", + " (576, 0.05195243334661311),\n", + " (577, 0.1976738731537168),\n", + " (578, 0.037463432463267755),\n", + " (579, 0.14567913668701626),\n", + " (580, 0.024182541670333724),\n", + " (581, 0.14567913668701626),\n", + " (582, 0.24511108480187255),\n", + " (583, 0.03771571432023571),\n", + " (584, 0.08111071056538127),\n", + " (585, 0.024455799402225926),\n", + " (586, 0.04499212706658476),\n", + " (587, 0.09464970485606021),\n", + " (588, 0.025649458802128853),\n", + " (589, 0.1163350101494222),\n", + " (590, 0.0301237616564389),\n", + " (591, 0.06282808624375433),\n", + " (592, 0.021119430154775586),\n", + " (593, 0.0),\n", + " (594, 0.04947706959952935),\n", + " (595, 0.16059101370939324),\n", + " (596, 0.09269795493186431),\n", + " (597, 0.05923488777590923),\n", + " (598, 0.03627381250550058),\n", + " (599, 0.02742042485535409),\n", + " (600, 0.06882472016116853),\n", + " (601, 0.033463724070512735),\n", + " (602, 0.09086217008485092),\n", + " (603, 0.05407380704358751),\n", + " (604, 0.0662266178532522),\n", + " (605, 0.025031308716087945),\n", + " (606, 0.0),\n", + " (607, 0.05695974368679319),\n", + " (608, 0.0),\n", + " (609, 0.046829290579084706),\n", + " (610, 0.058038100008800934),\n", + " (611, 0.023174488732966077),\n", + " (612, 0.06335829046432676),\n", + " (613, 0.03539961627023944),\n", + " (614, 0.04891159880445185),\n", + " (615, 0.06131393394849658),\n", + " (616, 0.03458572319330373),\n", + " (617, 0.05006261743217589),\n", + " (618, 0.08830215713766959),\n", + " (619, 0.0),\n", + " (620, 0.0),\n", + " (621, 0.06488856845230502),\n", + " (622, 
0.021119430154775586),\n", + " (623, 0.0),\n", + " (624, 0.1037571695799112),\n", + " (625, 0.031219527052723135),\n", + " (626, 0.08226127456606226),\n", + " (627, 0.10882143751650175),\n", + " (628, 0.0),\n", + " (629, 0.04836508334066745),\n", + " (630, 0.024455799402225926),\n", + " (631, 0.050669459193814),\n", + " (632, 0.07694837640638656),\n", + " (633, 0.022282825891079324),\n", + " (634, 0.07421560439929402),\n", + " (635, 0.06622661785325219),\n", + " (636, 0.023414645289542353),\n", + " (637, 0.024182541670333724),\n", + " (638, 0.03382550457458692),\n", + " (639, 0.0),\n", + " (640, 0.05990422978731538),\n", + " (641, 0.0),\n", + " (642, 0.0),\n", + " (643, 0.10012523486435178),\n", + " (644, 0.05006261743217589),\n", + " (645, 0.044151078568834795),\n", + " (646, 0.052631578947368425),\n", + " (647, 0.04836508334066745),\n", + " (648, 0.0301237616564389),\n", + " (649, 0.0),\n", + " (650, 0.05195243334661311),\n", + " (651, 0.0),\n", + " (652, 0.042973504259354006),\n", + " (653, 0.10882143751650175),\n", + " (654, 0.10385162309931599),\n", + " (655, 0.023918243661746996),\n", + " (656, 0.0),\n", + " (657, 0.03458572319330373),\n", + " (658, 0.04836508334066745),\n", + " (659, 0.025334729596907),\n", + " (660, 0.09365858115816941),\n", + " (661, 0.1675415633166782),\n", + " (662, 0.0),\n", + " (663, 0.0),\n", + " (664, 0.0602475233128778),\n", + " (665, 0.06814662756363819),\n", + " (666, 0.05006261743217589),\n", + " (667, 0.0),\n", + " (668, 0.13530201829834768),\n", + " (669, 0.03539961627023944),\n", + " (670, 0.01749278571353299),\n", + " (671, 0.06814662756363819),\n", + " (672, 0.028239124736245257),\n", + " (673, 0.019968076595771794),\n", + " (674, 0.07098727864204515),\n", + " (675, 0.04499212706658476),\n", + " (676, 0.028676966733820218),\n", + " (677, 0.025031308716087945),\n", + " (678, 0.116543309349613),\n", + " (679, 0.0),\n", + " (680, 0.07509392614826384),\n", + " (681, 0.02742042485535409),\n", + " (682, 0.019968076595771794),\n", 
+ " (683, 0.04836508334066745),\n", + " (684, 0.0),\n", + " (685, 0.03244428422615251),\n", + " (686, 0.03181423814878889),\n", + " (687, 0.10390486669322622),\n", + " (688, 0.043355498476206004),\n", + " (689, 0.0),\n", + " (690, 0.025031308716087945),\n", + " (691, 0.05923488777590923),\n", + " (692, 0.10936965981495178),\n", + " (693, 0.03382550457458692),\n", + " (694, 0.021873931962990357),\n", + " (695, 0.0662266178532522),\n", + " (696, 0.021486752129677003),\n", + " (697, 0.0),\n", + " (698, 0.043355498476206004),\n", + " (699, 0.03823595564509363),\n", + " (700, 0.028676966733820218),\n", + " (701, 0.057353933467640436),\n", + " (702, 0.02249606353329238),\n", + " (703, 0.021486752129677003),\n", + " (704, 0.06814662756363819),\n", + " (705, 0.024455799402225926),\n", + " (706, 0.021677749238103002),\n", + " (707, 0.0),\n", + " (708, 0.024738534799764674),\n", + " (709, 0.02060214108575823),\n", + " (710, 0.05775642214923893),\n", + " (711, 0.052631578947368425),\n", + " (712, 0.0),\n", + " (713, 0.024738534799764674),\n", + " (714, 0.0),\n", + " (715, 0.09933992677987831),\n", + " (716, 0.027036903521793755),\n", + " (717, 0.0),\n", + " (718, 0.04499212706658476),\n", + " (719, 0.0),\n", + " (720, 0.026315789473684213),\n", + " (721, 0.03244428422615251),\n", + " (722, 0.0),\n", + " (723, 0.024455799402225926),\n", + " (724, 0.023662426214015053),\n", + " (725, 0.0),\n", + " (726, 0.0),\n", + " (727, 0.030794751789719334),\n", + " (728, 0.07254762501100118),\n", + " (729, 0.04891159880445185),\n", + " (730, 0.030254451040802585),\n", + " (731, 0.05195243334661311),\n", + " (732, 0.06814662756363819),\n", + " (733, 0.061313933948496574),\n", + " (734, 0.06684847767323797),\n", + " (735, 0.06684847767323797),\n", + " (736, 0.029617443887954616),\n", + " (737, 0.07694837640638656),\n", + " (738, 0.0),\n", + " (739, 0.03823595564509363),\n", + " (740, 0.10838874619051501),\n", + " (741, 0.05564148840746572),\n", + " (742, 0.0301237616564389),\n", + " (743, 
0.023174488732966077),\n", + " (744, 0.06488856845230502),\n", + " (745, 0.08740748201220976),\n", + " (746, 0.06446025638903101),\n", + " (747, 0.039344473768231684),\n", + " (748, 0.03627381250550058),\n", + " (749, 0.0),\n", + " (750, 0.042601432284230495),\n", + " (751, 0.03181423814878889),\n", + " (752, 0.08749572785196143),\n", + " (753, 0.02742042485535409),\n", + " (754, 0.12515654358043973),\n", + " (755, 0.05695974368679319),\n", + " (756, 0.028676966733820218),\n", + " (757, 0.024182541670333724),\n", + " (758, 0.02271554252121273),\n", + " (759, 0.0),\n", + " (760, 0.0),\n", + " (761, 0.028676966733820218),\n", + " (762, 0.05923488777590923),\n", + " (763, 0.0),\n", + " (764, 0.02782074420373286),\n", + " (765, 0.025031308716087945),\n", + " (766, 0.025334729596907),\n", + " (767, 0.046829290579084706),\n", + " (768, 0.061313933948496574),\n", + " (769, 0.031219527052723135),\n", + " (770, 0.042973504259354006),\n", + " (771, 0.0),\n", + " (772, 0.0),\n", + " (773, 0.12725695259515557),\n", + " (774, 0.039344473768231684),\n", + " (775, 0.08603090020146065),\n", + " (776, 0.07868894753646337),\n", + " (777, 0.13369695534647594),\n", + " (778, 0.23174488732966073),\n", + " (779, 0.0),\n", + " (780, 0.0),\n", + " (781, 0.1204950466257556),\n", + " (782, 0.06952346619889824),\n", + " (783, 0.03244428422615251),\n", + " (784, 0.07175473098524099),\n", + " (785, 0.09243423333081695),\n", + " (786, 0.10814761408717502),\n", + " (787, 0.08226127456606226),\n", + " (788, 0.046829290579084706),\n", + " (789, 0.0),\n", + " (790, 0.07209840939145001),\n", + " (791, 0.09544271444636668),\n", + " (792, 0.051298917604257706),\n", + " (793, 0.050669459193814),\n", + " (794, 0.08740748201220976),\n", + " (795, 0.021873931962990357),\n", + " (796, 0.0),\n", + " (797, 0.03627381250550058),\n", + " (798, 0.0),\n", + " (799, 0.09183979479633063),\n", + " (800, 0.07443229275647868),\n", + " (801, 0.0),\n", + " (802, 0.0),\n", + " (803, 0.047324852428030105),\n", + " (804, 
0.051298917604257706),\n", + " (805, 0.05407380704358751),\n", + " (806, 0.04499212706658476),\n", + " (807, 0.0533380747062665),\n", + " (808, 0.11164843913471803),\n", + " (809, 0.019389168358237032),\n", + " (810, 0.028239124736245257),\n", + " (811, 0.020942695414584774),\n", + " (812, 0.08111071056538127),\n", + " (813, 0.09269795493186431),\n", + " (814, 0.10390486669322622),\n", + " (815, 0.020277677641345318),\n", + " (816, 0.04783648732349399),\n", + " (817, 0.027036903521793755),\n", + " (818, 0.024455799402225926),\n", + " (819, 0.0),\n", + " (820, 0.01583118967153259),\n", + " (821, 0.015907119074394446),\n", + " (822, 0.020942695414584774),\n", + " (823, 0.04783648732349399),\n", + " (824, 0.0),\n", + " (825, 0.0),\n", + " (826, 0.0301237616564389),\n", + " (827, 0.08671099695241201),\n", + " (828, 0.025649458802128853),\n", + " (829, 0.0),\n", + " (830, 0.027036903521793755),\n", + " (831, 0.047324852428030105),\n", + " (832, 0.025976216673306556),\n", + " (833, 0.07254762501100118),\n", + " (834, 0.046829290579084706),\n", + " (835, 0.025334729596907),\n", + " (836, 0.061806423257274694),\n", + " (837, 0.05564148840746572),\n", + " (838, 0.1540171257313038),\n", + " (839, 0.0),\n", + " (840, 0.05647824947249051),\n", + " (841, 0.023414645289542353),\n", + " (842, 0.0),\n", + " (843, 0.13710212427677043),\n", + " (844, 0.05195243334661311),\n", + " (845, 0.0),\n", + " (846, 0.019389168358237032),\n", + " (847, 0.04757860680698066),\n", + " (848, 0.036973693332326786),\n", + " (849, 0.0),\n", + " (850, 0.07647191129018727),\n", + " (851, 0.025334729596907),\n", + " (852, 0.021300716142115247),\n", + " (853, 0.02913582733740325),\n", + " (854, 0.08000711205939975),\n", + " (855, 0.04891159880445185),\n", + " (856, 0.023918243661746996),\n", + " (857, 0.0),\n", + " (858, 0.03627381250550058),\n", + " (859, 0.13710212427677043),\n", + " (860, 0.0518785847899556),\n", + " (861, 0.02043797798283219),\n", + " (862, 0.052631578947368425),\n", + " (863, 
0.06131393394849658),\n", + " (864, 0.03850428143282595),\n", + " (865, 0.0),\n", + " (866, 0.08111071056538127),\n", + " (867, 0.0),\n", + " (868, 0.0207703246198632),\n", + " (869, 0.05006261743217589),\n", + " (870, 0.11959121830873498),\n", + " (871, 0.04947706959952935),\n", + " (872, 0.0),\n", + " (873, 0.04188539082916955),\n", + " (874, 0.025649458802128853),\n", + " (875, 0.050669459193814),\n", + " (876, 0.0),\n", + " (877, 0.0),\n", + " (878, 0.09733285267845754),\n", + " (879, 0.0),\n", + " (880, 0.03181423814878889),\n", + " (881, 0.0),\n", + " (882, 0.02742042485535409),\n", + " (883, 0.025334729596907),\n", + " (884, 0.0),\n", + " (885, 0.07421560439929402),\n", + " (886, 0.023174488732966077),\n", + " (887, 0.03439087249804545),\n", + " (888, 0.047324852428030105),\n", + " (889, 0.11243619908556624),\n", + " (890, 0.0),\n", + " (891, 0.043355498476206004),\n", + " (892, 0.0),\n", + " (893, 0.016731862035256367),\n", + " (894, 0.08471737420873576),\n", + " (895, 0.0),\n", + " (896, 0.0),\n", + " (897, 0.039344473768231684),\n", + " (898, 0.0978231976089037),\n", + " (899, 0.08000711205939975),\n", + " (900, 0.11959121830873498),\n", + " (901, 0.08111071056538127),\n", + " (902, 0.023174488732966077),\n", + " (903, 0.031219527052723135),\n", + " (904, 0.02060214108575823),\n", + " (905, 0.0),\n", + " (906, 0.0),\n", + " (907, 0.023662426214015053),\n", + " (908, 0.07336739820667779),\n", + " (909, 0.0),\n", + " (910, 0.019968076595771794),\n", + " (911, 0.023662426214015053),\n", + " (912, 0.0331133089266261),\n", + " (913, 0.0),\n", + " (914, 0.03721614637823934),\n", + " (915, 0.02666903735313325),\n", + " (916, 0.03458572319330373),\n", + " (917, 0.028676966733820218),\n", + " (918, 0.0),\n", + " (919, 0.021677749238103002),\n", + " (920, 0.09365858115816941),\n", + " (921, 0.0),\n", + " (922, 0.07792865001991967),\n", + " (923, 0.0),\n", + " (924, 0.0),\n", + " (925, 0.020121090914638345),\n", + " (926, 0.0),\n", + " (927, 0.0),\n", + " (928, 
0.0),\n", + " (929, 0.05195243334661311),\n", + " (930, 0.04836508334066745),\n", + " (931, 0.1748149640244195),\n", + " (932, 0.07175473098524099),\n", + " (933, 0.07175473098524099),\n", + " (934, 0.029617443887954616),\n", + " (935, 0.13530201829834768),\n", + " (936, 0.0),\n", + " (937, 0.126673647984535),\n", + " (938, 0.026315789473684213),\n", + " (939, 0.1068366854693644),\n", + " (940, 0.0),\n", + " (941, 0.0301237616564389),\n", + " (942, 0.19134594929397597),\n", + " (943, 0.0),\n", + " (944, 0.0),\n", + " (945, 0.06131393394849658),\n", + " (946, 0.024455799402225926),\n", + " (947, 0.0),\n", + " (948, 0.0),\n", + " (949, 0.0),\n", + " (950, 0.03721614637823934),\n", + " (951, 0.06362847629757779),\n", + " (952, 0.025334729596907),\n", + " (953, 0.02271554252121273),\n", + " (954, 0.0),\n", + " (955, 0.026315789473684213),\n", + " (956, 0.05006261743217589),\n", + " (957, 0.06243905410544627),\n", + " (958, 0.024455799402225926),\n", + " (959, 0.025334729596907),\n", + " (960, 0.08740748201220976),\n", + " (961, 0.05006261743217589),\n", + " (962, 0.024182541670333724),\n", + " (963, 0.0623109738595896),\n", + " (964, 0.026315789473684213),\n", + " (965, 0.0),\n", + " (966, 0.0),\n", + " (967, 0.03721614637823934),\n", + " (968, 0.0),\n", + " (969, 0.05775642214923893),\n", + " (970, 0.0301237616564389),\n", + " (971, 0.03244428422615251),\n", + " (972, 0.2108663315950723),\n", + " (973, 0.19767387315371682),\n", + " (974, 0.028676966733820218),\n", + " (975, 0.0),\n", + " (976, 0.0),\n", + " (977, 0.07254762501100118),\n", + " (978, 0.09365858115816941),\n", + " (979, 0.04499212706658476),\n", + " (980, 0.0207703246198632),\n", + " (981, 0.0),\n", + " (982, 0.0582716546748065),\n", + " (983, 0.032283269414765066),\n", + " (984, 0.0),\n", + " (985, 0.05484084971070818),\n", + " (986, 0.10012523486435178),\n", + " (987, 0.05923488777590923),\n", + " (988, 0.0),\n", + " (989, 0.0331133089266261),\n", + " (990, 0.0415406492397264),\n", + " (991, 
def recommend(movie, top_n=5):
    """Print the titles of the movies most similar to ``movie``.

    Relies on two notebook-level objects: ``new_df`` (a DataFrame with a
    ``'title'`` column) and ``similarity`` (indexable by row position, where
    ``similarity[i][j]`` is the similarity score between the movies at
    positions ``i`` and ``j`` of ``new_df``).

    Parameters
    ----------
    movie : str
        Exact title to look up in ``new_df['title']``. If the title occurs
        more than once, the first occurrence is used.
    top_n : int, default 5
        Maximum number of recommendations to print. Fewer are printed when
        ``new_df`` holds fewer other movies.

    Raises
    ------
    IndexError
        If ``movie`` is not present in ``new_df['title']``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Look the movie up by row *position*, not by index label: `similarity`
    # is addressed positionally (and so is `.iloc` below), whereas the labels
    # of `new_df` only coincide with positions for a pristine RangeIndex.
    matches = np.flatnonzero((new_df['title'] == movie).to_numpy())
    if matches.size == 0:
        # Same exception type the original `.index[0]` produced, but with a
        # message that says what actually went wrong.
        raise IndexError(f"movie {movie!r} not found in new_df['title']")
    movie_index = int(matches[0])

    distances = similarity[movie_index]

    # Highest score first; `sorted` is stable, so ties keep positional order.
    ranked = sorted(enumerate(distances), key=lambda pair: pair[1], reverse=True)

    # Drop the query movie explicitly instead of assuming it is ranked first:
    # with tied top scores it may not be, and slicing [1:] would then
    # recommend the movie to itself.
    neighbours = [pos for pos, _score in ranked if pos != movie_index][:top_n]

    for pos in neighbours:
        print(new_df['title'].iloc[pos])
"Titan A.E.\n" + ] + } + ], + "source": [ + "recommend('Avatar') #gives the top 5 recommended movies when we search for Avatar" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}