-
Notifications
You must be signed in to change notification settings - Fork 0
/
infoscraper.py
282 lines (231 loc) · 12.3 KB
/
infoscraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# Media Info BBCode Generator for Uploads
# Gathers info from TMDb and Mediainfo, and uploads screenshots to Imgur before generating final BBCode
# Written by dcquence 2024
# Fixes blurriness of the tkinter file-selection dialog on high-DPI Windows displays
import ctypes
try:
    # Opt this process in to DPI awareness so the file dialog is not
    # bitmap-scaled (blurry) on high-DPI displays.
    # NOTE: 1 is PROCESS_SYSTEM_DPI_AWARE in the Windows PROCESS_DPI_AWARENESS
    # enumeration; per-monitor awareness would be 2.
    ctypes.windll.shcore.SetProcessDpiAwareness(1)
except (AttributeError, OSError):
    # Best effort only: ctypes.windll does not exist outside Windows
    # (AttributeError), and loading shcore can fail on Windows versions
    # that do not ship it (OSError). The script works without it.
    pass
import requests
from imgurpython import ImgurClient
import subprocess
import tkinter as tk
from tkinter import filedialog
import os
import re
import sys
# Absolute path of 'template_mediainfo.txt', resolved relative to the directory
# containing this script; main() hands it to the mediainfo CLI via --Inform=file://.
template = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'template_mediainfo.txt')
def upload_image_to_imgur(image_path, client_id, client_secret):
    """Upload one local image to Imgur anonymously and return its link.

    Args:
        image_path: Path of the image file to upload.
        client_id: Imgur API client ID.
        client_secret: Imgur API client secret.

    Returns:
        The value stored under the 'link' key of the upload response.
    """
    uploaded = ImgurClient(client_id, client_secret).upload_from_path(image_path, anon=True)
    return uploaded['link']
def get_movie_info(tmdb_id, api_key):
    """Fetch the details of a movie from the TMDb v3 API.

    Two requests are made: one for the movie record and one for its credits.

    Args:
        tmdb_id: TMDb identifier of the movie.
        api_key: TMDb API key, sent as the ``api_key`` query parameter.

    Returns:
        A tuple ``(title, plot_summary, director, writers, cast, poster_url,
        imdb_id)`` where ``writers`` and ``cast`` are lists of names (cast is
        limited to the first five entries), ``poster_url`` is ``None`` when
        the movie has no poster, and ``imdb_id`` may be ``None``.

    Raises:
        requests.exceptions.HTTPError: If TMDb answers with an error status
            (for example an unknown ID or an invalid API key).
        requests.exceptions.RequestException: On connection problems or
            when a request exceeds the timeout.
    """
    base_url = f"https://api.themoviedb.org/3/movie/{tmdb_id}"
    params = {'api_key': api_key}
    timeout = 30  # seconds; without a timeout a stalled connection blocks forever

    # Fetch basic movie info
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # TMDb may return empty strings for missing text, so fall back on any
    # falsy value rather than only on an absent key.
    title = data.get('title') or 'Title not available'
    plot_summary = data.get('overview') or 'Plot summary not available'

    # Fetch credits (cast and crew) for the movie
    credits_response = requests.get(f"{base_url}/credits", params=params, timeout=timeout)
    credits_response.raise_for_status()
    credits_data = credits_response.json()
    crew = credits_data.get('crew') or []
    cast_members = credits_data.get('cast') or []

    director = next(
        (member['name'] for member in crew if member.get('job') == 'Director'),
        'Director information not available',
    )
    # A person can hold several writing jobs (e.g. story and screenplay);
    # dict.fromkeys removes the repeats while keeping the original order.
    writers = list(dict.fromkeys(
        member['name'] for member in crew if member.get('department') == 'Writing'
    ))
    cast = [actor['name'] for actor in cast_members[:5]]  # Top 5 cast members

    poster_path = data.get('poster_path')
    poster_url = f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else None

    imdb_id = data.get('imdb_id')
    return title, plot_summary, director, writers, cast, poster_url, imdb_id
def get_tv_series_info(tmdb_id, api_key, season=None, episode=None):
    """Fetch the details of a TV series, and optionally one episode, from TMDb.

    Args:
        tmdb_id: TMDb identifier of the series.
        api_key: TMDb API key, sent as the ``api_key`` query parameter.
        season: Season number of the episode to look up, or ``None``.
        episode: Episode number to look up, or ``None``. Episode details are
            requested only when both ``season`` and ``episode`` are given.

    Returns:
        A tuple ``(title, plot_summary, creators, cast, poster_url,
        episode_info)``. ``creators`` and ``cast`` are lists of names (cast is
        limited to the first five entries), ``poster_url`` is ``None`` when
        the series has no poster, and ``episode_info`` is either ``None`` or a
        dict with the keys ``'title'`` and ``'plot'``.

    Raises:
        requests.exceptions.HTTPError: If the series or credits request is
            answered with an error status. A failed episode lookup does not
            raise; it yields ``episode_info = None``.
        requests.exceptions.RequestException: On connection problems or
            when a request exceeds the timeout.
    """
    base_url = f"https://api.themoviedb.org/3/tv/{tmdb_id}"
    params = {'api_key': api_key}
    timeout = 30  # seconds; without a timeout a stalled connection blocks forever

    # Fetch basic TV series info
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # TMDb may return empty strings for missing text, so fall back on any
    # falsy value rather than only on an absent key.
    title = data.get('name') or 'Title not available'
    plot_summary = data.get('overview') or 'Plot summary not available'

    # Fetch credits (cast and crew) for the TV series
    credits_response = requests.get(f"{base_url}/credits", params=params, timeout=timeout)
    credits_response.raise_for_status()
    credits_data = credits_response.json()

    creators = [creator['name'] for creator in data.get('created_by') or []]
    cast = [actor['name'] for actor in (credits_data.get('cast') or [])[:5]]  # Top 5 cast members

    poster_path = data.get('poster_path')
    poster_url = f"https://image.tmdb.org/t/p/w500{poster_path}" if poster_path else None

    episode_info = None
    if season is not None and episode is not None:
        # Episode details are optional extras: a non-200 answer is tolerated
        # and simply leaves episode_info as None.
        episode_response = requests.get(
            f"{base_url}/season/{season}/episode/{episode}", params=params, timeout=timeout
        )
        if episode_response.status_code == 200:
            episode_data = episode_response.json()
            episode_info = {
                'title': episode_data.get('name') or 'Episode title not available',
                'plot': episode_data.get('overview') or 'Episode plot not available',
            }
    return title, plot_summary, creators, cast, poster_url, episode_info
def format_bbcode(title, plot_summary, creators_or_director, writers, cast, imgur_link, mediainfo_output, screenshot_links, is_movie, episode_info=None):
    """Assemble the BBCode description for an upload.

    Args:
        title: Movie or series title.
        plot_summary: Plot text of the movie or series.
        creators_or_director: The director's name (a string) when ``is_movie``
            is true, otherwise a list of creator names.
        writers: List of writer names; only used when ``is_movie`` is true.
        cast: List of cast member names.
        imgur_link: URL of the poster image, or a falsy value for no poster.
        mediainfo_output: Text placed inside the ``[code]`` block.
        screenshot_links: Iterable of screenshot URLs (``None`` is treated as
            empty).
        is_movie: Selects the movie layout (director/writers) over the TV
            layout (creators).
        episode_info: Optional dict with the keys ``'title'`` and ``'plot'``.

    Returns:
        The complete BBCode as a single string.
    """
    # The closing [/center] further down is emitted unconditionally, so the
    # opening tag must be too; otherwise a missing poster leaves the markup
    # unbalanced.
    parts = ["[center]"]
    if imgur_link:
        parts.append(f"[img]{imgur_link}[/img]\n\n")
    parts.append(f"[b]Title:[/b] {title}\n\n")

    if episode_info:
        parts.append(f"[b]Episode Title:[/b] {episode_info['title']}\n\n")
        parts.append(f"[b]Episode Plot:[/b] {episode_info['plot']}\n\n")

    parts.append(f"[icon=plot]\n[b]Plot:[/b] {plot_summary}\n\n")

    if is_movie:
        writers_text = ', '.join(writers) if writers else 'Writers information not available'
        parts.append(f"[b]Director:[/b] {creators_or_director}\n\n")
        parts.append(f"[b]Writers:[/b] {writers_text}\n\n")
    else:
        creators_text = (
            ', '.join(creators_or_director)
            if creators_or_director
            else 'Creators information not available'
        )
        parts.append(f"[b]Creators:[/b] {creators_text}\n\n")

    cast_text = ', '.join(cast) if cast else 'Cast information not available'
    parts.append(f"[icon=cast]\n[b]Cast:[/b] {cast_text}\n\n")
    parts.append("[icon=info][/center]\n\n")
    parts.append(f"[code]{mediainfo_output}[/code]\n\n")

    # Screenshot links go last, one image tag per line.
    parts.extend(f"[img]{link}[/img]\n" for link in screenshot_links or ())
    return "".join(parts)
def select_video_file():
    """Show a file-open dialog and return the path the user picked.

    A Tk root window is required for the dialog; it is hidden while the
    dialog is open and destroyed afterwards so no invisible window or Tcl
    interpreter outlives the call.

    Returns:
        Whatever ``filedialog.askopenfilename()`` returns: the selected path,
        or a falsy value if the dialog is cancelled.
    """
    root = tk.Tk()
    root.withdraw()  # Only the dialog should be visible, not the empty root window
    try:
        return filedialog.askopenfilename()
    finally:
        root.destroy()
def read_template_file(file_path):
    """Return the full text content of the file at ``file_path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale's default encoding of the machine running the script.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file content as a string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()
def create_screenshots(video_path):
    """Grab up to four JPEG screenshots from a video using ffprobe and ffmpeg.

    Frames are taken at the 5-minute mark and every 15 minutes after that,
    for as long as the timestamp lies before the end of the video, up to four
    frames. A video shorter than 5 minutes therefore yields no screenshots.
    Files are written as ``screenshot_1.jpg`` ... ``screenshot_4.jpg`` into a
    ``screenshots`` folder next to this script.

    Args:
        video_path: Path of the video file.

    Returns:
        Path of the folder the screenshots were written to.

    Raises:
        ValueError: If ffprobe does not report a numeric duration.
        FileNotFoundError: If the ffprobe or ffmpeg executable is not found.
    """
    # Create screenshots folder
    script_dir = os.path.dirname(os.path.abspath(__file__))
    screenshots_folder = os.path.join(script_dir, 'screenshots')
    os.makedirs(screenshots_folder, exist_ok=True)

    # Get the duration of the video in seconds. stderr is captured separately
    # so that an ffprobe error message never ends up in the text parsed as a
    # number, and can be shown to the user instead.
    probe = subprocess.run(
        ['ffprobe', '-v', 'error', '-show_entries', 'format=duration',
         '-of', 'default=noprint_wrappers=1:nokey=1', video_path],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
    )
    try:
        duration = float(probe.stdout.strip())
    except ValueError:
        details = probe.stderr.strip() or probe.stdout.strip() or 'no output'
        raise ValueError(f"Could not determine video duration with ffprobe: {details}") from None

    start_time = 5 * 60  # First screenshot at the 5-minute mark
    screenshot_interval = 15 * 60  # Then one every 15 minutes
    max_screenshots = 4
    end_time = min(int(duration), start_time + screenshot_interval * max_screenshots)

    timestamps = range(start_time, end_time, screenshot_interval)
    for index, timestamp in enumerate(timestamps, start=1):
        screenshot_path = os.path.join(screenshots_folder, f'screenshot_{index}.jpg')
        # -y and a closed stdin: if a file from an earlier, aborted run is
        # still present, ffmpeg would otherwise wait for an overwrite
        # confirmation that is invisible because its output is discarded.
        # The exit status is deliberately ignored (best effort per frame).
        subprocess.run(
            ['ffmpeg', '-y', '-ss', str(timestamp), '-i', video_path,
             '-vframes', '1', '-q:v', '2', screenshot_path],
            stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
    return screenshots_folder
def upload_screenshots(screenshots_folder, client_id, client_secret):
    """Upload every file in a folder to Imgur and delete each one afterwards.

    Files are processed in sorted name order so the returned links follow the
    screenshot numbering; ``os.listdir`` alone gives no ordering guarantee.
    Entries that are not regular files (e.g. sub-directories) are skipped.

    Args:
        screenshots_folder: Folder containing the screenshot files.
        client_id: Imgur API client ID.
        client_secret: Imgur API client secret.

    Returns:
        List of Imgur links, one per uploaded file, in upload order.
    """
    imgur_links = []
    for screenshot in sorted(os.listdir(screenshots_folder)):
        screenshot_path = os.path.join(screenshots_folder, screenshot)
        if not os.path.isfile(screenshot_path):
            continue
        imgur_links.append(upload_image_to_imgur(screenshot_path, client_id, client_secret))
        os.remove(screenshot_path)  # Delete the screenshot file after uploading
    return imgur_links
def sanitize_filename(filename):
    """Turn arbitrary text into a string usable as a file name.

    The characters ``\\ / : * ? " < > |`` are dropped, then every run of
    whitespace is collapsed into a single underscore; leading and trailing
    whitespace disappears.

    Args:
        filename: The text to clean up.

    Returns:
        The cleaned-up string (possibly empty).
    """
    invalid_chars = re.compile(r'[\\/:*?"<>|]')
    words = invalid_chars.sub('', filename).split()
    return "_".join(word for word in words)
import re
# Explicit markers such as "S01E02", "s1e2", "S01.E02" or "S01E100".
_SEASON_EPISODE_RE = re.compile(r's(\d{1,2})[ ._-]?e(\d{1,3})(?!\d)', re.IGNORECASE)
# "1x02" / "1e02" style. The digit guards on both sides keep picture sizes
# such as "1920x1080" from being read as season/episode.
_CROSS_STYLE_RE = re.compile(r'(?<!\d)(\d{1,2})[xe](\d{2})(?!\d)', re.IGNORECASE)
# Stand-alone 3- or 4-digit tokens ("101", "1004"). A token glued to letters or
# digits ("1080p", "x264") or following a lone h/x ("H.264") is ignored.
_COMPACT_RE = re.compile(
    r'(?<![A-Za-z0-9])(?<![^A-Za-z0-9][hx][. ])(\d{3,4})(?![A-Za-z0-9])',
    re.IGNORECASE,
)


def _looks_like_year(token):
    """Return True for a 4-digit token in the range 1900-2099."""
    return len(token) == 4 and 1900 <= int(token) <= 2099


def extract_season_episode(filename):
    """Guess the season and episode number from a video file name.

    Patterns are tried from most to least explicit, so release tags such as
    "1080p" or a year never take precedence over a real marker:

    1. ``S01E02`` style (optional ``.``, ``_``, ``-`` or space before the E).
    2. ``1x02`` / ``1e02`` style.
    3. A stand-alone compact number: ``101`` is season 1 episode 1, ``1004``
       is season 10 episode 4. If several such numbers exist, the first one
       that does not look like a year (1900-2099) wins; a year-like number is
       only used when it is the sole candidate.

    Args:
        filename: File name (without directory) to inspect.

    Returns:
        ``(season, episode)`` as integers, or ``(None, None)`` when nothing
        matched. Either number can legitimately be 0.
    """
    print(f"Extracting season and episode from filename: {filename}")

    for pattern in (_SEASON_EPISODE_RE, _CROSS_STYLE_RE):
        match = pattern.search(filename)
        if match:
            season, episode = int(match.group(1)), int(match.group(2))
            print(f"Matched explicit format: Season {season}, Episode {episode}")
            return season, episode

    candidates = _COMPACT_RE.findall(filename)
    if candidates:
        token = next((c for c in candidates if not _looks_like_year(c)), candidates[0])
        # The last two digits are the episode, whatever precedes them is the season.
        season, episode = int(token[:-2]), int(token[-2:])
        print(f"Matched {len(token)}-digit format: Season {season}, Episode {episode}")
        return season, episode

    # Return None if no match is found
    return None, None
def main():
    """Run the interactive BBCode generator from start to finish.

    Steps: ask whether the upload is a movie or TV show, ask for the TMDb ID,
    let the user pick the video file, fetch metadata from TMDb, upload the
    poster and screenshots to Imgur, run mediainfo with the template file and
    write the resulting BBCode to ``<sanitized title>.txt`` in the current
    working directory.

    Credentials are read from the environment variables ``TMDB_API_KEY``,
    ``IMGUR_CLIENT_ID`` and ``IMGUR_CLIENT_SECRET``; when a variable is not
    set, the placeholder string below is used and must be edited by hand.

    Any exception is reported on the console instead of propagating, and the
    function waits for Enter before returning so a console window opened just
    for this script stays readable.
    """
    try:
        print("Starting Media Info BBCode Generator...")
        media_type = input("Is this for a movie or TV show? (movie/tv): ").strip().lower()
        if media_type not in ['movie', 'tv']:
            print("Invalid choice. Please enter 'movie' or 'tv'.")
            input("Press Enter to exit.")
            return

        # Stray whitespace would otherwise end up inside the request URL.
        tmdb_id = input("Enter TMDB ID: ").strip()

        api_key = os.environ.get("TMDB_API_KEY", "<YOUR_TMDB_API_KEY>")  # Replace with your actual TMDB API key
        imgur_client_id = os.environ.get("IMGUR_CLIENT_ID", "<YOUR_IMGUR_CLIENT_ID>")  # Replace with your actual Imgur client ID
        imgur_client_secret = os.environ.get("IMGUR_CLIENT_SECRET", "<YOUR_IMGUR_CLIENT_SECRET>")  # Replace with your actual Imgur client secret

        video_path = select_video_file()
        if not video_path:
            # The dialog was cancelled; without a file nothing below can work.
            print("No video file selected.")
            input("Press Enter to exit.")
            return

        if media_type == 'movie':
            print("Fetching movie info...")
            title, plot_summary, director, writers, cast, poster_url, _imdb_id = get_movie_info(tmdb_id, api_key)
            creators_or_director = director
            episode_info = None
        else:
            tv_scope = input("Is this for the entire series or a single episode? (series/episode): ").strip().lower()
            if tv_scope not in ['series', 'episode']:
                print("Invalid choice. Please enter 'series' or 'episode'.")
                input("Press Enter to exit.")
                return
            if tv_scope == 'episode':
                season, episode = extract_season_episode(os.path.basename(video_path))
                # Compare against None explicitly: 0 is a valid number
                # (TMDb lists specials as season 0) and must not be rejected.
                if season is None or episode is None:
                    print("Failed to extract season and episode from filename.")
                    input("Press Enter to exit.")
                    return
            else:
                season, episode = None, None
            print("Fetching TV series info...")
            title, plot_summary, creators, cast, poster_url, episode_info = get_tv_series_info(tmdb_id, api_key, season, episode)
            creators_or_director = creators
            writers = None

        imgur_link = None
        if poster_url:
            print("Downloading poster...")
            image_path = "poster.jpg"
            poster_response = requests.get(poster_url, timeout=30)
            poster_response.raise_for_status()
            try:
                with open(image_path, 'wb') as f:
                    f.write(poster_response.content)
                imgur_link = upload_image_to_imgur(image_path, imgur_client_id, imgur_client_secret)
            finally:
                # Remove the temporary poster even when the upload fails.
                if os.path.exists(image_path):
                    os.remove(image_path)

        print("Creating screenshots...")
        screenshots_folder = create_screenshots(video_path)
        print("Uploading screenshots to Imgur...")
        screenshot_links = upload_screenshots(screenshots_folder, imgur_client_id, imgur_client_secret)

        print("Fetching MediaInfo...")
        mediainfo_output = subprocess.check_output(['mediainfo', '--Inform=file://' + template, video_path], text=True)

        print("Formatting BBCode...")
        bbcode = format_bbcode(title, plot_summary, creators_or_director, writers, cast, imgur_link, mediainfo_output, screenshot_links, media_type == 'movie', episode_info)

        output_filename = sanitize_filename(title) + ".txt"
        # Titles, names and plots regularly contain non-ASCII characters;
        # writing with the locale's default encoding can fail on them.
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(bbcode)
        print(f"BBCode saved to {output_filename}")
    except Exception as e:
        # Top-level boundary of an interactive script: report and fall
        # through to the prompt instead of showing a traceback.
        print(f"An error occurred: {e}")
    input("Press Enter to exit.")
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()