From c59cbb8cb59705be5626837b7b29acc46071ccfd Mon Sep 17 00:00:00 2001 From: r0oth3x49 Date: Sun, 4 Nov 2018 17:12:41 +0500 Subject: [PATCH] lynda-dl v0.3, added support for cookie based login, updated code quality, fixed #45, fixed #44 --- .gitignore | 1 + README.md | 15 ++++- lynda-dl.py | 133 ++++++++++++++++++++++++------------- lynda/__init__.py | 2 +- lynda/_auth.py | 30 ++++++++- lynda/_colorized/banner.py | 5 +- lynda/_compat.py | 3 + lynda/_extract.py | 31 +++++++-- lynda/_internal.py | 5 +- lynda/_lynda.py | 4 +- lynda/_shared.py | 5 +- 11 files changed, 171 insertions(+), 63 deletions(-) diff --git a/.gitignore b/.gitignore index 9b7302c..79d73ff 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ lynda.json course.json format.json course.txt +cookies.txt pack.sublime-project pack.sublime-workspace # Byte-compiled / optimized / DLL files diff --git a/README.md b/README.md index 4182995..93bd584 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,15 @@ experience the problem? All these details will help to fix any potential bugs as > > Any other information you want to share that is relevant to the issue being reported. +## ***Extracting Cookies*** + + - Log in to your lynda account via browser. + - Once you are logged in, right click on the page, then search for the option called **Inspect Element** and click on it. + - Then look for the **Network Tab** and click on it. Under the **Network Tab**, select the request type **XHR**. + - Now visit the **Course URL** you want to download; you will see some requests under **Network Tab XHR**. + - Right click on any of the requests that link to **lynda.com**. Simply copy the **Request Headers** and save them to a text file. + - Done. Run lynda-dl against that text file and it will start downloading the course. + ## ***Requirements*** - Python (2 or 3) @@ -126,8 +135,8 @@ You can download the latest version of lynda-dl by cloning the GitHub repository

 Author: Nasir khan (r0ot h3x49)
 
-usage: lynda-dl.py [-h] [-v] [-u] [-p] [-o] [-d] [-q] [--info] [--sub-only]
-                   [--skip-sub]
+usage: lynda-dl.py [-h] [-v] [-k] [-u] [-p] [-o] [-d] [-q] [--info]
+                   [--sub-only] [--skip-sub]
                    course
 
 A cross-platform python based utility to download courses from lynda for
@@ -141,6 +150,7 @@ General:
   -v, --version         Shows the version.
 
 Authentication:
+  -k , --cookies        Cookies to authenticate with.
   -u , --username       Username or Library Card Number.
   -p , --password       Password or Library Card Pin.
   -o , --organization   Organization, registered at Lynda.
@@ -157,5 +167,6 @@ Others:
 Example:
   python lynda-dl.py  COURSE_URL
   python lynda-dl.py -o organization COURSE_URL
+  python lynda-dl.py -k cookies.txt COURSE_URL
 
 
diff --git a/lynda-dl.py b/lynda-dl.py index 8122dfc..f20be9e 100644 --- a/lynda-dl.py +++ b/lynda-dl.py @@ -18,10 +18,11 @@ class Lynda(ProgressBar): - def __init__(self, url, username='', password='', organization=''): + def __init__(self, url, username='', password='', organization='', cookies=''): self.url = url self.username = username self.password = password + self.cookies = cookies self.organization = organization super(Lynda, self).__init__() @@ -148,11 +149,14 @@ def download_lectures_and_captions(self, lecture_best='', lecture_title='', inne self.download_subtitles(subtitle=subtitle, filepath=filepath) def course_download(self, path='', quality='', caption_only=False, skip_captions=False): - if not self.organization: - sys.stdout.write(fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sb + "Trying to login as " + fm + sb +"(%s)" % (self.username) + fg + sb +"...\n") - if self.organization: - sys.stdout.write(fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sb + "Trying to login as organization " + fm + sb +"(%s)" % (self.organization) + fg + sb +"...\n") - course = lynda.course(url=self.url, username=self.username, password=self.password, organization=self.organization) + if self.cookies: + sys.stdout.write(fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sb + "Trying to login using cookies ...\n") + if not self.cookies: + if not self.organization: + sys.stdout.write(fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sb + "Trying to login as " + fm + sb +"(%s)" % (self.username) + fg + sb +"...\n") + if self.organization: + sys.stdout.write(fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sb + "Trying to login as organization " + fm + sb +"(%s)" % (self.organization) + fg + sb +"...\n") + course = lynda.course(url=self.url, username=self.username, password=self.password, organization=self.organization, cookies=self.cookies) course_id = course.id course_name = course.title chapters = course.get_chapters() @@ -218,6 +222,11 
@@ def main(): help="Shows the version.") authentication = parser.add_argument_group("Authentication") + authentication.add_argument( + '-k', '--cookies',\ + dest='cookies',\ + type=str,\ + help="Cookies to authenticate with.",metavar='') authentication.add_argument( '-u', '--username',\ dest='username',\ @@ -272,13 +281,84 @@ def main(): sys.stdout.write (fc + sd + "[" + fw + sb + "+" + fc + sd + "] : " + fw + sd + "Found (%s) courses ..\n" % (len(courses))) for course in courses: + if options.cookies: + f_in = open(options.cookies) + cookies = '\n'.join([line for line in (l.strip() for l in f_in) if line]) + f_in.close() + lynda = Lynda(url=course, cookies=cookies) + if options.info: + lynda.course_list_down() + + if not options.info: + if options.caption_only and not options.skip_captions: + lynda.course_download(caption_only=options.caption_only, path=options.output) + elif not options.caption_only and options.skip_captions: + lynda.course_download(skip_captions=options.skip_captions, path=options.output, quality=options.quality) + else: + lynda.course_download(path=options.output, quality=options.quality) + + if not options.cookies: + if not options.username and not options.password: + username = fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sd + "Username : " + fg + sb + password = fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sd + "Password : " + fc + sb + email = getpass.getuser(prompt=username) + passwd = getpass.getpass(prompt=password) + if email and passwd: + lynda = Lynda(url=course, username=email, password=passwd, organization=options.org) + else: + sys.stdout.write('\n' + fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Username and password is required.\n") + sys.exit(0) + + if options.info: + lynda.course_list_down() + + if not options.info: + if options.caption_only and not options.skip_captions: + lynda.course_download(caption_only=options.caption_only, path=options.output) + elif not options.caption_only and 
options.skip_captions: + lynda.course_download(skip_captions=options.skip_captions, path=options.output, quality=options.quality) + else: + lynda.course_download(path=options.output, quality=options.quality) + + elif options.username and options.password: + lynda = Lynda(url=course, username=options.username, password=options.password, organization=options.org) + if options.info: + lynda.course_list_down() + + if not options.info: + if options.caption_only and not options.skip_captions: + lynda.course_download(caption_only=options.caption_only, path=options.output) + elif not options.caption_only and options.skip_captions: + lynda.course_download(skip_captions=options.skip_captions, path=options.output, quality=options.quality) + else: + lynda.course_download(path=options.output, quality=options.quality) + + if not os.path.isfile(options.course): + + if options.cookies: + f_in = open(options.cookies) + cookies = '\n'.join([line for line in (l.strip() for l in f_in) if line]) + f_in.close() + lynda = Lynda(url=options.course, cookies=cookies) + if options.info: + lynda.course_list_down() + + if not options.info: + if options.caption_only and not options.skip_captions: + lynda.course_download(caption_only=options.caption_only, path=options.output) + elif not options.caption_only and options.skip_captions: + lynda.course_download(skip_captions=options.skip_captions, path=options.output, quality=options.quality) + else: + lynda.course_download(path=options.output, quality=options.quality) + + if not options.cookies: if not options.username and not options.password: username = fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sd + "Username : " + fg + sb password = fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sd + "Password : " + fc + sb email = getpass.getuser(prompt=username) passwd = getpass.getpass(prompt=password) if email and passwd: - lynda = Lynda(url=course, username=email, password=passwd, organization=options.org) + lynda = 
Lynda(url=options.course, username=email, password=passwd, organization=options.org) else: sys.stdout.write('\n' + fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Username and password is required.\n") sys.exit(0) @@ -295,7 +375,7 @@ def main(): lynda.course_download(path=options.output, quality=options.quality) elif options.username and options.password: - lynda = Lynda(url=course, username=options.username, password=options.password, organization=options.org) + lynda = Lynda(url=options.course, username=options.username, password=options.password, organization=options.org) if options.info: lynda.course_list_down() @@ -307,43 +387,6 @@ def main(): else: lynda.course_download(path=options.output, quality=options.quality) - if not os.path.isfile(options.course): - - if not options.username and not options.password: - username = fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sd + "Username : " + fg + sb - password = fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sd + "Password : " + fc + sb - email = getpass.getuser(prompt=username) - passwd = getpass.getpass(prompt=password) - if email and passwd: - lynda = Lynda(url=options.course, username=email, password=passwd, organization=options.org) - else: - sys.stdout.write('\n' + fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Username and password is required.\n") - sys.exit(0) - - if options.info: - lynda.course_list_down() - - if not options.info: - if options.caption_only and not options.skip_captions: - lynda.course_download(caption_only=options.caption_only, path=options.output) - elif not options.caption_only and options.skip_captions: - lynda.course_download(skip_captions=options.skip_captions, path=options.output, quality=options.quality) - else: - lynda.course_download(path=options.output, quality=options.quality) - - elif options.username and options.password: - lynda = Lynda(url=options.course, username=options.username, password=options.password, 
organization=options.org) - if options.info: - lynda.course_list_down() - - if not options.info: - if options.caption_only and not options.skip_captions: - lynda.course_download(caption_only=options.caption_only, path=options.output) - elif not options.caption_only and options.skip_captions: - lynda.course_download(skip_captions=options.skip_captions, path=options.output, quality=options.quality) - else: - lynda.course_download(path=options.output, quality=options.quality) - if __name__ == '__main__': try: main() diff --git a/lynda/__init__.py b/lynda/__init__.py index a8795d5..7da69a6 100644 --- a/lynda/__init__.py +++ b/lynda/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -__version__ = "0.2" +__version__ = "0.3" __author__ = "Nasir Khan (r0ot h3x49)" __license__ = 'MIT' __copyright__ = 'Copyright (c) 2018 Nasir Khan (r0ot h3x49)' diff --git a/lynda/_auth.py b/lynda/_auth.py index c897064..6eb0b47 100644 --- a/lynda/_auth.py +++ b/lynda/_auth.py @@ -32,6 +32,7 @@ conn_error, HEADERS, LOGOUT_URL, + ParseCookie, USER_LOGIN_URL, AJAX_USERNAME, AJAX_PASSWORD, @@ -158,8 +159,33 @@ def _organization_session(self): sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Failed to extract login-form..\n") sys.exit(0) - def authenticate(self): + def _cookie_session_step(self, raw_cookies): + cookies = {} + cookie_parser = ParseCookie() + try: + cookie_string = re.search(r'Cookie:\s*(.+)\n', raw_cookies, flags=re.I).group(1) + except: + sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Cookies error, Request Headers is required.\n") + sys.stdout.write(fc + sd + "[" + fm + sb + "i" + fc + sd + "] : " + fg + sb + "Copy Request Headers for single request to a file, while you are logged in.\n") + sys.exit(0) + cookie_parser.load(cookie_string) + for key, cookie in cookie_parser.items(): + cookies[key] = cookie.value + return cookies + + def _cookies_session(self, cookies): + auth_cookies = 
self._cookie_session_step(raw_cookies=cookies) + if auth_cookies: + self._session.cookies.update(auth_cookies) + return self._session + else: + return None + + def authenticate(self, cookies=''): + if cookies: + return self._cookies_session(cookies=cookies) if self.organization: return self._organization_session() else: - return self._user_session() \ No newline at end of file + return self._user_session() + diff --git a/lynda/_colorized/banner.py b/lynda/_colorized/banner.py index 5852c07..309ee4d 100644 --- a/lynda/_colorized/banner.py +++ b/lynda/_colorized/banner.py @@ -23,6 +23,7 @@ ''' from .colors import * +from .. import __version__ def banner(): banner = """%s%s @@ -35,8 +36,8 @@ def banner(): 888 `888' 888 888 888 888 d8( 888 888 888 888 o888o .8' o888o o888o `Y8bod88P" `Y888""8o `Y8bod88P" o888o .o..P' - `Y8P'\t\t\t\t%s%sVersion : %s%s0.2\n\t\t\t\t\t%s%sAuthor : %s%sNasir Khan (r0ot h3x49)\n\t\t\t\t\t%s%sGithub : %s%shttps://github.com/r0oth3x49 + `Y8P'\t\t\t\t%s%sVersion : %s%s%s\n\t\t\t\t\t%s%sAuthor : %s%sNasir Khan (r0ot h3x49)\n\t\t\t\t\t%s%sGithub : %s%shttps://github.com/r0oth3x49 -""" % (fc, sb, fm, sb, fc, sb, fm, sb, fc, sb, fy,sb, fg, sd, fy,sb, fg, sd, fy,sb, fg, sd) +""" % (fc, sb, fm, sb, fc, sb, fm, sb, fc, sb, fy,sb, fg, sd, __version__, fy,sb, fg, sd, fy,sb, fg, sd) return banner diff --git a/lynda/_compat.py b/lynda/_compat.py index 21c69c1..a50e2cf 100644 --- a/lynda/_compat.py +++ b/lynda/_compat.py @@ -41,6 +41,7 @@ from urllib.request import urlopen as compat_urlopen from urllib.request import build_opener as compat_opener from html.parser import HTMLParser as compat_HTMLParser + from http.cookies import SimpleCookie as ParseCookie from requests.exceptions import ConnectionError as conn_error encoding, pyver = str, 3 @@ -57,6 +58,7 @@ from urllib2 import build_opener as compat_opener from urlparse import urlparse as compat_urlparse from HTMLParser import HTMLParser as compat_HTMLParser + from Cookie import SimpleCookie as 
ParseCookie from requests.exceptions import ConnectionError as conn_error encoding, pyver = unicode, 2 @@ -110,6 +112,7 @@ 'compat_HTMLParser', 'HEADERS', 'NO_DEFAULT', + 'ParseCookie', 'USER_LOGIN_URL', 'AJAX_USERNAME', diff --git a/lynda/_extract.py b/lynda/_extract.py index 2b9c5d5..55e63ea 100644 --- a/lynda/_extract.py +++ b/lynda/_extract.py @@ -84,12 +84,16 @@ def _clean(self, text): return text def _sanitize(self, unsafetext): - text = slugify(unsafetext, lower=False, spaces=True, ok=SLUG_OK + '()._-') + text = sanitize(slugify(unsafetext, lower=False, spaces=True, ok=SLUG_OK + '().[]')) return text - def _login(self, username='', password='', organization=''): - auth = LyndaAuth(username=username, password=password, organization=organization) - self._session = auth.authenticate() + def _login(self, username='', password='', organization='', cookies=''): + if not cookies: + auth = LyndaAuth(username=username, password=password, organization=organization) + self._session = auth.authenticate() + if cookies: + auth = LyndaAuth() + self._session = auth.authenticate(cookies=cookies) if self._session is not None: return {'login' : 'successful'} else: @@ -129,15 +133,19 @@ def _extract_asset_download_url(self, url): sys.exit(0) return {'type' : 'file', 'file_size' : int(response.headers.get('Content-Length')), 'download_url' : response.url, 'extension' : response.headers.get('Content-Type').split('/')[-1]} - def _extract_assets(self, course_id): url = EXERCISE_FILES_URL.format(course_id=course_id) _temp = [] try: response = self._session.get(url).json() except conn_error as e: + print("") sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Connection error : make sure your internet connection is working.\n") sys.exit(0) + except ValueError as e: + print("") + sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "JSONDecodeError : it seems your cookies got expired, provide again.\n") + sys.exit(0) if response and 
isinstance(response, dict): exercise_tab = (response.get('exercisetab')).replace('\r', '').replace('\n', '').replace('\t', '') _temp = [m.groupdict() for m in re.finditer(self._EXERCISE_FILES_REGEX, exercise_tab)] @@ -151,8 +159,13 @@ def _extract_subtitles(self, video_id): try: subs = self._session.get(url).json() except conn_error as e: + print("") sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Connection error : make sure your internet connection is working.\n") sys.exit(0) + except ValueError as e: + print("") + sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "JSONDecodeError : it seems your cookies got expired, provide again.\n") + sys.exit(0) if subs: return { 'type' : 'subtitle', @@ -186,8 +199,13 @@ def _extract_sources(self, course_id, lecture_id): try: play = self._session.get(url).json() except conn_error as e: + print("") sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "Connection error : make sure your internet connection is working.\n") sys.exit(0) + except ValueError as e: + print("") + sys.stdout.write(fc + sd + "[" + fr + sb + "-" + fc + sd + "] : " + fr + sb + "JSONDecodeError : it seems your cookies got expired, provide again.\n") + sys.exit(0) if play and isinstance(play, list): _best_resolution = [{'url' : s['urls'].get('720')} for s in play if s['urls'].get('720')] _max_stream = self._max(list(map(self._get_max_stream, _best_resolution))) if _best_resolution and isinstance(_best_resolution, list) else {} @@ -288,4 +306,5 @@ def _real_extract(self, url=''): 'lectures_count' : len(_temp_lectures) }) - return _lynda \ No newline at end of file + return _lynda + diff --git a/lynda/_internal.py b/lynda/_internal.py index d7b831b..e9bba67 100644 --- a/lynda/_internal.py +++ b/lynda/_internal.py @@ -45,7 +45,10 @@ def __init__(self, *args, **kwargs): def _fetch_course(self): if self._have_basic: return - auth = self._login(username=self._username, 
password=self._password, organization=self._organization) + if not self._cookies: + auth = self._login(username=self._username, password=self._password, organization=self._organization) + if self._cookies: + auth = self._login(cookies=self._cookies) if auth.get('login') == 'successful': sys.stdout.write(fc + sd + "[" + fm + sb + "+" + fc + sd + "] : " + fg + sb + "Logged in successfully.\n") sys.stdout.write('\r' + fc + sd + "[" + fm + sb + "*" + fc + sd + "] : " + fg + sb + "Downloading course information .. \r") diff --git a/lynda/_lynda.py b/lynda/_lynda.py index ff3ffa1..d445c6f 100644 --- a/lynda/_lynda.py +++ b/lynda/_lynda.py @@ -26,7 +26,7 @@ from ._internal import InternLyndaCourse as Lynda -def course(url, username='', password='', organization='', basic=True, callback=None): +def course(url, username='', password='', organization='', cookies='', basic=True, callback=None): """Returns lynda course instance. @params: @@ -35,4 +35,4 @@ def course(url, username='', password='', organization='', basic=True, callback= password : Lynda account password required : type (string) organization : Lynda organization name optional : type (string) """ - return Lynda(url, username, password, organization, basic, callback) \ No newline at end of file + return Lynda(url, username, password, organization, cookies, basic, callback) \ No newline at end of file diff --git a/lynda/_shared.py b/lynda/_shared.py index 2c49d09..3c862eb 100644 --- a/lynda/_shared.py +++ b/lynda/_shared.py @@ -40,12 +40,13 @@ class LyndaCourse(object): - def __init__(self, url, username='', password='', organization='', basic=True, callback=None): + def __init__(self, url, username='', password='', organization='', cookies='', basic=True, callback=None): self._url = url self._username = username self._password = password - self._organization = organization + self._cookies = cookies + self._organization = organization self._callback = callback or (lambda x: None) self._have_basic = False