diff --git a/CHANGELOG b/CHANGELOG index 11df32cf..ce9e6443 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,21 @@ +19 October 2014 +Version 0.3.64 + +[Update] - Cache javascript file locally to temp dir (less fetching) +[Update] - Added itag 278 (low-res VP9) +[Feature] - Retrieve DASH streams (more streams, higher quality audio) +[Feature] - added remux_audio parameter to download function to remux audio + files if ffmpeg or avconv is installed (better compatibility) + +------------------------------------------------------------------------------- + 27 August 2014 Version 0.3.62 [Feature] - Added url_https property to return https url ------------------------------------------------------------------------------- + 15 August 2014 Version 0.3.60 diff --git a/README.rst b/README.rst index f487f3bf..ef0e341a 100644 --- a/README.rst +++ b/README.rst @@ -136,13 +136,13 @@ Download video and show progress: >>> best.download(quiet=False) 3,734,976 Bytes [0.20%] received. Rate: [ 719 KB/s]. ETA: [3284 secs] -Download video, use specific filepath: +Download video, use specific directory and/or filename: .. code-block:: pycon - >>> myfilename = "/tmp/" + best.title + "." + best.extension - >>> best.download(filepath=myfilename) + >>> filename = best.download(filepath="/tmp/") + >>> filename = best.download(filepath="/tmp/Game." + best.extension) Get audio-only streams (m4a and/or ogg vorbis): @@ -152,8 +152,11 @@ Get audio-only streams (m4a and/or ogg vorbis): >>> for a in audiostreams: ... print(a.bitrate, a.extension, a.get_filesize()) ... 
- 128k m4a 165076649 + 256k m4a 331379079 + 192k ogg 172524223 + 128k m4a 166863001 128k ogg 108981120 + 48k m4a 62700449 Download the 2nd audio stream from the above list: @@ -168,7 +171,7 @@ Get the best quality audio stream: >>> bestaudio = video.getbestaudio() >>> bestaudio.bitrate - '128k' + '256k' Download the best quality audio file: @@ -176,7 +179,7 @@ Download the best quality audio file: >>> bestaudio.download() -show ALL formats for a video (video+audio, video-only and audio-only): +show all media types for a video (video+audio, video-only and audio-only): .. code-block:: pycon @@ -184,6 +187,7 @@ show ALL formats for a video (video+audio, video-only and audio-only): >>> for s in allstreams: ... print(s.mediatype, s.extension, s.quality) ... + normal mp4 1280x720 normal webm 640x360 normal mp4 640x360 @@ -191,16 +195,20 @@ show ALL formats for a video (video+audio, video-only and audio-only): normal 3gp 320x240 normal 3gp 176x144 video m4v 1280x720 - video webm 720x480 + video webm 1280x720 video m4v 854x480 - video webm 640x480 + video webm 854x480 video m4v 640x360 - video webm 480x360 + video webm 640x360 video m4v 426x240 - video webm 360x240 + video webm 426x240 video m4v 256x144 + video webm 256x144 + audio m4a 256k + audio ogg 192k audio m4a 128k audio ogg 128k + audio m4a 48k Installation @@ -288,14 +296,16 @@ list available dowload streams: Stream Type Format Quality Size ------ ---- ------ ------- ---- - 1 normal webm [640x360] 33 MB - 2 normal mp4 [640x360] 24 MB - 3 normal flv [320x240] 13 MB - 4 normal 3gp [320x240] 10 MB - 5 normal 3gp [176x144] 3 MB - 6 audio m4a [48k] 2 MB - 7 audio m4a [128k] 5 MB - 8 audio m4a [256k] 10 MB + 1 normal webm [640x360] 33 MB + 2 normal mp4 [640x360] 23 MB + 3 normal flv [320x240] 14 MB + 4 normal 3gp [320x240] 9 MB + 5 normal 3gp [176x144] 3 MB + 6 audio m4a [48k] 2 MB + 7 audio m4a [128k] 5 MB + 8 audio ogg [128k] 5 MB + 9 audio ogg [192k] 7 MB + 10 audio m4a [256k] 10 MB Download mp4 640x360 (ie. 
stream number 2): @@ -308,4 +318,4 @@ Download m4a audio stream at 256k bitrate: .. code-block:: bash - $ ytdl -n8 cyMHZVT91Dw + $ ytdl -n10 cyMHZVT91Dw diff --git a/docs-sphinx/conf.py b/docs-sphinx/conf.py index 977746c4..f339eea0 100644 --- a/docs-sphinx/conf.py +++ b/docs-sphinx/conf.py @@ -14,6 +14,7 @@ import sys import os +import sphinx_rtd_theme # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -55,9 +56,9 @@ # built documents. # # The short X.Y version. -version = '0.3.62' +version = '0.3.64' # The full version, including alpha/beta/rc tags. -release = '0.3.62' +release = '0.3.64' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -102,8 +103,11 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' -#html_theme = 'nature' +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# html_theme = 'default' +# html_theme = 'nature' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs-sphinx/index.rst b/docs-sphinx/index.rst index 01e7a03f..93f8eb13 100644 --- a/docs-sphinx/index.rst +++ b/docs-sphinx/index.rst @@ -2,7 +2,7 @@ Pafy Documentation ****************** .. 
module:: Pafy -This is the documentation for Pafy - a Python library for retrieving content and metadata from YouTube +This is the documentation for Pafy - a Python library to download YouTube content and retrieve metadata A quick start intro with usage examples is available in the `README `_ @@ -39,13 +39,13 @@ Create a Pafy object using the :func:`pafy.new` function, giving a YouTube video :type basic: bool :param gdata: fetch gdata info (upload date, description, category, username, likes, dislikes) :type gdata: bool - :param signature: fetch data required to decrypt urls, if encrypted + :param signature: Note: The signature argument now has no effect and will be removed in a future version :type signature: bool :param size: fetch the size of each stream (slow)(decrypts urls if needed) :type size: bool :param callback: a callback function to receive status strings :type callback: function - :rtype: Pafy object + :rtype: :class:`pafy.Pafy` If any of **basic**, **gdata**, **signature** or **size** are *False*, those data items will be fetched only when first called for. @@ -302,7 +302,7 @@ Stream Attributes .. attribute:: Stream.threed - Whether the stream is a 3D video (*boolean*) + True if the stream is a 3D video (*boolean*) .. attribute:: Stream.title @@ -331,24 +331,30 @@ An example of accessing Stream attributes:: Stream Methods -------------- + + + .. function:: Stream.get_filesize() Returns the filesize of a stream -.. function:: Stream.download([filepath=""][, quiet=False][, callback=None]) +.. function:: Stream.download([filepath=""][, quiet=False][, callback=None][, meta=False][, remux_audio=False]) - Downloads the stream object + Downloads the stream object, returns the path of the downloaded file. 
- :param filepath: The filepath to use to save the stream, defaults to *title.extension* if ommitted + :param filepath: The filepath to use to save the stream, defaults to (sanitised) *title.extension* if omitted :type filepath: string - :param quiet: Whether to supress output of the download progress + :param quiet: If True, suppress output of the download progress :type quiet: boolean :param callback: Call back function to use for receiving download progress :type callback: function or None + :param meta: If True, video id and itag are appended to filename + :type meta: bool + :param remux_audio: If True, remux audio file downloads (fixes some compatibility issues with file format, requires ffmpeg/avconv) + :type remux_audio: bool + :rtype: str - If a callback function is provided, it will be called repeatedly for each - chunk downloaded. It must be a function that takes five arguments. These - are: + If a callback function is provided, it will be called repeatedly for each chunk downloaded. It must be a function that takes the following five arguments: - total bytes in stream, *int* - total bytes downloaded, *int* @@ -366,10 +372,9 @@ Example of using stream.download():: v = pafy.new("cyMHZVT91Dw") s = v.getbest() print("Size is %s" % s.get_filesize()) - s.download() + filename = s.download() # starts download -Will download the file to the current working directory with the filename -*title.extension* (eg. "cute kittens.mp4") and output the following progress statistics:: +Will download to the current working directory and output the following progress statistics:: Size is 34775366 1,015,808 Bytes [2.92%] received. Rate: [ 640 kbps]. ETA: [51 secs] diff --git a/pafy/pafy.py b/pafy/pafy.py index eebe25f9..8efbb6e7 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -3,7 +3,7 @@ """ pafy.py. 
-Python library to retrieve YouTube content and metadata +Python library to download YouTube content and retrieve metadata https://github.com/np1/pafy @@ -27,7 +27,7 @@ from __future__ import unicode_literals -__version__ = "0.3.62" +__version__ = "0.3.64" __author__ = "nagev" __license__ = "GPLv3" @@ -38,6 +38,8 @@ import time import json import logging +import hashlib +import tempfile from xml.etree import ElementTree @@ -84,17 +86,21 @@ def parseqs(data): return data -def fetch_decode(url): +def fetch_decode(url, encoding=None): """ Fetch url and decode. """ req = g.opener.open(url) ct = req.headers['content-type'] - if "charset=" in ct: + if encoding: + return req.read().decode(encoding) + + elif "charset=" in ct: encoding = re.search(r"charset=([\w-]+)\s*(:?;|$)", ct).group(1) - dbg("encoding: %s", ct) + dbg("encoding detected: %s", ct) return req.read().decode(encoding) else: + dbg("encoding unknown") return req.read() @@ -102,32 +108,33 @@ def new(url, basic=True, gdata=False, signature=True, size=False, callback=lambda x: None): """ Return a new pafy instance given a url or video id. + NOTE: The signature argument has been deprecated and now has no effect, + it will be removed in a future version. + Optional arguments: basic - fetch basic metadata and streams gdata - fetch gdata info (upload date, description, category) - signature - fetch data required to decrypt urls, if encrypted size - fetch the size of each stream (slow)(decrypts urls if needed) callback - a callback function to receive status strings - If any of the first four above arguments are False, those data items will + If any of the first three above arguments are False, those data items will be fetched only when first called for. The defaults are recommended for most cases. 
If you wish to create - many video objects at once, you may want to set all to False, eg: + many video objects at once, you may want to set basic to False, eg: - video = pafy.new(basic=False, signature=False) + video = pafy.new(basic=False) This will be quick because no http requests will be made on initialisation. - Setting signature or size to True will override the basic argument - and force basic data to be fetched too (basic data is required to - obtain Stream objects and determine whether signatures are encrypted. - - Similarly, setting size to true will force the signature data to be - fetched if the videos have encrypted signatures, so will override the - value set in the signature argument. + Setting size to True will override the basic argument and force basic data + to be fetched too (basic data is required to obtain Stream objects). """ + if not signature: + logging.warning("signature argument has no effect and will be removed" + " in a future version.") + return Pafy(url, basic, gdata, signature, size, callback) @@ -206,10 +213,6 @@ class g(object): '44': ('854x480', 'webm', "normal", ''), '45': ('1280x720', 'webm', "normal", ''), '46': ('1920x1080', 'webm', "normal", ''), - - # '59': ('1x1', 'mp4', 'normal', ''), - # '78': ('1x1', 'mp4', 'normal', ''), - '82': ('640x360-3D', 'mp4', "normal", ''), '83': ('640x480-3D', 'mp4', 'normal', ''), '84': ('1280x720-3D', 'mp4', "normal", ''), @@ -235,9 +238,9 @@ class g(object): '219': ('854x480', 'webm', 'video', 'VP8'), '242': ('360x240', 'webm', 'video', 'VP9'), '243': ('480x360', 'webm', 'video', 'VP9'), - '244': ('640x480', 'webm', 'video', 'VP9'), - '245': ('640x480', 'webm', 'video', 'VP9'), - '246': ('640x480', 'webm', 'video', 'VP9'), + '244': ('640x480', 'webm', 'video', 'VP9 low'), + '245': ('640x480', 'webm', 'video', 'VP9 med'), + '246': ('640x480', 'webm', 'video', 'VP9 high'), '247': ('720x480', 'webm', 'video', 'VP9'), '248': ('1920x1080', 'webm', 'video', 'VP9'), '249': ('48k', 'ogg', 'audio', 
'Opus'), @@ -247,7 +250,8 @@ class g(object): '258': ('320k', 'm4a', 'audio', '6-channel'), '264': ('2560x1440', 'm4v', 'video', ''), '271': ('1920x1280', 'webm', 'video', 'VP9'), - '272': ('3414x1080', 'webm', 'video', 'VP9') + '272': ('3414x1080', 'webm', 'video', 'VP9'), + '278': ('256x144', 'webm', 'video', 'VP9'), } @@ -263,6 +267,38 @@ def _extract_smap(map_name, dic, zero_idx=True): return [] +def _extract_dash(dashurl): + """ Download dash url and extract some data. """ + # pylint: disable = R0914 + dbg("Fetching dash page") + dashdata = fetch_decode(dashurl) + dbg("DASH list fetched") + ns = "{urn:mpeg:DASH:schema:MPD:2011}" + ytns = "{http://youtube.com/yt/2012/10/10}" + tree = ElementTree.fromstring(dashdata) + tlist = tree.findall(".//%sRepresentation" % ns) + dashmap = [] + + for x in tlist: + baseurl = x.find("%sBaseURL" % ns) + url = baseurl.text + size = baseurl.attrib["%scontentLength" % ytns] + bitrate = x.get("bandwidth") + itag = uni(x.get("id")) + width = uni(x.get("width")) + height = uni(x.get("height")) + type_ = re.search(r"(?:\?|&)mime=([\w\d\/]+)", url).group(1) + dashmap.append(dict(bitrate=bitrate, + dash=True, + itag=itag, + width=width, + height=height, + url=url, + size=size, + type=type_)) + return dashmap + + def _extract_function_from_js(name, js): """ Find a function definition called `name` and extract components. @@ -509,6 +545,94 @@ def _decodesig(sig, js_url): return solved +def remux(infile, outfile, quiet=False, muxer="ffmpeg"): + """ Remux audio. 
""" + from subprocess import call, STDOUT + muxer = muxer if isinstance(muxer, str) else "ffmpeg" + + for tool in set([muxer, "ffmpeg", "avconv"]): + cmd = [tool, "-y", "-i", infile, "-acodec", "copy", "-vn", outfile] + + try: + with open(os.devnull, "w") as devnull: + call(cmd, stdout=devnull, stderr=STDOUT) + + except OSError: + dbg("Failed to remux audio using %s", tool) + + else: + os.unlink(infile) + dbg("remuxed audio file using %s" % tool) + + if not quiet: + sys.stdout.write("\nAudio remuxed.\n") + + break + + else: + logging.warning("audio remux failed") + os.rename(infile, outfile) + + +def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): + """ Fetch url - from tmpdir if already retrieved. """ + tmpdir = os.path.join(tempfile.gettempdir(), "pafy") + + if not os.path.exists(tmpdir): + os.makedirs(tmpdir) + + url_md5 = hashlib.md5(url.encode("utf8")).hexdigest() + cached_filename = os.path.join(tmpdir, file_prefix + url_md5) + + if os.path.exists(cached_filename): + dbg("fetched %s from cache", dbg_ref) + + with open(cached_filename) as f: + retval = f.read() + + return retval + + else: + data = fetch_decode(url, "utf8") # unicode + dbg("Fetched %s", dbg_ref) + new.callback("Fetched %s" % dbg_ref) + + with open(cached_filename, "w") as f: + f.write(data) + + # prune files after write + prune_files(tmpdir, file_prefix) + return data + + +def prune_files(path, prefix="", age_max=3600 * 24 * 14, count_max=4): + """ Remove oldest files from path that start with prefix. + + remove files older than age_max, leave maximum of count_max files. 
+ """ + tempfiles = [] + + if not os.path.isdir(path): + return + + for f in os.listdir(path): + filepath = os.path.join(path, f) + + if os.path.isfile(filepath) and f.startswith(prefix): + age = time.time() - os.path.getmtime(filepath) + + if age > age_max: + os.unlink(filepath) + + else: + tempfiles.append((filepath, age)) + + tempfiles = sorted(tempfiles, key=lambda x: x[1], reverse=True) + + for f in tempfiles[:-count_max]: + os.unlink(f[0]) + + def get_js_sm(video_id): """ Fetch watchinfo page and extract stream map and js funcs if not known. @@ -525,34 +649,38 @@ def get_js_sm(video_id): if re.search(r'player-age-gate-content">', watchinfo) is not None: # create a new Pafy object - dbg("creating new instance for age restrictved video") - doppleganger = new(video_id, False, False, False) + dbg("age restricted video") + # doppleganger = new(video_id, False, False, False) video_info_url = g.urls['age_vidinfo'] % (video_id, video_id) - doppleganger.fetch_basic(ageurl=video_info_url) - return "age", "age", doppleganger + # doppleganger.fetch_basic(ageurl=video_info_url) + return video_info_url dbg("Fetched watchv page") new.callback("Fetched watchv page") m = re.search(g.jsplayer, watchinfo) myjson = json.loads(m.group(1)) stream_info = myjson['args'] - smap = _extract_smap(g.UEFSM, stream_info, False) - smap += _extract_smap(g.AF, stream_info, False) + dash_url = stream_info['dashmpd'] + sm = _extract_smap(g.UEFSM, stream_info, False) + asm = _extract_smap(g.AF, stream_info, False) js_url = myjson['assets']['js'] js_url = "https:" + js_url if js_url.startswith("//") else js_url funcs = Pafy.funcmap.get(js_url) if not funcs: + dbg("Fetching javascript") new.callback("Fetching javascript") - javascript = fetch_decode(js_url) # bytes - javascript = javascript.decode("utf8") # unicode - dbg("Fetched javascript") - new.callback("Fetched javascript") + javascript = fetch_cached(js_url, encoding="utf8", + dbg_ref="javascript", file_prefix="js-") mainfunc = 
_get_mainfunc_from_js(javascript) funcs = _get_other_funcs(mainfunc, javascript) funcs['mainfunction'] = mainfunc - return smap, js_url, funcs + elif funcs: + dbg("Using functions in memory extracted from %s", js_url) + dbg("Mem contains %s js func sets", len(Pafy.funcmap)) + + return (sm, asm), js_url, funcs, dash_url def _make_url(raw, sig, quick=True): @@ -562,7 +690,7 @@ def _make_url(raw, sig, quick=True): if "signature=" not in raw: - if not sig: + if sig is None: raise IOError("Error retrieving url") raw += "&signature=" + sig @@ -570,27 +698,6 @@ def _make_url(raw, sig, quick=True): return raw -def gen_ageurl(dop, itag): - """ Decrypt signature for age-restricted item. Return url. """ - for x in dop.sm + dop.asm: - - if x['itag'] == itag and len(x['s']) == 86: - s = x['s'] - s = s[2:63] + s[82] + s[64:82] + s[63] - dbg("decrypted agesig: %s%s", s[:22], "..") - return _make_url(x['url'], s) - - -def _get_matching_stream(smap, itag): - """ Return the url and signature for a stream matching itag in smap. """ - for x in smap: - - if x['itag'] == itag: - return x['url'], x.get('s') - - raise IOError("Error fetching stream") - - class Stream(object): """ YouTube video stream class. """ @@ -598,33 +705,57 @@ class Stream(object): def __init__(self, sm, parent): """ Set initial values. 
""" self._itag = sm['itag'] + # is_dash = "width" in sm and "height" in sm + is_dash = "dash" in sm if self._itag not in g.itags: logging.warning("Unknown itag: %s", self._itag) return None + self._mediatype = g.itags[self.itag][2] self._threed = 'stereo3d' in sm and sm['stereo3d'] == '1' - self._resolution = g.itags[self.itag][0] - self._dimensions = tuple(self.resolution.split("-")[0].split("x")) - self._dimensions = tuple([int(x) if x.isdigit() else x for x in - self._dimensions]) + + if is_dash: + + if sm['width'] != "None": # dash video + self._resolution = "%sx%s" % (sm['width'], sm['height']) + self._quality = self._resolution + self._dimensions = (int(sm['width']), int(sm['height'])) + + else: # dash audio + self._resolution = "0x0" + self._dimensions = (0, 0) + self._rawbitrate = int(sm['bitrate']) + # self._bitrate = uni(int(sm['bitrate']) // 1024) + "k" + self._bitrate = g.itags[self.itag][0] + self._quality = self._bitrate + + self._fsize = int(sm['size']) + # self._bitrate = sm['bitrate'] + # self._rawbitrate = uni(int(self._bitrate) // 1024) + "k" + + else: # not dash + self._resolution = g.itags[self.itag][0] + self._fsize = None + self._bitrate = self._rawbitrate = None + self._dimensions = tuple(self.resolution.split("-")[0].split("x")) + self._dimensions = tuple([int(x) if x.isdigit() else x for x in + self._dimensions]) + self._quality = self.resolution + self._vidformat = sm['type'].split(';')[0] # undocumented - self._quality = self.resolution self._extension = g.itags[self.itag][1] self._title = parent.title self.encrypted = 's' in sm self._parent = parent self._filename = self.generate_filename() - self._fsize = None - self._bitrate = self._rawbitrate = None - self._mediatype = g.itags[self.itag][2] self._notes = g.itags[self.itag][3] self._url = None self._rawurl = sm['url'] self._sig = sm['s'] if self.encrypted else sm.get("sig") self._active = False - if self.mediatype == "audio": + if self.mediatype == "audio" and not is_dash: 
self._dimensions = (0, 0) self._bitrate = self.resolution self._quality = self.bitrate @@ -718,48 +849,18 @@ def filename(self): @property def url(self): """ Return the url, decrypt if required. """ - if self._url: - pass - - elif self._parent.age: - self._url = gen_ageurl(self._parent.doppleganger, self.itag) - - elif not self.encrypted: - self._url = _make_url(self._rawurl, self._sig) - - else: - # encrypted url signatures - if self._parent.js_url: - # dbg("using cached js %s" % self._parent.js_url[-15:]) - enc_streams = self._parent.enc_streams - - else: - enc_streams, js_url, funcs = get_js_sm(self._parent.videoid) - self._parent.expiry = time.time() + g.lifespan - self._parent.js_url = js_url - - # check for age - if type(enc_streams) == uni and enc_streams == "age": - self._parent.age = True - dop = self._parent.doppleganger = funcs - self._url = gen_ageurl(dop, self.itag) - return self._url + if not self._url: - # Create Pafy funcmap dict for this js_url - if not Pafy.funcmap.get(js_url): - Pafy.funcmap[js_url] = funcs + if self._parent.age: - # else: - # Add javascript functions to Pafy funcmap dict - # in case same js_url has different functions - # Pafy.funcmap[js_url].update(funcs) + if self._sig: + s = self._sig + self._sig = s[2:63] + s[82] + s[64:82] + s[63] - # Stash usable urls and encrypted sigs in parent Pafy object - self._parent.enc_streams = enc_streams + elif self.encrypted: + self._sig = _decodesig(self._sig, self._parent.js_url) - url, s = _get_matching_stream(enc_streams, self.itag) - sig = _decodesig(s, self._parent.js_url) if s else None - self._url = _make_url(url, sig) + self._url = _make_url(self._rawurl, self._sig) return self._url @@ -795,10 +896,11 @@ def cancel(self): return True def download(self, filepath="", quiet=False, callback=lambda *x: None, - meta=False): + meta=False, remux_audio=False): """ Download. Use quiet=True to supress output. Return filename. 
Use meta=True to append video id and itag to generated filename + Use remux_audio=True to remux audio file downloads """ # pylint: disable=R0912,R0914 @@ -870,10 +972,16 @@ def download(self, filepath="", quiet=False, callback=lambda *x: None, callback(total, *progress_stats) if self._active: - os.rename(temp_filepath, filepath) + + if remux_audio and self.mediatype == "audio": + remux(temp_filepath, filepath, quiet=quiet, muxer=remux_audio) + + else: + os.rename(temp_filepath, filepath) + return filepath - else: + else: # download incomplete, return temp filepath outfh.close() return temp_filepath @@ -902,7 +1010,9 @@ def __init__(self, video_url, basic=True, gdata=False, self.sm = [] self.asm = [] + self.dash = [] self.js_url = None # if js_url is set then has new stream map + self._dashurl = None self.age = False self._streams = [] self._oggstreams = [] @@ -933,30 +1043,55 @@ def __init__(self, video_url, basic=True, gdata=False, if gdata: self._fetch_gdata() - if signature: - # pylint: disable=W0104 - s = self.streams - - if self.ciphertag: - s[0].url # forces signature decryption - if size: - for s in self.allstreams: # pylint: disable=W0104 s.get_filesize() - def fetch_basic(self, ageurl=None): - """ Fetch info url page and set member vars. """ + def fetch_basic(self): + """ Fetch basic data and streams. 
""" if self._have_basic: return - if ageurl: - allinfo = get_video_info("none", ageurl) + self._fetch_basic() - else: - allinfo = get_video_info(self.videoid) + if not self.ciphertag is ('s' in self.sm[0]): + logging.warning("ciphertag doesn't match signature type") + logging.warning(self.videoid) + + if self.ciphertag: + dbg("Encrypted signature detected.") + stuff = get_js_sm(self.videoid) + + if isinstance(stuff, tuple): + # smaps, js_url, funcs, dashurl = get_js_sm(self.videoid) + smaps, js_url, funcs, dashurl = stuff + Pafy.funcmap[js_url] = funcs + self.sm, self.asm = smaps + self.js_url = js_url + dashsig = re.search(r"/s/([\w\.]+)", dashurl).group(1) + dbg("decrypting dash sig") + goodsig = _decodesig(dashsig, js_url) + self._dashurl = re.sub(r"/s/[\w\.]+", + "/signature/%s" % goodsig, dashurl) + else: + self.age = True + info_url = stuff + self._fetch_basic(info_url=info_url) + s = re.search(r"/s/([\w\.]+)", self._dashurl).group(1) + s = s[2:63] + s[82] + s[64:82] + s[63] + self._dashurl = re.sub(r"/s/[\w\.]+", + "/signature/%s" % s, self._dashurl) + + self.dash = _extract_dash(self._dashurl) + self._have_basic = 1 + self._process_streams() + self.expiry = time.time() + g.lifespan + + def _fetch_basic(self, info_url=None): + """ Fetch info url page and set member vars. 
""" + allinfo = get_video_info(self.videoid, newurl=info_url) new.callback("Fetched video info") def _get_lst(key, default="unknown", dic=allinfo): @@ -965,6 +1100,7 @@ def _get_lst(key, default="unknown", dic=allinfo): return retval[0] if retval != default else default self._title = _get_lst('title') + self._dashurl = _get_lst('dashmpd') self._author = _get_lst('author') self._videoid = _get_lst('video_id') self._rating = float(_get_lst('avg_rating', 0.0)) @@ -976,21 +1112,9 @@ def _get_lst(key, default="unknown", dic=allinfo): self._bigthumb = _get_lst('iurlsd', "") self._bigthumbhd = _get_lst('iurlsdmaxres', "") self.ciphertag = _get_lst("use_cipher_signature") == "True" - - if ageurl: - self.ciphertag = False - dbg("Encrypted signature detected - age restricted") - - if self.ciphertag: - dbg("Encrypted signature detected.") - - # extract stream maps - self.sm = _extract_smap(g.UEFSM, allinfo, not self.js_url) - self.asm = _extract_smap(g.AF, allinfo, not self.js_url) - - self._have_basic = 1 - self._process_streams() - self.expiry = time.time() + g.lifespan + self.sm = _extract_smap(g.UEFSM, allinfo, True) + self.asm = _extract_smap(g.AF, allinfo, True) + dbg("extracted stream maps") def _fetch_gdata(self): """ Extract gdata values, fetch gdata if necessary. 
""" @@ -1029,8 +1153,19 @@ def _process_streams(self): streams = [x for x in streams if x.itag in g.itags] adpt_streams = [Stream(z, self) for z in self.asm] adpt_streams = [x for x in adpt_streams if x.itag in g.itags] + dash_streams = [Stream(z, self) for z in self.dash] + dash_streams = [x for x in dash_streams if x.itag in g.itags] audiostreams = [x for x in adpt_streams if x.bitrate] videostreams = [x for x in adpt_streams if not x.bitrate] + dash_itags = [x.itag for x in dash_streams] + audiostreams = [x for x in audiostreams if x.itag not in dash_itags] + videostreams = [x for x in videostreams if x.itag not in dash_itags] + audiostreams += [x for x in dash_streams if x.mediatype == "audio"] + videostreams += [x for x in dash_streams if x.mediatype != "audio"] + audiostreams = sorted(audiostreams, key=lambda x: x.rawbitrate, + reverse=True) + videostreams = sorted(videostreams, key=lambda x: x.dimensions, + reverse=True) m4astreams = [x for x in audiostreams if x.extension == "m4a"] oggstreams = [x for x in audiostreams if x.extension == "ogg"] self._streams = streams @@ -1273,7 +1408,7 @@ def populate_from_playlist(self, pl_data): self.playlist_meta = pl_data -def get_playlist(playlist_url, basic=False, gdata=False, signature=False, +def get_playlist(playlist_url, basic=False, gdata=False, signature=True, size=False, callback=lambda x: None): """ Return a dict containing Pafy objects from a YouTube Playlist. 
diff --git a/setup.py b/setup.py index 8f865401..58d0b772 100755 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ name='Pafy', packages=['pafy'], scripts=['scripts/ytdl'], - version='0.3.62', + version='0.3.64', description="Retrieve YouTube content and metadata", keywords=["Pafy", "API", "YouTube", "youtube", "download", "video"], author="nagev", diff --git a/tests/test.py b/tests/test.py index 1a5ff6c5..f64f84c6 100644 --- a/tests/test.py +++ b/tests/test.py @@ -58,6 +58,8 @@ def runOnce(self): del _ for playlist in Test.playlists: + + playlist['fetched'] = pafy.get_playlist(playlist['identifier']) Test.hasrun = True @@ -87,11 +89,6 @@ def test_make_url_no_sig(self): args = dict(raw="a=b&c=d", sig=None, quick=False) self.assertRaises(IOError, pafy._make_url, **args) - def test_no_matching_stream(self): - """ Test no matching stream found. """ - smap = dict() - self.assertRaises(IOError, pafy._get_matching_stream, smap, None) - def test_generate_filename_with_meta(self): """ Use meta argument to generate filename. """ if Test.quick: @@ -101,13 +98,13 @@ def test_generate_filename_with_meta(self): a = p.getbestaudio() filename = a.generate_filename(meta=True) self.assertEqual(filename, 'Jessie J - WILD (Official) ft. Big Sean' - ', Dizzee Rascal-jrNLsC_Y9Oo-171.ogg') + ', Dizzee Rascal-jrNLsC_Y9Oo-141.m4a') self.assertEqual(a.threed, False) self.assertEqual(a.title, 'Jessie J - WILD (Official) ft. Big Sean' ', Dizzee Rascal') self.assertEqual(a.notes, '') self.assertEqual(a.filename, 'Jessie J - WILD (Official) ft. Big Sean' - ', Dizzee Rascal.ogg') + ', Dizzee Rascal.m4a') @stdout_to_null def test_pafy_download(self): @@ -121,13 +118,13 @@ def test_pafy_download(self): @stdout_to_null def test_pafy_download_resume(self): """ Test resuming a partial download. 
""" - tempname = "WASTE 2 SECONDS OF YOUR LIFE-DsAn_n6O5Ns-171.ogg.temp" + tempname = "WASTE 2 SECONDS OF YOUR LIFE-DsAn_n6O5Ns-141.m4a.temp" with open(tempname, "w") as ladeeda: ladeeda.write("abc") vid = pafy.new("DsAn_n6O5Ns", gdata=True, basic=False, signature=False) - vstream = vid.audiostreams[-1].download(meta=True) - name = "WASTE 2 SECONDS OF YOUR LIFE.ogg" - self.assertEqual(22675, os.stat(name).st_size) + vstream = vid.audiostreams[-1].download(meta=True, remux_audio=True) + name = "WASTE 2 SECONDS OF YOUR LIFE.m4a" + self.assertEqual(12880, os.stat(name).st_size) # test fetching attributes vid._title = None @@ -167,8 +164,8 @@ def test_pafy_download_to_dir(self): """ Test user specified path. """ vid = pafy.new("DsAn_n6O5Ns", gdata=True) vstream = vid.audiostreams[-1].download("/tmp", meta=True) - name = "/tmp/WASTE 2 SECONDS OF YOUR LIFE.ogg" - self.assertEqual(22675, os.stat(name).st_size) + name = "/tmp/WASTE 2 SECONDS OF YOUR LIFE.m4a" + self.assertEqual(12880, os.stat(name).st_size) def test_lazy_pafy(self): """ Test create pafy object without fetching data. 
""" @@ -341,12 +338,12 @@ def test_misc_tests(self): 'category': 'Education', 'description': '1223db22b4a38d0a8ebfcafb549f40c39af26251', 'bestsize': 54284129, - 'all streams': 10, + 'all streams': 18, 'normal streams': 5, - 'video streams': 4, - 'audio streams': 1, - 'ogg streams': 0, - 'm4a streams': 1, + 'video streams': 8, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'www.youtube.com/watch?v=SeIJmciN8mo', @@ -361,12 +358,12 @@ def test_misc_tests(self): 'category': 'Music', 'description': 'fa34f2704be9c1b21949af515e813f644f14b89a', 'bestsize': 101836539, - 'all streams': 21, + 'all streams': 23, 'normal streams': 6, - 'video streams': 13, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'video streams': 12, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'https://youtu.be/watch?v=07FYdnEawAQ', @@ -381,12 +378,12 @@ def test_misc_tests(self): 'category': 'Music', 'description': '55e8e6e2b219712bf94d67c2434530474a503265', 'bestsize': 79885533, - 'all streams': 21, + 'all streams': 23, 'normal streams': 6, - 'video streams': 13, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'video streams': 12, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'EnHp24CVORc', @@ -402,12 +399,12 @@ def test_misc_tests(self): 'category': 'People', 'description': '3c884d9791be15646ddf351edffcb2dd22ec70f8', 'bestsize': 101083389, - 'all streams': 19, + 'all streams': 23, 'normal streams': 6, - 'video streams': 11, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'video streams': 12, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'http://youtube.com/watch?v=rYEDA3JcQqw', @@ -422,12 +419,12 @@ def test_misc_tests(self): 'category': 'Music', 'description': '72bfd9472e59a8f48b83af36197ebcf5d2227609', 'bestsize': 41334333, - 'all streams': 27, + 'all streams': 29, 'normal streams': 6, - 'video streams': 19, - 
'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'video streams': 18, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, } ]