From 1795f55c00401044a3f26922066586e4148cf45f Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 22 Sep 2014 12:09:23 +1000 Subject: [PATCH 01/22] Use rtd sphinx theme for docs --- docs-sphinx/conf.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs-sphinx/conf.py b/docs-sphinx/conf.py index 977746c4..5bb5c7d5 100644 --- a/docs-sphinx/conf.py +++ b/docs-sphinx/conf.py @@ -14,6 +14,7 @@ import sys import os +import sphinx_rtd_theme # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -102,8 +103,11 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' -#html_theme = 'nature' +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# html_theme = 'default' +# html_theme = 'nature' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the From 497d53bdc09f7916f8485719068076c7c135a44f Mon Sep 17 00:00:00 2001 From: np1 Date: Sat, 27 Sep 2014 02:00:10 +1000 Subject: [PATCH 02/22] Cache js file locally Useful for multiple invocations when pafy does not stay in memory --- pafy/pafy.py | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index eebe25f9..b8ef075e 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -38,6 +38,8 @@ import time import json import logging +import hashlib +import tempfile from xml.etree import ElementTree @@ -84,17 +86,21 @@ def parseqs(data): return data -def fetch_decode(url): +def fetch_decode(url, encoding=None): """ Fetch url and decode. """ req = g.opener.open(url) ct = req.headers['content-type'] - if "charset=" in ct: + if encoding: + return req.read().decode(encoding) + + elif "charset=" in ct: encoding = re.search(r"charset=([\w-]+)\s*(:?;|$)", ct).group(1) - dbg("encoding: %s", ct) + dbg("encoding detected: %s", ct) return req.read().decode(encoding) else: + dbg("encoding unknown") return req.read() @@ -509,6 +515,29 @@ def _decodesig(sig, js_url): return solved +def fetch_cached(url, encoding=None, dbg_ref=""): + """ Fetch url - from tmpdir if already retrieved """ + # TODO: prune cache dir + tmpdir = os.path.join(tempfile.gettempdir(), "pafy") + + if not os.path.exists(tmpdir): + os.makedirs(tmpdir) + + url_md5 = hashlib.md5(url).hexdigest() + cached_filename = os.path.join(tmpdir, url_md5) + + if os.path.exists(cached_filename): + dbg("fetched %s from cache", dbg_ref) + return open(cached_filename).read() + + else: + data = fetch_decode(url, "utf8") # unicode + dbg("Fetched %s", dbg_ref) + new.callback("Fetched %s" % dbg_ref) + open(cached_filename, "w").write(data) + return data + + def get_js_sm(video_id): """ Fetch watchinfo page and extract stream map and js funcs if not known. @@ -543,15 +572,17 @@ def get_js_sm(video_id): funcs = Pafy.funcmap.get(js_url) if not funcs: + dbg("Fetching javascript") new.callback("Fetching javascript") - javascript = fetch_decode(js_url) # bytes - javascript = javascript.decode("utf8") # unicode - dbg("Fetched javascript") - new.callback("Fetched javascript") + javascript = fetch_cached(js_url, encoding="utf8", dbg_ref="javascript") mainfunc = _get_mainfunc_from_js(javascript) funcs = _get_other_funcs(mainfunc, javascript) funcs['mainfunction'] = mainfunc + elif funcs: + dbg("Using functions in memory extracted from %s", js_url) + dbg("Mem contains %s js func sets", len(Pafy.funcmap)) + return smap, js_url, funcs From 410cbb068cb4aef079699e80903a531ffeb4c430 Mon Sep 17 00:00:00 2001 From: np1 Date: Sat, 27 Sep 2014 14:39:27 +1000 Subject: [PATCH 03/22] Fetch streams from dash url --- pafy/pafy.py | 96 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 10 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index b8ef075e..bec901dc 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -268,6 +268,36 @@ def _extract_smap(map_name, dic, zero_idx=True): return [] +def _extract_dash(dashurl): + """ Download dash url and extract some data. """ + dbg("Fetching dash page") + dashdata = fetch_decode(dashurl) + dbg("DASH list fetched") + ns = {"dash": "urn:mpeg:DASH:schema:MPD:2011", + "yt": "http://youtube.com/yt/2012/10/10"} + tree = ElementTree.fromstring(dashdata) + tlist = tree.findall(".//dash:Representation", namespaces=ns) + dashmap = [] + + for x in tlist: + baseurl = x.find("dash:BaseURL", namespaces=ns) + url = baseurl.text + size = baseurl.items()[0][1] # be more specific, don't rely on pos + bitrate = x.get("bandwidth") + itag = uni(x.get("id")) + width = uni(x.get("width")) + height = uni(x.get("height")) + type_ = re.search(r"(?:\?|&)mime=([\w\d\/]+)", url).group(1) + dashmap.append(dict(bitrate=bitrate, + dash=True, + itag=itag, + width=width, + height=height, + url=url, + size=size, + type=type_)) + return dashmap + def _extract_function_from_js(name, js): """ Find a function definition called `name` and extract components. @@ -516,7 +546,7 @@ def _decodesig(sig, js_url): def fetch_cached(url, encoding=None, dbg_ref=""): - """ Fetch url - from tmpdir if already retrieved """ + """ Fetch url - from tmpdir if already retrieved. """ # TODO: prune cache dir tmpdir = os.path.join(tempfile.gettempdir(), "pafy") @@ -629,33 +659,57 @@ class Stream(object): def __init__(self, sm, parent): """ Set initial values. """ self._itag = sm['itag'] + # is_dash = "width" in sm and "height" in sm + is_dash = "dash" in sm if self._itag not in g.itags: logging.warning("Unknown itag: %s", self._itag) return None + self._mediatype = g.itags[self.itag][2] self._threed = 'stereo3d' in sm and sm['stereo3d'] == '1' - self._resolution = g.itags[self.itag][0] - self._dimensions = tuple(self.resolution.split("-")[0].split("x")) - self._dimensions = tuple([int(x) if x.isdigit() else x for x in - self._dimensions]) + + if is_dash: + + if sm['width'] != "None": # dash video + self._resolution = "%sx%s" % (sm['width'], sm['height']) + self._quality = self._resolution + self._dimensions = (int(sm['width']), int(sm['height'])) + + else: # dash audio + self._resolution = "0x0" + self._dimensions = (0, 0) + self._rawbitrate = int(sm['bitrate']) + self._bitrate = uni(int(sm['bitrate']) // 1024) + "k" + self._quality = self._bitrate + + + self._fsize = int(sm['size']) + # self._bitrate = sm['bitrate'] + # self._rawbitrate = uni(int(self._bitrate) // 1024) + "k" + + else: # not dash + self._resolution = g.itags[self.itag][0] + self._fsize = None + self._bitrate = self._rawbitrate = None + self._dimensions = tuple(self.resolution.split("-")[0].split("x")) + self._dimensions = tuple([int(x) if x.isdigit() else x for x in + self._dimensions]) + self._quality = self.resolution + self._vidformat = sm['type'].split(';')[0] # undocumented - self._quality = self.resolution self._extension = g.itags[self.itag][1] self._title = parent.title self.encrypted = 's' in sm self._parent = parent self._filename = self.generate_filename() - self._fsize = None - self._bitrate = self._rawbitrate = None - self._mediatype = g.itags[self.itag][2] self._notes = g.itags[self.itag][3] self._url = None self._rawurl = sm['url'] self._sig = sm['s'] if self.encrypted else sm.get("sig") self._active = False - if self.mediatype == "audio": + if self.mediatype == "audio" and not is_dash: self._dimensions = (0, 0) self._bitrate = self.resolution self._quality = self.bitrate @@ -933,6 +987,7 @@ def __init__(self, video_url, basic=True, gdata=False, self.sm = [] self.asm = [] + self.dash = [] self.js_url = None # if js_url is set then has new stream map self.age = False self._streams = [] @@ -1014,10 +1069,15 @@ def _get_lst(key, default="unknown", dic=allinfo): if self.ciphertag: dbg("Encrypted signature detected.") + # TODO: implement enc sig separately # extract stream maps self.sm = _extract_smap(g.UEFSM, allinfo, not self.js_url) self.asm = _extract_smap(g.AF, allinfo, not self.js_url) + # get dash streams + dashurl = allinfo['dashmpd'][0] + self.dash = _extract_dash(dashurl) + self._have_basic = 1 self._process_streams() @@ -1060,8 +1120,24 @@ def _process_streams(self): streams = [x for x in streams if x.itag in g.itags] adpt_streams = [Stream(z, self) for z in self.asm] adpt_streams = [x for x in adpt_streams if x.itag in g.itags] + dash_streams = [Stream(z, self) for z in self.dash] + dash_streams = [x for x in dash_streams if x.itag in g.itags] audiostreams = [x for x in adpt_streams if x.bitrate] videostreams = [x for x in adpt_streams if not x.bitrate] + + # delete streams that are also in dash_streams + dash_itags = [x.itag for x in dash_streams] + audiostreams = [x for x in audiostreams if not x.itag in dash_itags] + videostreams = [x for x in videostreams if not x.itag in dash_itags] + + # insert dash_streams + audiostreams += [x for x in dash_streams if x.mediatype == "audio"] + videostreams += [x for x in dash_streams if x.mediatype != "audio"] + + audiostreams = sorted(audiostreams, key=lambda x: x.rawbitrate, + reverse=True) + videostreams = sorted(videostreams, key=lambda x: x.dimensions, + reverse=True) m4astreams = [x for x in audiostreams if x.extension == "m4a"] oggstreams = [x for x in audiostreams if x.extension == "ogg"] self._streams = streams From af2e64cc53ae5a611d27bffc33d20e5f8ec01f87 Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 28 Sep 2014 23:29:12 +1000 Subject: [PATCH 04/22] Added dashmpd streams --- pafy/pafy.py | 184 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 73 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index bec901dc..a6b3aa3e 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -111,7 +111,6 @@ def new(url, basic=True, gdata=False, signature=True, size=False, Optional arguments: basic - fetch basic metadata and streams gdata - fetch gdata info (upload date, description, category) - signature - fetch data required to decrypt urls, if encrypted size - fetch the size of each stream (slow)(decrypts urls if needed) callback - a callback function to receive status strings @@ -134,6 +133,10 @@ def new(url, basic=True, gdata=False, signature=True, size=False, value set in the signature argument. """ + if not signature: + # pylint: disable=W0104 + logging.warn("Use of signature flag has been deprecated.") + return Pafy(url, basic, gdata, signature, size, callback) @@ -268,6 +271,7 @@ def _extract_smap(map_name, dic, zero_idx=True): return [] + def _extract_dash(dashurl): """ Download dash url and extract some data. """ dbg("Fetching dash page") @@ -553,7 +557,7 @@ def fetch_cached(url, encoding=None, dbg_ref=""): if not os.path.exists(tmpdir): os.makedirs(tmpdir) - url_md5 = hashlib.md5(url).hexdigest() + url_md5 = hashlib.md5(url.encode("utf8")).hexdigest() cached_filename = os.path.join(tmpdir, url_md5) if os.path.exists(cached_filename): @@ -584,19 +588,20 @@ def get_js_sm(video_id): if re.search(r'player-age-gate-content">', watchinfo) is not None: # create a new Pafy object - dbg("creating new instance for age restrictved video") - doppleganger = new(video_id, False, False, False) + dbg("age restricted video") + # doppleganger = new(video_id, False, False, False) video_info_url = g.urls['age_vidinfo'] % (video_id, video_id) - doppleganger.fetch_basic(ageurl=video_info_url) - return "age", "age", doppleganger + # doppleganger.fetch_basic(ageurl=video_info_url) + return video_info_url dbg("Fetched watchv page") new.callback("Fetched watchv page") m = re.search(g.jsplayer, watchinfo) myjson = json.loads(m.group(1)) stream_info = myjson['args'] - smap = _extract_smap(g.UEFSM, stream_info, False) - smap += _extract_smap(g.AF, stream_info, False) + dash_url = stream_info['dashmpd'] + sm = _extract_smap(g.UEFSM, stream_info, False) + asm = _extract_smap(g.AF, stream_info, False) js_url = myjson['assets']['js'] js_url = "https:" + js_url if js_url.startswith("//") else js_url funcs = Pafy.funcmap.get(js_url) @@ -613,7 +618,7 @@ def get_js_sm(video_id): dbg("Using functions in memory extracted from %s", js_url) dbg("Mem contains %s js func sets", len(Pafy.funcmap)) - return smap, js_url, funcs + return (sm, asm), js_url, funcs, dash_url def _make_url(raw, sig, quick=True): @@ -631,15 +636,12 @@ def _make_url(raw, sig, quick=True): return raw -def gen_ageurl(dop, itag): +def gen_ageurl(url, s): """ Decrypt signature for age-restricted item. Return url. """ - for x in dop.sm + dop.asm: - if x['itag'] == itag and len(x['s']) == 86: - s = x['s'] - s = s[2:63] + s[82] + s[64:82] + s[63] - dbg("decrypted agesig: %s%s", s[:22], "..") - return _make_url(x['url'], s) + print(url, s) + # s = s[2:63] + s[82] + s[64:82] + s[63] + # return _make_url(x['url'], s) def _get_matching_stream(smap, itag): @@ -680,7 +682,8 @@ def __init__(self, sm, parent): self._resolution = "0x0" self._dimensions = (0, 0) self._rawbitrate = int(sm['bitrate']) - self._bitrate = uni(int(sm['bitrate']) // 1024) + "k" + # self._bitrate = uni(int(sm['bitrate']) // 1024) + "k" + self._bitrate = g.itags[self.itag][0] self._quality = self._bitrate @@ -803,36 +806,51 @@ def filename(self): @property def url(self): """ Return the url, decrypt if required. """ - if self._url: - pass + if not self._url: - elif self._parent.age: - self._url = gen_ageurl(self._parent.doppleganger, self.itag) + if self._parent.age: + if self._sig: + print("has _sig") + s = self._sig + self._sig = s[2:63] + s[82] + s[64:82] + s[63] - elif not self.encrypted: - self._url = _make_url(self._rawurl, self._sig) + self._url = _make_url(self._rawurl, self._sig) - else: + elif self.encrypted: + sig = _decodesig(self._sig, self._parent.js_url) + self._url = _make_url(self._rawurl, sig) + + elif not self.encrypted: + self._url = _make_url(self._rawurl, self._sig) + + # elif self._parent.age: + # self._url = gen_ageurl(self._parent.doppleganger, self.itag) + + # elif not self.encrypted: + # self._url = _make_url(self._rawurl, self._sig) + + # else: # encrypted url signatures - if self._parent.js_url: + # if self._parent.js_url: # dbg("using cached js %s" % self._parent.js_url[-15:]) - enc_streams = self._parent.enc_streams + # enc_streams = self._parent.enc_streams + # pass - else: - enc_streams, js_url, funcs = get_js_sm(self._parent.videoid) - self._parent.expiry = time.time() + g.lifespan - self._parent.js_url = js_url + # else: + # enc_streams, js_url, funcs = get_js_sm(self._parent.videoid) + # self._parent.expiry = time.time() + g.lifespan + # self._parent.js_url = js_url # check for age - if type(enc_streams) == uni and enc_streams == "age": - self._parent.age = True - dop = self._parent.doppleganger = funcs - self._url = gen_ageurl(dop, self.itag) - return self._url + # if type(enc_streams) == uni and enc_streams == "age": + # self._parent.age = True + # dop = self._parent.doppleganger = funcs + # self._url = gen_ageurl(dop, self.itag) + # return self._url # Create Pafy funcmap dict for this js_url - if not Pafy.funcmap.get(js_url): - Pafy.funcmap[js_url] = funcs + # if not Pafy.funcmap.get(js_url): + # Pafy.funcmap[js_url] = funcs # else: # Add javascript functions to Pafy funcmap dict @@ -840,14 +858,14 @@ def url(self): # Pafy.funcmap[js_url].update(funcs) # Stash usable urls and encrypted sigs in parent Pafy object - self._parent.enc_streams = enc_streams - - url, s = _get_matching_stream(enc_streams, self.itag) - sig = _decodesig(s, self._parent.js_url) if s else None - self._url = _make_url(url, sig) + # self._parent.enc_streams = enc_streams + # url, s = _get_matching_stream(enc_streams, self.itag) + # sig = _decodesig(s, self._parent.js_url) if s else None + # self._url = _make_url(url, sig) return self._url + @property def url_https(self): """ Return https url. """ @@ -1019,30 +1037,63 @@ def __init__(self, video_url, basic=True, gdata=False, if gdata: self._fetch_gdata() - if signature: - # pylint: disable=W0104 - s = self.streams - - if self.ciphertag: - s[0].url # forces signature decryption - if size: - for s in self.allstreams: # pylint: disable=W0104 s.get_filesize() - def fetch_basic(self, ageurl=None): - """ Fetch info url page and set member vars. """ + def fetch_basic(self): + if self._have_basic: return - if ageurl: - allinfo = get_video_info("none", ageurl) + self._fetch_basic() + # Ensure ciphertag matches url type + assert self.ciphertag is ('s' in self.sm[0]) - else: - allinfo = get_video_info(self.videoid) + if self.ciphertag: + dbg("Encrypted signature detected.") + stuff = get_js_sm(self.videoid) + if isinstance(stuff, tuple): + # smaps, js_url, funcs, dashurl = get_js_sm(self.videoid) + smaps, js_url, funcs, dashurl = stuff + + # Add functions to funcmap + Pafy.funcmap[js_url] = funcs + + # replace stream maps + self.sm, self.asm = smaps + self.js_url = js_url + + # get sig for dash url + dashsig = re.search(r"/s/([\w\.]+)", dashurl).group(1) + dbg("decrypting dash sig") + goodsig = _decodesig(dashsig, js_url) + self._dashurl = re.sub("/s/[\w\.]+", "/signature/%s" % goodsig, dashurl) + + + # deal with age + else: + self.age = True + info_url = stuff + # refetch basic using other url + self._fetch_basic(info_url=info_url) + # get dashurl + s = re.search(r"/s/([\w\.]+)", self._dashurl).group(1) + s = s[2:63] + s[82] + s[64:82] + s[63] + self._dashurl = re.sub("/s/[\w\.]+", + "/signature/%s" % s, self._dashurl) + + self.dash = _extract_dash(self._dashurl) + self._have_basic = 1 + self._process_streams() + self.expiry = time.time() + g.lifespan + + def _fetch_basic(self, info_url=None): + """ Fetch info url page and set member vars. """ + + allinfo = get_video_info(self.videoid, newurl=info_url) new.callback("Fetched video info") def _get_lst(key, default="unknown", dic=allinfo): @@ -1051,6 +1102,7 @@ def _get_lst(key, default="unknown", dic=allinfo): return retval[0] if retval != default else default self._title = _get_lst('title') + self._dashurl = _get_lst('dashmpd') self._author = _get_lst('author') self._videoid = _get_lst('video_id') self._rating = float(_get_lst('avg_rating', 0.0)) @@ -1063,25 +1115,11 @@ def _get_lst(key, default="unknown", dic=allinfo): self._bigthumbhd = _get_lst('iurlsdmaxres', "") self.ciphertag = _get_lst("use_cipher_signature") == "True" - if ageurl: - self.ciphertag = False - dbg("Encrypted signature detected - age restricted") - - if self.ciphertag: - dbg("Encrypted signature detected.") - # TODO: implement enc sig separately - # extract stream maps - self.sm = _extract_smap(g.UEFSM, allinfo, not self.js_url) - self.asm = _extract_smap(g.AF, allinfo, not self.js_url) - # get dash streams - dashurl = allinfo['dashmpd'][0] - self.dash = _extract_dash(dashurl) - + self.sm = _extract_smap(g.UEFSM, allinfo, True) + self.asm = _extract_smap(g.AF, allinfo, True) + dbg("extracted stream maps") - self._have_basic = 1 - self._process_streams() - self.expiry = time.time() + g.lifespan def _fetch_gdata(self): """ Extract gdata values, fetch gdata if necessary. """ From a0e28d4ee46ee476580c1226bc65018ecaccf0f0 Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 28 Sep 2014 23:53:01 +1000 Subject: [PATCH 05/22] PEP tidy --- pafy/pafy.py | 71 ++++++++-------------------------------------------- 1 file changed, 10 insertions(+), 61 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index a6b3aa3e..b572ce10 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -636,14 +636,6 @@ def _make_url(raw, sig, quick=True): return raw -def gen_ageurl(url, s): - """ Decrypt signature for age-restricted item. Return url. """ - - print(url, s) - # s = s[2:63] + s[82] + s[64:82] + s[63] - # return _make_url(x['url'], s) - - def _get_matching_stream(smap, itag): """ Return the url and signature for a stream matching itag in smap. """ for x in smap: @@ -686,7 +678,6 @@ def __init__(self, sm, parent): self._bitrate = g.itags[self.itag][0] self._quality = self._bitrate - self._fsize = int(sm['size']) # self._bitrate = sm['bitrate'] # self._rawbitrate = uni(int(self._bitrate) // 1024) + "k" @@ -697,7 +688,7 @@ def __init__(self, sm, parent): self._bitrate = self._rawbitrate = None self._dimensions = tuple(self.resolution.split("-")[0].split("x")) self._dimensions = tuple([int(x) if x.isdigit() else x for x in - self._dimensions]) + self._dimensions]) self._quality = self.resolution self._vidformat = sm['type'].split(';')[0] # undocumented @@ -810,7 +801,7 @@ def url(self): if self._parent.age: if self._sig: - print("has _sig") + s = self._sig self._sig = s[2:63] + s[82] + s[64:82] + s[63] @@ -820,52 +811,11 @@ def url(self): sig = _decodesig(self._sig, self._parent.js_url) self._url = _make_url(self._rawurl, sig) - elif not self.encrypted: + else: self._url = _make_url(self._rawurl, self._sig) - # elif self._parent.age: - # self._url = gen_ageurl(self._parent.doppleganger, self.itag) - - # elif not self.encrypted: - # self._url = _make_url(self._rawurl, self._sig) - - # else: - # encrypted url signatures - # if self._parent.js_url: - # dbg("using cached js %s" % self._parent.js_url[-15:]) - # enc_streams = self._parent.enc_streams - # pass - - # else: - # enc_streams, js_url, funcs = get_js_sm(self._parent.videoid) - # self._parent.expiry = time.time() + g.lifespan - # self._parent.js_url = js_url - - # check for age - # if type(enc_streams) == uni and enc_streams == "age": - # self._parent.age = True - # dop = self._parent.doppleganger = funcs - # self._url = gen_ageurl(dop, self.itag) - # return self._url - - # Create Pafy funcmap dict for this js_url - # if not Pafy.funcmap.get(js_url): - # Pafy.funcmap[js_url] = funcs - - # else: - # Add javascript functions to Pafy funcmap dict - # in case same js_url has different functions - # Pafy.funcmap[js_url].update(funcs) - - # Stash usable urls and encrypted sigs in parent Pafy object - # self._parent.enc_streams = enc_streams - - # url, s = _get_matching_stream(enc_streams, self.itag) - # sig = _decodesig(s, self._parent.js_url) if s else None - # self._url = _make_url(url, sig) return self._url - @property def url_https(self): """ Return https url. """ @@ -1007,6 +957,7 @@ def __init__(self, video_url, basic=True, gdata=False, self.asm = [] self.dash = [] self.js_url = None # if js_url is set then has new stream map + self._dashurl = None self.age = False self._streams = [] self._oggstreams = [] @@ -1043,7 +994,7 @@ def __init__(self, video_url, basic=True, gdata=False, s.get_filesize() def fetch_basic(self): - + """ Fetch basic data and streams. """ if self._have_basic: return @@ -1070,8 +1021,8 @@ def fetch_basic(self): dashsig = re.search(r"/s/([\w\.]+)", dashurl).group(1) dbg("decrypting dash sig") goodsig = _decodesig(dashsig, js_url) - self._dashurl = re.sub("/s/[\w\.]+", "/signature/%s" % goodsig, dashurl) - + self._dashurl = re.sub(r"/s/[\w\.]+", + "/signature/%s" % goodsig, dashurl) # deal with age else: @@ -1082,7 +1033,7 @@ def fetch_basic(self): # get dashurl s = re.search(r"/s/([\w\.]+)", self._dashurl).group(1) s = s[2:63] + s[82] + s[64:82] + s[63] - self._dashurl = re.sub("/s/[\w\.]+", + self._dashurl = re.sub(r"/s/[\w\.]+", "/signature/%s" % s, self._dashurl) self.dash = _extract_dash(self._dashurl) @@ -1092,7 +1043,6 @@ def fetch_basic(self): def _fetch_basic(self, info_url=None): """ Fetch info url page and set member vars. """ - allinfo = get_video_info(self.videoid, newurl=info_url) new.callback("Fetched video info") @@ -1120,7 +1070,6 @@ def _get_lst(key, default="unknown", dic=allinfo): self.asm = _extract_smap(g.AF, allinfo, True) dbg("extracted stream maps") - def _fetch_gdata(self): """ Extract gdata values, fetch gdata if necessary. """ if self._have_gdata: @@ -1165,8 +1114,8 @@ def _process_streams(self): # delete streams that are also in dash_streams dash_itags = [x.itag for x in dash_streams] - audiostreams = [x for x in audiostreams if not x.itag in dash_itags] - videostreams = [x for x in videostreams if not x.itag in dash_itags] + audiostreams = [x for x in audiostreams if x.itag not in dash_itags] + videostreams = [x for x in videostreams if x.itag not in dash_itags] # insert dash_streams audiostreams += [x for x in dash_streams if x.mediatype == "audio"] From cc944522ef13cf2fa5b5aca8d6c160dcc8db02e3 Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 00:17:39 +1000 Subject: [PATCH 06/22] Use dash streams (higher quality audio files #54) --- pafy/pafy.py | 239 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 143 insertions(+), 96 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index b8ef075e..df28df3e 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -111,7 +111,6 @@ def new(url, basic=True, gdata=False, signature=True, size=False, Optional arguments: basic - fetch basic metadata and streams gdata - fetch gdata info (upload date, description, category) - signature - fetch data required to decrypt urls, if encrypted size - fetch the size of each stream (slow)(decrypts urls if needed) callback - a callback function to receive status strings @@ -134,6 +133,10 @@ def new(url, basic=True, gdata=False, signature=True, size=False, value set in the signature argument. """ + if not signature: + # pylint: disable=W0104 + logging.warn("Use of signature flag has been deprecated.") + return Pafy(url, basic, gdata, signature, size, callback) @@ -269,6 +272,37 @@ def _extract_smap(map_name, dic, zero_idx=True): return [] +def _extract_dash(dashurl): + """ Download dash url and extract some data. """ + dbg("Fetching dash page") + dashdata = fetch_decode(dashurl) + dbg("DASH list fetched") + ns = {"dash": "urn:mpeg:DASH:schema:MPD:2011", + "yt": "http://youtube.com/yt/2012/10/10"} + tree = ElementTree.fromstring(dashdata) + tlist = tree.findall(".//dash:Representation", namespaces=ns) + dashmap = [] + + for x in tlist: + baseurl = x.find("dash:BaseURL", namespaces=ns) + url = baseurl.text + size = baseurl.items()[0][1] # be more specific, don't rely on pos + bitrate = x.get("bandwidth") + itag = uni(x.get("id")) + width = uni(x.get("width")) + height = uni(x.get("height")) + type_ = re.search(r"(?:\?|&)mime=([\w\d\/]+)", url).group(1) + dashmap.append(dict(bitrate=bitrate, + dash=True, + itag=itag, + width=width, + height=height, + url=url, + size=size, + type=type_)) + return dashmap + + def _extract_function_from_js(name, js): """ Find a function definition called `name` and extract components. @@ -516,14 +550,14 @@ def _decodesig(sig, js_url): def fetch_cached(url, encoding=None, dbg_ref=""): - """ Fetch url - from tmpdir if already retrieved """ + """ Fetch url - from tmpdir if already retrieved. """ # TODO: prune cache dir tmpdir = os.path.join(tempfile.gettempdir(), "pafy") if not os.path.exists(tmpdir): os.makedirs(tmpdir) - url_md5 = hashlib.md5(url).hexdigest() + url_md5 = hashlib.md5(url.encode("utf8")).hexdigest() cached_filename = os.path.join(tmpdir, url_md5) if os.path.exists(cached_filename): @@ -554,19 +588,20 @@ def get_js_sm(video_id): if re.search(r'player-age-gate-content">', watchinfo) is not None: # create a new Pafy object - dbg("creating new instance for age restrictved video") - doppleganger = new(video_id, False, False, False) + dbg("age restricted video") + # doppleganger = new(video_id, False, False, False) video_info_url = g.urls['age_vidinfo'] % (video_id, video_id) - doppleganger.fetch_basic(ageurl=video_info_url) - return "age", "age", doppleganger + # doppleganger.fetch_basic(ageurl=video_info_url) + return video_info_url dbg("Fetched watchv page") new.callback("Fetched watchv page") m = re.search(g.jsplayer, watchinfo) myjson = json.loads(m.group(1)) stream_info = myjson['args'] - smap = _extract_smap(g.UEFSM, stream_info, False) - smap += _extract_smap(g.AF, stream_info, False) + dash_url = stream_info['dashmpd'] + sm = _extract_smap(g.UEFSM, stream_info, False) + asm = _extract_smap(g.AF, stream_info, False) js_url = myjson['assets']['js'] js_url = "https:" + js_url if js_url.startswith("//") else js_url funcs = Pafy.funcmap.get(js_url) @@ -583,7 +618,7 @@ def get_js_sm(video_id): dbg("Using functions in memory extracted from %s", js_url) dbg("Mem contains %s js func sets", len(Pafy.funcmap)) - return smap, js_url, funcs + return (sm, asm), js_url, funcs, dash_url def _make_url(raw, sig, quick=True): @@ -601,17 +636,6 @@ def _make_url(raw, sig, quick=True): return raw -def gen_ageurl(dop, itag): - """ Decrypt signature for age-restricted item. Return url. """ - for x in dop.sm + dop.asm: - - if x['itag'] == itag and len(x['s']) == 86: - s = x['s'] - s = s[2:63] + s[82] + s[64:82] + s[63] - dbg("decrypted agesig: %s%s", s[:22], "..") - return _make_url(x['url'], s) - - def _get_matching_stream(smap, itag): """ Return the url and signature for a stream matching itag in smap. """ for x in smap: @@ -629,33 +653,57 @@ class Stream(object): def __init__(self, sm, parent): """ Set initial values. """ self._itag = sm['itag'] + # is_dash = "width" in sm and "height" in sm + is_dash = "dash" in sm if self._itag not in g.itags: logging.warning("Unknown itag: %s", self._itag) return None + self._mediatype = g.itags[self.itag][2] self._threed = 'stereo3d' in sm and sm['stereo3d'] == '1' - self._resolution = g.itags[self.itag][0] - self._dimensions = tuple(self.resolution.split("-")[0].split("x")) - self._dimensions = tuple([int(x) if x.isdigit() else x for x in - self._dimensions]) + + if is_dash: + + if sm['width'] != "None": # dash video + self._resolution = "%sx%s" % (sm['width'], sm['height']) + self._quality = self._resolution + self._dimensions = (int(sm['width']), int(sm['height'])) + + else: # dash audio + self._resolution = "0x0" + self._dimensions = (0, 0) + self._rawbitrate = int(sm['bitrate']) + # self._bitrate = uni(int(sm['bitrate']) // 1024) + "k" + self._bitrate = g.itags[self.itag][0] + self._quality = self._bitrate + + self._fsize = int(sm['size']) + # self._bitrate = sm['bitrate'] + # self._rawbitrate = uni(int(self._bitrate) // 1024) + "k" + + else: # not dash + self._resolution = g.itags[self.itag][0] + self._fsize = None + self._bitrate = self._rawbitrate = None + self._dimensions = tuple(self.resolution.split("-")[0].split("x")) + self._dimensions = tuple([int(x) if x.isdigit() else x for x in + self._dimensions]) + self._quality = self.resolution + self._vidformat = sm['type'].split(';')[0] # undocumented - self._quality = self.resolution self._extension = g.itags[self.itag][1] self._title = parent.title self.encrypted = 's' in sm self._parent = parent self._filename = self.generate_filename() - self._fsize = None - self._bitrate = self._rawbitrate = None - self._mediatype = g.itags[self.itag][2] self._notes = g.itags[self.itag][3] self._url = None self._rawurl = sm['url'] self._sig = sm['s'] if self.encrypted else sm.get("sig") self._active = False - if self.mediatype == "audio": + if self.mediatype == "audio" and not is_dash: self._dimensions = (0, 0) self._bitrate = self.resolution self._quality = self.bitrate @@ -749,48 +797,22 @@ def filename(self): @property def url(self): """ Return the url, decrypt if required. """ - if self._url: - pass - - elif self._parent.age: - self._url = gen_ageurl(self._parent.doppleganger, self.itag) - - elif not self.encrypted: - self._url = _make_url(self._rawurl, self._sig) - - else: - # encrypted url signatures - if self._parent.js_url: - # dbg("using cached js %s" % self._parent.js_url[-15:]) - enc_streams = self._parent.enc_streams + if not self._url: - else: - enc_streams, js_url, funcs = get_js_sm(self._parent.videoid) - self._parent.expiry = time.time() + g.lifespan - self._parent.js_url = js_url - - # check for age - if type(enc_streams) == uni and enc_streams == "age": - self._parent.age = True - dop = self._parent.doppleganger = funcs - self._url = gen_ageurl(dop, self.itag) - return self._url + if self._parent.age: + if self._sig: - # Create Pafy funcmap dict for this js_url - if not Pafy.funcmap.get(js_url): - Pafy.funcmap[js_url] = funcs + s = self._sig + self._sig = s[2:63] + s[82] + s[64:82] + s[63] - # else: - # Add javascript functions to Pafy funcmap dict - # in case same js_url has different functions - # Pafy.funcmap[js_url].update(funcs) + self._url = _make_url(self._rawurl, self._sig) - # Stash usable urls and encrypted sigs in parent Pafy object - self._parent.enc_streams = enc_streams + elif self.encrypted: + sig = _decodesig(self._sig, self._parent.js_url) + self._url = _make_url(self._rawurl, sig) - url, s = _get_matching_stream(enc_streams, self.itag) - sig = _decodesig(s, self._parent.js_url) if s else None - self._url = _make_url(url, sig) + else: + self._url = _make_url(self._rawurl, self._sig) return self._url @@ -933,7 +955,9 @@ def __init__(self, video_url, basic=True, gdata=False, self.sm = [] self.asm = [] + self.dash = [] self.js_url = None # if js_url is set then has new stream map + self._dashurl = None self.age = False self._streams = [] self._oggstreams = [] @@ -964,30 +988,53 @@ def __init__(self, video_url, basic=True, gdata=False, if gdata: self._fetch_gdata() - if signature: - # pylint: disable=W0104 - s = self.streams - - if self.ciphertag: - s[0].url # forces signature decryption - if size: - for s in self.allstreams: # pylint: disable=W0104 s.get_filesize() - def fetch_basic(self, ageurl=None): - """ Fetch info url page and set member vars. """ + def fetch_basic(self): + """ Fetch basic data and streams. """ if self._have_basic: return - if ageurl: - allinfo = get_video_info("none", ageurl) + self._fetch_basic() + # Ensure ciphertag matches url type + assert self.ciphertag is ('s' in self.sm[0]) - else: - allinfo = get_video_info(self.videoid) + if self.ciphertag: + dbg("Encrypted signature detected.") + stuff = get_js_sm(self.videoid) + + if isinstance(stuff, tuple): + # smaps, js_url, funcs, dashurl = get_js_sm(self.videoid) + smaps, js_url, funcs, dashurl = stuff + Pafy.funcmap[js_url] = funcs + self.sm, self.asm = smaps + self.js_url = js_url + dashsig = re.search(r"/s/([\w\.]+)", dashurl).group(1) + dbg("decrypting dash sig") + goodsig = _decodesig(dashsig, js_url) + self._dashurl = re.sub(r"/s/[\w\.]+", + "/signature/%s" % goodsig, dashurl) + + else: + self.age = True + info_url = stuff + self._fetch_basic(info_url=info_url) + s = re.search(r"/s/([\w\.]+)", self._dashurl).group(1) + s = s[2:63] + s[82] + s[64:82] + s[63] + self._dashurl = re.sub(r"/s/[\w\.]+", + "/signature/%s" % s, self._dashurl) + + self.dash = _extract_dash(self._dashurl) + self._have_basic = 1 + self._process_streams() + self.expiry = time.time() + g.lifespan + def _fetch_basic(self, info_url=None): + """ Fetch info url page and set member vars. """ + allinfo = get_video_info(self.videoid, newurl=info_url) new.callback("Fetched video info") def _get_lst(key, default="unknown", dic=allinfo): @@ -996,6 +1043,7 @@ def _get_lst(key, default="unknown", dic=allinfo): return retval[0] if retval != default else default self._title = _get_lst('title') + self._dashurl = _get_lst('dashmpd') self._author = _get_lst('author') self._videoid = _get_lst('video_id') self._rating = float(_get_lst('avg_rating', 0.0)) @@ -1007,21 +1055,9 @@ def _get_lst(key, default="unknown", dic=allinfo): self._bigthumb = _get_lst('iurlsd', "") self._bigthumbhd = _get_lst('iurlsdmaxres', "") self.ciphertag = _get_lst("use_cipher_signature") == "True" - - if ageurl: - self.ciphertag = False - dbg("Encrypted signature detected - age restricted") - - if self.ciphertag: - dbg("Encrypted signature detected.") - - # extract stream maps - self.sm = _extract_smap(g.UEFSM, allinfo, not self.js_url) - self.asm = _extract_smap(g.AF, allinfo, not self.js_url) - - self._have_basic = 1 - self._process_streams() - self.expiry = time.time() + g.lifespan + self.sm = _extract_smap(g.UEFSM, allinfo, True) + self.asm = _extract_smap(g.AF, allinfo, True) + dbg("extracted stream maps") def _fetch_gdata(self): """ Extract gdata values, fetch gdata if necessary. """ @@ -1060,8 +1096,19 @@ def _process_streams(self): streams = [x for x in streams if x.itag in g.itags] adpt_streams = [Stream(z, self) for z in self.asm] adpt_streams = [x for x in adpt_streams if x.itag in g.itags] + dash_streams = [Stream(z, self) for z in self.dash] + dash_streams = [x for x in dash_streams if x.itag in g.itags] audiostreams = [x for x in adpt_streams if x.bitrate] videostreams = [x for x in adpt_streams if not x.bitrate] + dash_itags = [x.itag for x in dash_streams] + audiostreams = [x for x in audiostreams if x.itag not in dash_itags] + videostreams = [x for x in videostreams if x.itag not in dash_itags] + audiostreams += [x for x in dash_streams if x.mediatype == "audio"] + videostreams += [x for x in dash_streams if x.mediatype != "audio"] + audiostreams = sorted(audiostreams, key=lambda x: x.rawbitrate, + reverse=True) + videostreams = sorted(videostreams, key=lambda x: x.dimensions, + reverse=True) m4astreams = [x for x in audiostreams if x.extension == "m4a"] oggstreams = [x for x in audiostreams if x.extension == "ogg"] self._streams = streams From 3475bd337d5706f22913f7ec76bab68895de7fcf Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 01:26:38 +1000 Subject: [PATCH 07/22] Update tests for additional streams --- tests/test.py | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/test.py b/tests/test.py index 1a5ff6c5..6d075d2f 100644 --- a/tests/test.py +++ b/tests/test.py @@ -101,7 +101,7 @@ def test_generate_filename_with_meta(self): a = p.getbestaudio() filename = a.generate_filename(meta=True) self.assertEqual(filename, 'Jessie J - WILD (Official) ft. Big Sean' - ', Dizzee Rascal-jrNLsC_Y9Oo-171.ogg') + ', Dizzee Rascal-jrNLsC_Y9Oo-141.m4a') self.assertEqual(a.threed, False) self.assertEqual(a.title, 'Jessie J - WILD (Official) ft. Big Sean' ', Dizzee Rascal') @@ -121,13 +121,13 @@ def test_pafy_download(self): @stdout_to_null def test_pafy_download_resume(self): """ Test resuming a partial download. """ - tempname = "WASTE 2 SECONDS OF YOUR LIFE-DsAn_n6O5Ns-171.ogg.temp" + tempname = "WASTE 2 SECONDS OF YOUR LIFE-DsAn_n6O5Ns-139.m4a.temp" with open(tempname, "w") as ladeeda: ladeeda.write("abc") vid = pafy.new("DsAn_n6O5Ns", gdata=True, basic=False, signature=False) vstream = vid.audiostreams[-1].download(meta=True) - name = "WASTE 2 SECONDS OF YOUR LIFE.ogg" - self.assertEqual(22675, os.stat(name).st_size) + name = "WASTE 2 SECONDS OF YOUR LIFE.m4a" + self.assertEqual(12880, os.stat(name).st_size) # test fetching attributes vid._title = None @@ -167,8 +167,8 @@ def test_pafy_download_to_dir(self): """ Test user specified path. """ vid = pafy.new("DsAn_n6O5Ns", gdata=True) vstream = vid.audiostreams[-1].download("/tmp", meta=True) - name = "/tmp/WASTE 2 SECONDS OF YOUR LIFE.ogg" - self.assertEqual(22675, os.stat(name).st_size) + name = "/tmp/WASTE 2 SECONDS OF YOUR LIFE.m4a" + self.assertEqual(12880, os.stat(name).st_size) def test_lazy_pafy(self): """ Test create pafy object without fetching data. """ @@ -341,12 +341,12 @@ def test_misc_tests(self): 'category': 'Education', 'description': '1223db22b4a38d0a8ebfcafb549f40c39af26251', 'bestsize': 54284129, - 'all streams': 10, + 'all streams': 17, 'normal streams': 5, - 'video streams': 4, - 'audio streams': 1, - 'ogg streams': 0, - 'm4a streams': 1, + 'video streams': 7, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'www.youtube.com/watch?v=SeIJmciN8mo', @@ -361,12 +361,12 @@ def test_misc_tests(self): 'category': 'Music', 'description': 'fa34f2704be9c1b21949af515e813f644f14b89a', 'bestsize': 101836539, - 'all streams': 21, + 'all streams': 24, 'normal streams': 6, 'video streams': 13, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'https://youtu.be/watch?v=07FYdnEawAQ', @@ -381,12 +381,12 @@ def test_misc_tests(self): 'category': 'Music', 'description': '55e8e6e2b219712bf94d67c2434530474a503265', 'bestsize': 79885533, - 'all streams': 21, + 'all streams': 24, 'normal streams': 6, 'video streams': 13, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'EnHp24CVORc', @@ -402,12 +402,12 @@ def test_misc_tests(self): 'category': 'People', 'description': '3c884d9791be15646ddf351edffcb2dd22ec70f8', 'bestsize': 101083389, - 'all streams': 19, + 'all streams': 22, 'normal streams': 6, 'video streams': 11, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, }, { 'identifier': 'http://youtube.com/watch?v=rYEDA3JcQqw', @@ -422,12 +422,12 @@ def test_misc_tests(self): 'category': 'Music', 'description': '72bfd9472e59a8f48b83af36197ebcf5d2227609', 'bestsize': 41334333, - 'all streams': 27, + 'all streams': 30, 'normal streams': 6, 'video streams': 19, - 'audio streams': 2, - 'ogg streams': 1, - 'm4a streams': 1, + 'audio streams': 5, + 'ogg streams': 2, + 'm4a streams': 3, } ] From 6c63b45368f80ce2083a79a0af57cf0323cc4ffe Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 01:52:59 +1000 Subject: [PATCH 08/22] Python 2.6 ElementTree compatibility --- pafy/pafy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index b572ce10..982b8e0a 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -277,14 +277,14 @@ def _extract_dash(dashurl): dbg("Fetching dash page") dashdata = fetch_decode(dashurl) dbg("DASH list fetched") - ns = {"dash": "urn:mpeg:DASH:schema:MPD:2011", - "yt": "http://youtube.com/yt/2012/10/10"} + ns = "{urn:mpeg:DASH:schema:MPD:2011}" + # "yt": "http://youtube.com/yt/2012/10/10"} tree = ElementTree.fromstring(dashdata) - tlist = tree.findall(".//dash:Representation", namespaces=ns) + tlist = tree.findall(".//%sRepresentation" % ns) dashmap = [] for x in tlist: - baseurl = x.find("dash:BaseURL", namespaces=ns) + baseurl = x.find("%sBaseURL" % ns) url = baseurl.text size = baseurl.items()[0][1] # be more specific, don't rely on pos bitrate = x.get("bandwidth") From 68adf218179b2a380dee3cf8d5d920f0ce29ec75 Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 02:07:24 +1000 Subject: [PATCH 09/22] Update tests --- tests/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test.py b/tests/test.py index 6d075d2f..5eec7ddb 100644 --- a/tests/test.py +++ b/tests/test.py @@ -107,7 +107,7 @@ def test_generate_filename_with_meta(self): ', Dizzee Rascal') self.assertEqual(a.notes, '') self.assertEqual(a.filename, 'Jessie J - WILD (Official) ft. Big Sean' - ', Dizzee Rascal.ogg') + ', Dizzee Rascal.m4a') @stdout_to_null def test_pafy_download(self): @@ -121,7 +121,7 @@ def test_pafy_download(self): @stdout_to_null def test_pafy_download_resume(self): """ Test resuming a partial download. """ - tempname = "WASTE 2 SECONDS OF YOUR LIFE-DsAn_n6O5Ns-139.m4a.temp" + tempname = "WASTE 2 SECONDS OF YOUR LIFE-DsAn_n6O5Ns-141.m4a.temp" with open(tempname, "w") as ladeeda: ladeeda.write("abc") vid = pafy.new("DsAn_n6O5Ns", gdata=True, basic=False, signature=False) From 956fec2b600596070517ded2f90ce7d51be38366 Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 02:30:24 +1000 Subject: [PATCH 10/22] Update version number --- docs-sphinx/conf.py | 4 ++-- pafy/pafy.py | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs-sphinx/conf.py b/docs-sphinx/conf.py index 5bb5c7d5..b3a0d8a4 100644 --- a/docs-sphinx/conf.py +++ b/docs-sphinx/conf.py @@ -56,9 +56,9 @@ # built documents. # # The short X.Y version. -version = '0.3.62' +version = '0.3.63' # The full version, including alpha/beta/rc tags. -release = '0.3.62' +release = '0.3.63' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pafy/pafy.py b/pafy/pafy.py index 982b8e0a..3ffb0914 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -27,7 +27,7 @@ from __future__ import unicode_literals -__version__ = "0.3.62" +__version__ = "0.3.63" __author__ = "nagev" __license__ = "GPLv3" diff --git a/setup.py b/setup.py index 8f865401..3db49938 100755 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ name='Pafy', packages=['pafy'], scripts=['scripts/ytdl'], - version='0.3.62', + version='0.3.63', description="Retrieve YouTube content and metadata", keywords=["Pafy", "API", "YouTube", "youtube", "download", "video"], author="nagev", From a580da7fb88a4151df6c259f7141bddea6cdb47a Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 07:23:07 +1000 Subject: [PATCH 11/22] Increase stream count --- tests/test.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/test.py b/tests/test.py index 5eec7ddb..af31aca4 100644 --- a/tests/test.py +++ b/tests/test.py @@ -58,6 +58,8 @@ def runOnce(self): del _ for playlist in Test.playlists: + + playlist['fetched'] = pafy.get_playlist(playlist['identifier']) Test.hasrun = True @@ -87,11 +89,6 @@ def test_make_url_no_sig(self): args = dict(raw="a=b&c=d", sig=None, quick=False) self.assertRaises(IOError, pafy._make_url, **args) - def test_no_matching_stream(self): - """ Test no matching stream found. """ - smap = dict() - self.assertRaises(IOError, pafy._get_matching_stream, smap, None) - def test_generate_filename_with_meta(self): """ Use meta argument to generate filename. """ if Test.quick: @@ -341,9 +338,9 @@ def test_misc_tests(self): 'category': 'Education', 'description': '1223db22b4a38d0a8ebfcafb549f40c39af26251', 'bestsize': 54284129, - 'all streams': 17, + 'all streams': 18, 'normal streams': 5, - 'video streams': 7, + 'video streams': 8, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, @@ -361,9 +358,9 @@ def test_misc_tests(self): 'category': 'Music', 'description': 'fa34f2704be9c1b21949af515e813f644f14b89a', 'bestsize': 101836539, - 'all streams': 24, + 'all streams': 25, 'normal streams': 6, - 'video streams': 13, + 'video streams': 14, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, @@ -381,9 +378,9 @@ def test_misc_tests(self): 'category': 'Music', 'description': '55e8e6e2b219712bf94d67c2434530474a503265', 'bestsize': 79885533, - 'all streams': 24, + 'all streams': 25, 'normal streams': 6, - 'video streams': 13, + 'video streams': 14, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, @@ -402,9 +399,9 @@ def test_misc_tests(self): 'category': 'People', 'description': '3c884d9791be15646ddf351edffcb2dd22ec70f8', 'bestsize': 101083389, - 'all streams': 22, + 'all streams': 23, 'normal streams': 6, - 'video streams': 11, + 'video streams': 12, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, @@ -422,9 +419,9 @@ def test_misc_tests(self): 'category': 'Music', 'description': '72bfd9472e59a8f48b83af36197ebcf5d2227609', 'bestsize': 41334333, - 'all streams': 30, + 'all streams': 31, 'normal streams': 6, - 'video streams': 19, + 'video streams': 20, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, From de1f5ef3788c7baf5aaf8c21f713d3495bfb7d77 Mon Sep 17 00:00:00 2001 From: np1 Date: Mon, 29 Sep 2014 07:35:35 +1000 Subject: [PATCH 12/22] Added itag 278 --- pafy/pafy.py | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index 3ffb0914..38f85c5a 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -108,34 +108,32 @@ def new(url, basic=True, gdata=False, signature=True, size=False, callback=lambda x: None): """ Return a new pafy instance given a url or video id. + NOTE: The signature argument has been deprecated and now has no effect, + it will be removed in a forthcoming version. + Optional arguments: basic - fetch basic metadata and streams gdata - fetch gdata info (upload date, description, category) size - fetch the size of each stream (slow)(decrypts urls if needed) callback - a callback function to receive status strings - If any of the first four above arguments are False, those data items will + If any of the first three above arguments are False, those data items will be fetched only when first called for. The defaults are recommended for most cases. If you wish to create - many video objects at once, you may want to set all to False, eg: + many video objects at once, you may want to set basic to False, eg: - video = pafy.new(basic=False, signature=False) + video = pafy.new(basic=False) This will be quick because no http requests will be made on initialisation. - Setting signature or size to True will override the basic argument - and force basic data to be fetched too (basic data is required to - obtain Stream objects and determine whether signatures are encrypted. - - Similarly, setting size to true will force the signature data to be - fetched if the videos have encrypted signatures, so will override the - value set in the signature argument. + Setting size to True will override the basic argument and force basic data + to be fetched too (basic data is required to obtain Stream objects. """ if not signature: # pylint: disable=W0104 - logging.warn("Use of signature flag has been deprecated.") + logging.warning("Use of signature flag has been deprecated.") return Pafy(url, basic, gdata, signature, size, callback) @@ -244,9 +242,9 @@ class g(object): '219': ('854x480', 'webm', 'video', 'VP8'), '242': ('360x240', 'webm', 'video', 'VP9'), '243': ('480x360', 'webm', 'video', 'VP9'), - '244': ('640x480', 'webm', 'video', 'VP9'), - '245': ('640x480', 'webm', 'video', 'VP9'), - '246': ('640x480', 'webm', 'video', 'VP9'), + '244': ('640x480', 'webm', 'video', 'VP9 low'), + '245': ('640x480', 'webm', 'video', 'VP9 med'), + '246': ('640x480', 'webm', 'video', 'VP9 high'), '247': ('720x480', 'webm', 'video', 'VP9'), '248': ('1920x1080', 'webm', 'video', 'VP9'), '249': ('48k', 'ogg', 'audio', 'Opus'), @@ -256,7 +254,8 @@ class g(object): '258': ('320k', 'm4a', 'audio', '6-channel'), '264': ('2560x1440', 'm4v', 'video', ''), '271': ('1920x1280', 'webm', 'video', 'VP9'), - '272': ('3414x1080', 'webm', 'video', 'VP9') + '272': ('3414x1080', 'webm', 'video', 'VP9'), + '278': ('256x144', 'webm', 'video', 'VP9'), } @@ -562,13 +561,17 @@ def fetch_cached(url, encoding=None, dbg_ref=""): if os.path.exists(cached_filename): dbg("fetched %s from cache", dbg_ref) - return open(cached_filename).read() + with open(cached_filename) as O: + return O.read() else: data = fetch_decode(url, "utf8") # unicode dbg("Fetched %s", dbg_ref) new.callback("Fetched %s" % dbg_ref) - open(cached_filename, "w").write(data) + + with open(cached_filename, "w") as W: + W.write(data) + return data @@ -628,7 +631,7 @@ def _make_url(raw, sig, quick=True): if "signature=" not in raw: - if not sig: + if sig is None: raise IOError("Error retrieving url") raw += "&signature=" + sig @@ -636,16 +639,6 @@ def _make_url(raw, sig, quick=True): return raw -def _get_matching_stream(smap, itag): - """ Return the url and signature for a stream matching itag in smap. """ - for x in smap: - - if x['itag'] == itag: - return x['url'], x.get('s') - - raise IOError("Error fetching stream") - - class Stream(object): """ YouTube video stream class. """ From bca5755773c8d1df46f483dfc02a4324f5e41ca6 Mon Sep 17 00:00:00 2001 From: np1 Date: Tue, 30 Sep 2014 17:29:08 +1000 Subject: [PATCH 13/22] Tidyup --- pafy/pafy.py | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index 38f85c5a..9540ebae 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -3,7 +3,7 @@ """ pafy.py. -Python library to retrieve YouTube content and metadata +Python library to download YouTube content and retrieve metadata https://github.com/np1/pafy @@ -132,8 +132,7 @@ def new(url, basic=True, gdata=False, signature=True, size=False, """ if not signature: - # pylint: disable=W0104 - logging.warning("Use of signature flag has been deprecated.") + logging.warning("signature argument has been deprecated.") return Pafy(url, basic, gdata, signature, size, callback) @@ -213,10 +212,6 @@ class g(object): '44': ('854x480', 'webm', "normal", ''), '45': ('1280x720', 'webm', "normal", ''), '46': ('1920x1080', 'webm', "normal", ''), - - # '59': ('1x1', 'mp4', 'normal', ''), - # '78': ('1x1', 'mp4', 'normal', ''), - '82': ('640x360-3D', 'mp4', "normal", ''), '83': ('640x480-3D', 'mp4', 'normal', ''), '84': ('1280x720-3D', 'mp4', "normal", ''), @@ -277,7 +272,7 @@ def _extract_dash(dashurl): dashdata = fetch_decode(dashurl) dbg("DASH list fetched") ns = "{urn:mpeg:DASH:schema:MPD:2011}" - # "yt": "http://youtube.com/yt/2012/10/10"} + ytns = "{http://youtube.com/yt/2012/10/10}" tree = ElementTree.fromstring(dashdata) tlist = tree.findall(".//%sRepresentation" % ns) dashmap = [] @@ -285,7 +280,7 @@ def _extract_dash(dashurl): for x in tlist: baseurl = x.find("%sBaseURL" % ns) url = baseurl.text - size = baseurl.items()[0][1] # be more specific, don't rely on pos + size = baseurl.attrib["%scontentLength" % ytns] bitrate = x.get("bandwidth") itag = uni(x.get("id")) width = uni(x.get("width")) @@ -548,7 +543,7 @@ def _decodesig(sig, js_url): return solved -def fetch_cached(url, encoding=None, dbg_ref=""): +def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): """ Fetch url - from tmpdir if already retrieved. """ # TODO: prune cache dir tmpdir = os.path.join(tempfile.gettempdir(), "pafy") @@ -557,20 +552,21 @@ def fetch_cached(url, encoding=None, dbg_ref=""): os.makedirs(tmpdir) url_md5 = hashlib.md5(url.encode("utf8")).hexdigest() - cached_filename = os.path.join(tmpdir, url_md5) + cached_filename = os.path.join(tmpdir, file_prefix + url_md5) if os.path.exists(cached_filename): dbg("fetched %s from cache", dbg_ref) - with open(cached_filename) as O: - return O.read() + + with open(cached_filename) as f: + return f.read() else: data = fetch_decode(url, "utf8") # unicode dbg("Fetched %s", dbg_ref) new.callback("Fetched %s" % dbg_ref) - with open(cached_filename, "w") as W: - W.write(data) + with open(cached_filename, "w") as f: + f.write(data) return data @@ -612,7 +608,8 @@ def get_js_sm(video_id): if not funcs: dbg("Fetching javascript") new.callback("Fetching javascript") - javascript = fetch_cached(js_url, encoding="utf8", dbg_ref="javascript") + javascript = fetch_cached(js_url, encoding="utf8", + dbg_ref="javascript", file_prefix="js-") mainfunc = _get_mainfunc_from_js(javascript) funcs = _get_other_funcs(mainfunc, javascript) funcs['mainfunction'] = mainfunc @@ -793,19 +790,15 @@ def url(self): if not self._url: if self._parent.age: - if self._sig: + if self._sig: s = self._sig self._sig = s[2:63] + s[82] + s[64:82] + s[63] - self._url = _make_url(self._rawurl, self._sig) - elif self.encrypted: - sig = _decodesig(self._sig, self._parent.js_url) - self._url = _make_url(self._rawurl, sig) + self._sig = _decodesig(self._sig, self._parent.js_url) - else: - self._url = _make_url(self._rawurl, self._sig) + self._url = _make_url(self._rawurl, self._sig) return self._url @@ -1360,7 +1353,7 @@ def populate_from_playlist(self, pl_data): self.playlist_meta = pl_data -def get_playlist(playlist_url, basic=False, gdata=False, signature=False, +def get_playlist(playlist_url, basic=False, gdata=False, signature=True, size=False, callback=lambda x: None): """ Return a dict containing Pafy objects from a YouTube Playlist. From f5dde3bb23f37df3379902075e21d5943a5d3a4b Mon Sep 17 00:00:00 2001 From: np1 Date: Tue, 30 Sep 2014 17:44:23 +1000 Subject: [PATCH 14/22] Remux audio downloads (gh-55 and gh-41) This adds the remux parameter to the Stream download function. If remux=True and ffmpeg or avconv are available, audio downloads will be remuxed to fix some compatibility issues with the downloaded file. --- pafy/pafy.py | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index 2611411a..ddae3f81 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -543,6 +543,35 @@ def _decodesig(sig, js_url): return solved +def remux(infile, outfile, quiet=False, muxer="ffmpeg"): + """ Remux audio. """ + from subprocess import call, STDOUT + muxer = muxer if isinstance(muxer, str) else "ffmpeg" + + for tool in set([muxer, "ffmpeg", "avconv"]): + cmd = [tool, "-y", "-i", infile, "-acodec", "copy", "-vn", outfile] + + try: + with open(os.devnull, "w") as devnull: + call(cmd, stdout=devnull, stderr=STDOUT) + + except OSError: + dbg("Failed to remux audio using %s", tool) + + else: + os.unlink(infile) + dbg("remuxed audio file using %s" % tool) + + if not quiet: + sys.stdout.write("\nAudio remuxed.\n") + + break + + else: + logging.warning("Failed to remux audio") + os.rename(infile, outfile) + + def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): """ Fetch url - from tmpdir if already retrieved. """ # TODO: prune cache dir @@ -834,10 +863,11 @@ def cancel(self): return True def download(self, filepath="", quiet=False, callback=lambda *x: None, - meta=False): + meta=False, remux_audio=False): """ Download. Use quiet=True to supress output. Return filename. Use meta=True to append video id and itag to generated filename + Use remax_audio=True to remux audio file downloads """ # pylint: disable=R0912,R0914 @@ -909,14 +939,19 @@ def download(self, filepath="", quiet=False, callback=lambda *x: None, callback(total, *progress_stats) if self._active: - os.rename(temp_filepath, filepath) + + if remux_audio and self.mediatype == "audio": + remux(temp_filepath, filepath, quiet=quiet, muxer=remux_audio) + + else: + os.rename(temp_filepath, filepath) + return filepath - else: + else: # download incomplete, return temp filepath outfh.close() return temp_filepath - class Pafy(object): """ Class to represent a YouTube video. """ From 00ef6d1bf0fa723e1726b82b80d2bf4e427f3879 Mon Sep 17 00:00:00 2001 From: np1 Date: Tue, 30 Sep 2014 17:44:23 +1000 Subject: [PATCH 15/22] Remux audio downloads (gh-55 and gh-41) This adds the remux_audio parameter to the Stream download function. If remux_audio=True and ffmpeg or avconv are available, audio downloads will be remuxed to fix some compatibility issues with the downloaded file. --- pafy/pafy.py | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index 2611411a..ddae3f81 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -543,6 +543,35 @@ def _decodesig(sig, js_url): return solved +def remux(infile, outfile, quiet=False, muxer="ffmpeg"): + """ Remux audio. """ + from subprocess import call, STDOUT + muxer = muxer if isinstance(muxer, str) else "ffmpeg" + + for tool in set([muxer, "ffmpeg", "avconv"]): + cmd = [tool, "-y", "-i", infile, "-acodec", "copy", "-vn", outfile] + + try: + with open(os.devnull, "w") as devnull: + call(cmd, stdout=devnull, stderr=STDOUT) + + except OSError: + dbg("Failed to remux audio using %s", tool) + + else: + os.unlink(infile) + dbg("remuxed audio file using %s" % tool) + + if not quiet: + sys.stdout.write("\nAudio remuxed.\n") + + break + + else: + logging.warning("Failed to remux audio") + os.rename(infile, outfile) + + def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): """ Fetch url - from tmpdir if already retrieved. """ # TODO: prune cache dir @@ -834,10 +863,11 @@ def cancel(self): return True def download(self, filepath="", quiet=False, callback=lambda *x: None, - meta=False): + meta=False, remux_audio=False): """ Download. Use quiet=True to supress output. Return filename. Use meta=True to append video id and itag to generated filename + Use remax_audio=True to remux audio file downloads """ # pylint: disable=R0912,R0914 @@ -909,14 +939,19 @@ def download(self, filepath="", quiet=False, callback=lambda *x: None, callback(total, *progress_stats) if self._active: - os.rename(temp_filepath, filepath) + + if remux_audio and self.mediatype == "audio": + remux(temp_filepath, filepath, quiet=quiet, muxer=remux_audio) + + else: + os.rename(temp_filepath, filepath) + return filepath - else: + else: # download incomplete, return temp filepath outfh.close() return temp_filepath - class Pafy(object): """ Class to represent a YouTube video. """ From 3f186f2fd878e1f613dd08be07f02ce4a0981c51 Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:02:41 +1100 Subject: [PATCH 16/22] prune temp files --- pafy/pafy.py | 50 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/pafy/pafy.py b/pafy/pafy.py index ddae3f81..8a5b051b 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -109,7 +109,7 @@ def new(url, basic=True, gdata=False, signature=True, size=False, """ Return a new pafy instance given a url or video id. NOTE: The signature argument has been deprecated and now has no effect, - it will be removed in a forthcoming version. + it will be removed in a future version. Optional arguments: basic - fetch basic metadata and streams @@ -132,7 +132,8 @@ def new(url, basic=True, gdata=False, signature=True, size=False, """ if not signature: - logging.warning("signature argument has been deprecated.") + logging.warning("signature argument has no effect and will be removed" + " in a future version.") return Pafy(url, basic, gdata, signature, size, callback) @@ -268,6 +269,7 @@ def _extract_smap(map_name, dic, zero_idx=True): def _extract_dash(dashurl): """ Download dash url and extract some data. """ + # pylint: disable = R0914 dbg("Fetching dash page") dashdata = fetch_decode(dashurl) dbg("DASH list fetched") @@ -568,13 +570,12 @@ def remux(infile, outfile, quiet=False, muxer="ffmpeg"): break else: - logging.warning("Failed to remux audio") + logging.warning("audio remux failed") os.rename(infile, outfile) def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): """ Fetch url - from tmpdir if already retrieved. """ - # TODO: prune cache dir tmpdir = os.path.join(tempfile.gettempdir(), "pafy") if not os.path.exists(tmpdir): @@ -587,7 +588,9 @@ def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): dbg("fetched %s from cache", dbg_ref) with open(cached_filename) as f: - return f.read() + retval = f.read() + + return retval else: data = fetch_decode(url, "utf8") # unicode @@ -597,9 +600,39 @@ def fetch_cached(url, encoding=None, dbg_ref="", file_prefix=""): with open(cached_filename, "w") as f: f.write(data) + # prune files after write + prune_files(tmpdir, file_prefix) return data +def prune_files(path, prefix="", age_max=3600 * 24 * 14, count_max=4): + """ Remove oldest files from path that start with prefix. + + remove files older than age_max, leave maximum of count_max files. + """ + tempfiles = [] + + if not os.path.isdir(path): + return + + for f in os.listdir(path): + filepath = os.path.join(path, f) + + if os.path.isfile(filepath) and f.startswith(prefix): + age = time.time() - os.path.getmtime(filepath) + + if age > age_max: + os.unlink(filepath) + + else: + tempfiles.append((filepath, age)) + + tempfiles = sorted(tempfiles, key=lambda x: x[1], reverse=True) + + for f in tempfiles[:-count_max]: + os.unlink(f[0]) + + def get_js_sm(video_id): """ Fetch watchinfo page and extract stream map and js funcs if not known. @@ -952,6 +985,7 @@ def download(self, filepath="", quiet=False, callback=lambda *x: None, outfh.close() return temp_filepath + class Pafy(object): """ Class to represent a YouTube video. """ @@ -1020,8 +1054,10 @@ def fetch_basic(self): return self._fetch_basic() - # Ensure ciphertag matches url type - assert self.ciphertag is ('s' in self.sm[0]) + + if not self.ciphertag is ('s' in self.sm[0]): + logging.warning("ciphertag doesn't match signature type") + logging.warning(self.videoid) if self.ciphertag: dbg("Encrypted signature detected.") From 5b5d8a6fd928a5c1707587618facb8ad7880126b Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:06:03 +1100 Subject: [PATCH 17/22] update README examples --- README.rst | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 23d0ab2b..d54798fc 100644 --- a/README.rst +++ b/README.rst @@ -136,13 +136,13 @@ Download video and show progress: >>> best.download(quiet=False) 3,734,976 Bytes [0.20%] received. Rate: [ 719 KB/s]. ETA: [3284 secs] -Download video, use specific filepath: +Download video, use specific directory and/or filename: .. code-block:: pycon - >>> myfilename = "/tmp/" + best.title + "." + best.extension - >>> best.download(filepath=myfilename) + >>> filename = best.download(filepath="/tmp/") + >>> filename = best.download(filepath="/tmp/Game." + best.extension) Get audio-only streams (m4a and/or ogg vorbis): @@ -152,8 +152,11 @@ Get audio-only streams (m4a and/or ogg vorbis): >>> for a in audiostreams: ... print(a.bitrate, a.extension, a.get_filesize()) ... - 128k m4a 165076649 + 256k m4a 331379079 + 192k ogg 172524223 + 128k m4a 166863001 128k ogg 108981120 + 48k m4a 62700449 Download the 2nd audio stream from the above list: @@ -168,7 +171,7 @@ Get the best quality audio stream: >>> bestaudio = video.getbestaudio() >>> bestaudio.bitrate - '128k' + '256' Download the best quality audio file: @@ -176,7 +179,7 @@ Download the best quality audio file: >>> bestaudio.download() -show ALL formats for a video (video+audio, video-only and audio-only): +show all media types for a video (video+audio, video-only and audio-only): .. code-block:: pycon @@ -184,6 +187,7 @@ show ALL formats for a video (video+audio, video-only and audio-only): >>> for s in allstreams: ... print(s.mediatype, s.extension, s.quality) ... + normal mp4 1280x720 normal webm 640x360 normal mp4 640x360 @@ -191,16 +195,20 @@ show ALL formats for a video (video+audio, video-only and audio-only): normal 3gp 320x240 normal 3gp 176x144 video m4v 1280x720 - video webm 720x480 + video webm 1280x720 video m4v 854x480 - video webm 640x480 + video webm 854x480 video m4v 640x360 - video webm 480x360 + video webm 640x360 video m4v 426x240 - video webm 360x240 + video webm 426x240 video m4v 256x144 + video webm 256x144 + audio m4a 256k + audio ogg 192k audio m4a 128k audio ogg 128k + audio m4a 48k Installation @@ -288,14 +296,16 @@ list available dowload streams: Stream Type Format Quality Size ------ ---- ------ ------- ---- - 1 normal webm [640x360] 33 MB - 2 normal mp4 [640x360] 24 MB - 3 normal flv [320x240] 13 MB - 4 normal 3gp [320x240] 10 MB - 5 normal 3gp [176x144] 3 MB - 6 audio m4a [48k] 2 MB - 7 audio m4a [128k] 5 MB - 8 audio m4a [256k] 10 MB + 1 normal webm [640x360] 33 MB + 2 normal mp4 [640x360] 23 MB + 3 normal flv [320x240] 14 MB + 4 normal 3gp [320x240] 9 MB + 5 normal 3gp [176x144] 3 MB + 6 audio m4a [48k] 2 MB + 7 audio m4a [128k] 5 MB + 8 audio ogg [128k] 5 MB + 9 audio ogg [192k] 7 MB + 10 audio m4a [256k] 10 MB Download mp4 640x360 (ie. stream number 2): @@ -308,4 +318,4 @@ Download m4a audio stream at 256k bitrate: .. code-block:: bash - $ ytdl -n8 cyMHZVT91Dw + $ ytdl -n10 cyMHZVT91Dw From 600fa94a005a28820ea4ec6bd331952b090eee10 Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:06:38 +1100 Subject: [PATCH 18/22] Update version number for docs --- docs-sphinx/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs-sphinx/conf.py b/docs-sphinx/conf.py index b3a0d8a4..f339eea0 100644 --- a/docs-sphinx/conf.py +++ b/docs-sphinx/conf.py @@ -56,9 +56,9 @@ # built documents. # # The short X.Y version. -version = '0.3.63' +version = '0.3.64' # The full version, including alpha/beta/rc tags. -release = '0.3.63' +release = '0.3.64' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From f44b3bd8ab5fe0a2008994c17d4ab453d481b61a Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:07:17 +1100 Subject: [PATCH 19/22] Update tests, number of streams --- tests/test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test.py b/tests/test.py index af31aca4..f64f84c6 100644 --- a/tests/test.py +++ b/tests/test.py @@ -122,7 +122,7 @@ def test_pafy_download_resume(self): with open(tempname, "w") as ladeeda: ladeeda.write("abc") vid = pafy.new("DsAn_n6O5Ns", gdata=True, basic=False, signature=False) - vstream = vid.audiostreams[-1].download(meta=True) + vstream = vid.audiostreams[-1].download(meta=True, remux_audio=True) name = "WASTE 2 SECONDS OF YOUR LIFE.m4a" self.assertEqual(12880, os.stat(name).st_size) @@ -358,9 +358,9 @@ def test_misc_tests(self): 'category': 'Music', 'description': 'fa34f2704be9c1b21949af515e813f644f14b89a', 'bestsize': 101836539, - 'all streams': 25, + 'all streams': 23, 'normal streams': 6, - 'video streams': 14, + 'video streams': 12, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, @@ -378,9 +378,9 @@ def test_misc_tests(self): 'category': 'Music', 'description': '55e8e6e2b219712bf94d67c2434530474a503265', 'bestsize': 79885533, - 'all streams': 25, + 'all streams': 23, 'normal streams': 6, - 'video streams': 14, + 'video streams': 12, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, @@ -419,9 +419,9 @@ def test_misc_tests(self): 'category': 'Music', 'description': '72bfd9472e59a8f48b83af36197ebcf5d2227609', 'bestsize': 41334333, - 'all streams': 31, + 'all streams': 29, 'normal streams': 6, - 'video streams': 20, + 'video streams': 18, 'audio streams': 5, 'ogg streams': 2, 'm4a streams': 3, From 1ca21173954a077621f53fa51583233d519b57d3 Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:10:23 +1100 Subject: [PATCH 20/22] Update docs for download function; deprecate signature argument --- docs-sphinx/index.rst | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/docs-sphinx/index.rst b/docs-sphinx/index.rst index 01e7a03f..93f8eb13 100644 --- a/docs-sphinx/index.rst +++ b/docs-sphinx/index.rst @@ -2,7 +2,7 @@ Pafy Documentation ****************** .. module:: Pafy -This is the documentation for Pafy - a Python library for retrieving content and metadata from YouTube +This is the documentation for Pafy - a Python library to download YouTube content and retrieve metadata A quick start intro with usage examples is available in the `README `_ @@ -39,13 +39,13 @@ Create a Pafy object using the :func:`pafy.new` function, giving a YouTube video :type basic: bool :param gdata: fetch gdata info (upload date, description, category, username, likes, dislikes) :type gdata: bool - :param signature: fetch data required to decrypt urls, if encrypted + :param signature: Note: The signature argument now has no effect and will be removed in a future version :type signature: bool :param size: fetch the size of each stream (slow)(decrypts urls if needed) :type size: bool :param callback: a callback function to receive status strings :type callback: function - :rtype: Pafy object + :rtype: :class:`pafy.Pafy` If any of **basic**, **gdata**, **signature** or **size** are *False*, those data items will be fetched only when first called for. @@ -302,7 +302,7 @@ Stream Attributes .. attribute:: Stream.threed - Whether the stream is a 3D video (*boolean*) + True if the stream is a 3D video (*boolean*) .. attribute:: Stream.title @@ -331,24 +331,30 @@ An example of accessing Stream attributes:: Stream Methods -------------- + + + .. function:: Stream.get_filesize() Returns the filesize of a stream -.. function:: Stream.download([filepath=""][, quiet=False][, callback=None]) +.. function:: Stream.download([filepath=""][, quiet=False][, callback=None][, meta=False][, remux_audio=False]) - Downloads the stream object + Downloads the stream object, returns the path of the downloaded file. - :param filepath: The filepath to use to save the stream, defaults to *title.extension* if ommitted + :param filepath: The filepath to use to save the stream, defaults to (sanitised) *title.extension* if ommitted :type filepath: string - :param quiet: Whether to supress output of the download progress + :param quiet: If True, supress output of the download progress :type quiet: boolean :param callback: Call back function to use for receiving download progress :type callback: function or None + :param meta: If True, video id and itag are appended to filename + :type meta: bool + :param remux_audio: If True, remux audio file downloads (fixes some compatibility issues with file format, requires ffmpeg/avconv) + :type remux_audio: bool + :rtype: str - If a callback function is provided, it will be called repeatedly for each - chunk downloaded. It must be a function that takes five arguments. These - are: + If a callback function is provided, it will be called repeatedly for each chunk downloaded. It must be a function that takes the following five arguments; - total bytes in stream, *int* - total bytes downloaded, *int* @@ -366,10 +372,9 @@ Example of using stream.download():: v = pafy.new("cyMHZVT91Dw") s = v.getbest() print("Size is %s" % s.get_filesize()) - s.download() + filename = s.download() # starts download -Will download the file to the current working directory with the filename -*title.extension* (eg. "cute kittens.mp4") and output the following progress statistics:: +Will download to the current working directory and output the following progress statistics:: Size is 34775366 1,015,808 Bytes [2.92%] received. Rate: [ 640 kbps]. ETA: [51 secs] From 6f9f21fb9db33a05f8e43f10cac24127dbae1b6c Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:11:22 +1100 Subject: [PATCH 21/22] Update changelog --- CHANGELOG | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 11df32cf..f2db1c84 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,21 @@ +Development version - not yet released +Version 0.3.63 + +[Update] - Cache javascript file locally +[Update] - Added itag 278 (low-res VP9) +[Feature] - Retrieve DASH streams (more streams, higher quality audio) +[Feature] - added remux_audio parameter to download function to remux audio + files if ffmpeg or avconv is installed (better compatibility) + +------------------------------------------------------------------------------- + 27 August 2014 Version 0.3.62 [Feature] - Added url_https property to return https url ------------------------------------------------------------------------------- + 15 August 2014 Version 0.3.60 From f01e04c046d386a06ae0c3cf04fd49873b4b1fc5 Mon Sep 17 00:00:00 2001 From: np1 Date: Sun, 19 Oct 2014 01:28:49 +1100 Subject: [PATCH 22/22] Update version number to 0.3.64 --- CHANGELOG | 6 +++--- pafy/pafy.py | 2 +- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index f2db1c84..ce9e6443 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,7 @@ -Development version - not yet released -Version 0.3.63 +19 October 2014 +Version 0.3.64 -[Update] - Cache javascript file locally +[Update] - Cache javascript file locally to temp dir (less fetching) [Update] - Added itag 278 (low-res VP9) [Feature] - Retrieve DASH streams (more streams, higher quality audio) [Feature] - added remux_audio parameter to download function to remux audio diff --git a/pafy/pafy.py b/pafy/pafy.py index 8a5b051b..8efbb6e7 100644 --- a/pafy/pafy.py +++ b/pafy/pafy.py @@ -27,7 +27,7 @@ from __future__ import unicode_literals -__version__ = "0.3.63" +__version__ = "0.3.64" __author__ = "nagev" __license__ = "GPLv3" diff --git a/setup.py b/setup.py index 3db49938..58d0b772 100755 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ name='Pafy', packages=['pafy'], scripts=['scripts/ytdl'], - version='0.3.63', + version='0.3.64', description="Retrieve YouTube content and metadata", keywords=["Pafy", "API", "YouTube", "youtube", "download", "video"], author="nagev",