diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-02-09 17:56:10 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-02-09 17:56:10 +0100 | 
| commit | 81c2f20b5386d89a62dc27293654d75b77f47473 (patch) | |
| tree | 1c47e338c343d09884819f0f81544a45209f6dec | |
| parent | 1afe753462f0293122dc7a9b534b4f5cdb1e5c4e (diff) | |
[youtube] Correct invalid JSON (Fixes #2353)
| -rw-r--r-- | youtube_dl/extractor/common.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 10 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 6 | 
3 files changed, 15 insertions, 6 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2c0c75604..84fca8ba0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -271,8 +271,11 @@ class InfoExtractor(object):
 
     def _download_json(self, url_or_request, video_id,
                        note=u'Downloading JSON metadata',
-                       errnote=u'Unable to download JSON metadata'):
+                       errnote=u'Unable to download JSON metadata',
+                       transform_source=None):
         json_string = self._download_webpage(url_or_request, video_id, note, errnote)
+        if transform_source:
+            json_string = transform_source(json_string)
         try:
             return json.loads(json_string)
         except ValueError as ve:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e038c7752..18a92e101 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
     unified_strdate,
     orderedSet,
     write_json_file,
+    uppercase_escape,
 )
 
 class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -1590,11 +1591,10 @@ class YoutubeChannelIE(InfoExtractor):
             # Download all channel pages using the json-based channel_ajax query
             for pagenum in itertools.count(1):
                 url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_webpage(url, channel_id,
-                                              u'Downloading page #%s' % pagenum)
-    
-                page = json.loads(page)
-    
+                page = self._download_json(
+                    url, channel_id, note=u'Downloading page #%s' % pagenum,
+                    transform_source=uppercase_escape)
+
                 ids_in_page = self.extract_videos_from_page(page['content_html'])
                 video_ids.extend(ids_in_page)
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 01c8c017d..fa8f80e02 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1214,3 +1214,9 @@ class PagedList(object):
             if end == nextfirstid:
                 break
         return res
+
+
+def uppercase_escape(s):
+    return re.sub(
+        r'\\U([0-9a-fA-F]{8})',
+        lambda m: compat_chr(int(m.group(1), base=16)), s)
```
