diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-03-21 22:14:24 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-03-21 22:14:24 +0100 | 
| commit | 0f2a2ba14b2cbf4bd8bec0ce32f8d7c27a733c68 (patch) | |
| tree | a7dbab5be496c91b5b4727e1a6052587f064ec82 | |
| parent | e24b5a861035c185eacc86a14967e67474235ca7 (diff) | |
| parent | d95e35d659ea0dd2aa1dba3a4ee9925c25505fda (diff) | |
Merge remote-tracking branch 'dstftw/generic-webpage-unescape'
Conflicts:
	youtube_dl/extractor/generic.py
| -rw-r--r-- | youtube_dl/extractor/generic.py | 18 | 
1 files changed, 17 insertions, 1 deletions
```diff
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 2254ade90..238913256 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -185,7 +185,18 @@ class GenericIE(InfoExtractor):
                 'uploader': 'Ze Frank',
                 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
             }
-        }
+        },
+        # nowvideo embed hidden behind percent encoding
+        {
+            'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+            'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+            'info_dict': {
+                'id': '06e53103ca9aa',
+                'ext': 'flv',
+                'title': 'Macross Episode 001  Watch Macross Episode 001 onl',
+                'description': 'No description',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -337,6 +348,11 @@ class GenericIE(InfoExtractor):
         except compat_xml_parse_error:
             pass
 
+        # Sometimes embedded video player is hidden behind percent encoding
+        # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+        # Unescaping the whole page allows to handle those cases in a generic way
+        webpage = compat_urllib_parse.unquote(webpage)
+
         # it's tempting to parse this further, but you would
         # have to take into account all the variations like
         #   Video Title - Site Name
```
