diff options
| author | kitty <magicvidyakitty@gmail.com> | 2015-10-08 06:31:23 -0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-10-08 21:05:16 +0600 | 
| commit | b90b0c4ffa6f84a2ea5556d4df99de5f8ef2c7dd (patch) | |
| tree | 5dfa5ea52556990ac44e541206899aa3c5d0480d | |
| parent | c725e2c8b038307723a890db19f21176722a1fe7 (diff) | |
Fixed the 4tube.com extractor to pull metadata from the associated JavaScript rather than from the HTML of the requested page.
| -rw-r--r-- | youtube_dl/extractor/fourtube.py | 11 | 
1 file changed, 6 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py index 3bb4f6239..226ee67f0 100644 --- a/youtube_dl/extractor/fourtube.py +++ b/youtube_dl/extractor/fourtube.py @@ -45,11 +45,9 @@ class FourTubeIE(InfoExtractor):              'uploadDate', webpage))          thumbnail = self._html_search_meta('thumbnailUrl', webpage)          uploader_id = self._html_search_regex( -            r'<a class="img-avatar" href="[^"]+/channels/([^/"]+)" title="Go to [^"]+ page">', -            webpage, 'uploader id') +            r'<a class="img-avatar" href="[^"]+/users/([^/"]+)" title="Go to [^"]+ page">', webpage, 'uploader id')          uploader = self._html_search_regex( -            r'<a class="img-avatar" href="[^"]+/channels/[^/"]+" title="Go to ([^"]+) page">', -            webpage, 'uploader') +            r'<a class="img-avatar" href="[^"]+/users/[^/"]+" title="Go to ([^"]+) page">', webpage, 'uploader')          categories_html = self._search_regex(              r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="list">(.*?)</ul>', @@ -68,9 +66,12 @@ class FourTubeIE(InfoExtractor):              webpage, 'like count', fatal=False))          duration = parse_duration(self._html_search_meta('duration', webpage)) +        player_url = self._search_regex(r'<script id="playerembed" src="([^"]+)">',webpage,'player javascript') +        player_js = self._download_webpage(player_url,video_id,'Downloading player Javascript') +          params_js = self._search_regex(              r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)', -            webpage, 'initialization parameters' +            player_js, 'initialization parameters'          )          params = self._parse_json('[%s]' % params_js, video_id)          media_id = params[0] | 
