aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/__init__.py4
-rw-r--r--youtube_dl/extractor/adultswim.py139
-rw-r--r--youtube_dl/extractor/allocine.py4
-rw-r--r--youtube_dl/extractor/ard.py111
-rw-r--r--youtube_dl/extractor/comedycentral.py6
-rw-r--r--youtube_dl/extractor/cracked.py61
-rw-r--r--youtube_dl/extractor/dfb.py44
-rw-r--r--youtube_dl/extractor/firedrive.py1
-rw-r--r--youtube_dl/extractor/francetv.py4
-rw-r--r--youtube_dl/extractor/generic.py2
-rw-r--r--youtube_dl/extractor/livestream.py9
-rw-r--r--youtube_dl/extractor/mlb.py102
-rw-r--r--youtube_dl/extractor/npo.py2
-rw-r--r--youtube_dl/extractor/redtube.py4
-rw-r--r--youtube_dl/extractor/rtbf.py2
-rw-r--r--youtube_dl/extractor/sapo.py119
-rw-r--r--youtube_dl/extractor/tenplay.py2
-rw-r--r--youtube_dl/extractor/youtube.py476
18 files changed, 573 insertions, 519 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index faf473548..a4c7c713a 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,5 +1,6 @@
from .academicearth import AcademicEarthCourseIE
from .addanime import AddAnimeIE
+from .adultswim import AdultSwimIE
from .aftonbladet import AftonbladetIE
from .anitube import AnitubeIE
from .aol import AolIE
@@ -63,6 +64,7 @@ from .dailymotion import (
DailymotionUserIE,
)
from .daum import DaumIE
+from .dfb import DFBIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .drtv import DRTVIE
@@ -171,6 +173,7 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE
+from .mlb import MLBIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
@@ -250,6 +253,7 @@ from .rutube import (
RutubePersonIE,
)
from .rutv import RUTVIE
+from .sapo import SapoIE
from .savefrom import SaveFromIE
from .scivee import SciVeeIE
from .screencast import ScreencastIE
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
new file mode 100644
index 000000000..a00bfcb35
--- /dev/null
+++ b/youtube_dl/extractor/adultswim.py
@@ -0,0 +1,139 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+class AdultSwimIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.adultswim\.com/(?P<path>.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$'
+ _TEST = {
+ 'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title',
+ 'playlist': [
+ {
+ 'md5': '4da359ec73b58df4575cd01a610ba5dc',
+ 'info_dict': {
+ 'id': '8a250ba1450996e901453d7f02ca02f5',
+ 'ext': 'flv',
+ 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1',
+ 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+ 'uploader': 'Rick and Morty',
+ 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+ }
+ },
+ {
+ 'md5': 'ffbdf55af9331c509d95350bd0cc1819',
+ 'info_dict': {
+ 'id': '8a250ba1450996e901453d7f4bd102f6',
+ 'ext': 'flv',
+ 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2',
+ 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+ 'uploader': 'Rick and Morty',
+ 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+ }
+ },
+ {
+ 'md5': 'b92409635540304280b4b6c36bd14a0a',
+ 'info_dict': {
+ 'id': '8a250ba1450996e901453d7fa73c02f7',
+ 'ext': 'flv',
+ 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3',
+ 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+ 'uploader': 'Rick and Morty',
+ 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+ }
+ },
+ {
+ 'md5': 'e8818891d60e47b29cd89d7b0278156d',
+ 'info_dict': {
+ 'id': '8a250ba1450996e901453d7fc8ba02f8',
+ 'ext': 'flv',
+ 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4',
+ 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?',
+ 'uploader': 'Rick and Morty',
+ 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg'
+ }
+ }
+ ]
+ }
+
+ _video_extensions = {
+ '3500': 'flv',
+ '640': 'mp4',
+ '150': 'mp4',
+ 'ipad': 'm3u8',
+ 'iphone': 'm3u8'
+ }
+ _video_dimensions = {
+ '3500': (1280, 720),
+ '640': (480, 270),
+ '150': (320, 180)
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_path = mobj.group('path')
+
+ webpage = self._download_webpage(url, video_path)
+ episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id')
+ title = self._og_search_title(webpage)
+
+ index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id
+ idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index')
+
+ episode_el = idoc.find('.//episode')
+ show_title = episode_el.attrib.get('collectionTitle')
+ episode_title = episode_el.attrib.get('title')
+ thumbnail = episode_el.attrib.get('thumbnailUrl')
+ description = episode_el.find('./description').text.strip()
+
+ entries = []
+ segment_els = episode_el.findall('./segments/segment')
+
+ for part_num, segment_el in enumerate(segment_els):
+ segment_id = segment_el.attrib.get('id')
+ segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1)
+ thumbnail = segment_el.attrib.get('thumbnailUrl')
+ duration = segment_el.attrib.get('duration')
+
+ segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id
+ idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information')
+
+ formats = []
+ file_els = idoc.findall('.//files/file')
+
+ for file_el in file_els:
+ bitrate = file_el.attrib.get('bitrate')
+ type = file_el.attrib.get('type')
+ width, height = self._video_dimensions.get(bitrate, (None, None))
+ formats.append({
+ 'format_id': '%s-%s' % (bitrate, type),
+ 'url': file_el.text,
+ 'ext': self._video_extensions.get(bitrate, 'mp4'),
+ # The bitrate may not be a number (for example: 'iphone')
+ 'tbr': int(bitrate) if bitrate.isdigit() else None,
+ 'height': height,
+ 'width': width
+ })
+
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': segment_id,
+ 'title': segment_title,
+ 'formats': formats,
+ 'uploader': show_title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'description': description
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': episode_id,
+ 'display_id': video_path,
+ 'entries': entries,
+ 'title': '%s %s' % (show_title, episode_title),
+ 'description': description,
+ 'thumbnail': thumbnail
+ }
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py
index 34f0cd49b..7bd797884 100644
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dl/extractor/allocine.py
@@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor):
'id': '19540403',
'ext': 'mp4',
'title': 'Planes 2 Bande-annonce VF',
- 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d',
+ 'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e',
'thumbnail': 're:http://.*\.jpg',
},
}, {
@@ -42,7 +42,7 @@ class AllocineIE(InfoExtractor):
'id': '19544709',
'ext': 'mp4',
'title': 'Dragons 2 - Bande annonce finale VF',
- 'description': 'md5:e74a4dc750894bac300ece46c7036490',
+ 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac',
'thumbnail': 're:http://.*\.jpg',
},
}]
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index b36a4d46a..30a85c8c1 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -7,23 +7,32 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
+ qualities,
)
class ARDIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+ _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
- _TEST = {
- 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786',
- 'file': '19288786.mp4',
- 'md5': '515bf47ce209fb3f5a61b7aad364634c',
+ _TESTS = [{
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'file': '22429276.mp4',
+ 'md5': '469751912f1de0816a9fc9df8336476c',
'info_dict': {
- 'title': 'Edward Snowden im Interview - Held oder Verräter?',
- 'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.',
- 'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037',
+ 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?',
+ 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014',
},
'skip': 'Blocked outside of Germany',
- }
+ }, {
+ 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',
+ 'info_dict': {
+ 'id': '22490580',
+ 'ext': 'mp4',
+ 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)',
+ 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.',
+ },
+ 'skip': 'Blocked outside of Germany',
+ }]
def _real_extract(self, url):
# determine video id from url
@@ -43,40 +52,64 @@ class ARDIE(InfoExtractor):
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(
- 'dcterms.abstract', webpage, 'description')
- thumbnail = self._og_search_thumbnail(webpage)
-
-
- media_info = self._download_json(
- 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
- # The second element of the _mediaArray contains the standard http urls
- streams = media_info['_mediaArray'][1]['_mediaStreamArray']
- if not streams:
- if '"fsk"' in webpage:
- raise ExtractorError('This video is only available after 20:00')
-
- formats = []
-
- for s in streams:
- if type(s['_stream']) == list:
- for index, url in enumerate(s['_stream'][::-1]):
- quality = s['_quality'] + index
- formats.append({
- 'quality': quality,
- 'url': url,
- 'format_id': '%s-%s' % (determine_ext(url), quality)
+ 'dcterms.abstract', webpage, 'description', default=None)
+ if description is None:
+ description = self._html_search_meta(
+ 'description', webpage, 'meta description')
+
+ # Thumbnail is sometimes not present.
+ # It is in the mobile version, but that seems to use a different URL
+ # structure altogether.
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ media_streams = re.findall(r'''(?x)
+ mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
+ "([^"]+)"''', webpage)
+
+ if media_streams:
+ QUALITIES = qualities(['lo', 'hi', 'hq'])
+ formats = []
+ for furl in set(media_streams):
+ if furl.endswith('.f4m'):
+ fid = 'f4m'
+ else:
+ fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
+ fid = fid_m.group(1) if fid_m else None
+ formats.append({
+ 'quality': QUALITIES(fid),
+ 'format_id': fid,
+ 'url': furl,
+ })
+ else: # request JSON file
+ media_info = self._download_json(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+ # The second element of the _mediaArray contains the standard http urls
+ streams = media_info['_mediaArray'][1]['_mediaStreamArray']
+ if not streams:
+ if '"fsk"' in webpage:
+ raise ExtractorError('This video is only available after 20:00')
+
+ formats = []
+ for s in streams:
+ if type(s['_stream']) == list:
+ for index, url in enumerate(s['_stream'][::-1]):
+ quality = s['_quality'] + index
+ formats.append({
+ 'quality': quality,
+ 'url': url,
+ 'format_id': '%s-%s' % (determine_ext(url), quality)
})
- continue
+ continue
- format = {
- 'quality': s['_quality'],
- 'url': s['_stream'],
- }
+ format = {
+ 'quality': s['_quality'],
+ 'url': s['_stream'],
+ }
- format['format_id'] = '%s-%s' % (
- determine_ext(format['url']), format['quality'])
+ format['format_id'] = '%s-%s' % (
+ determine_ext(format['url']), format['quality'])
- formats.append(format)
+ formats.append(format)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 8af0abade..c81ce5a96 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -14,13 +14,13 @@ from ..utils import (
class ComedyCentralIE(MTVServicesInfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/
- (video-clips|episodes|cc-studios|video-collections)
+ _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
+ (video-clips|episodes|cc-studios|video-collections|full-episodes)
/(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TEST = {
- 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+ 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py
index 37c0f7ffb..74b880ffc 100644
--- a/youtube_dl/extractor/cracked.py
+++ b/youtube_dl/extractor/cracked.py
@@ -1,23 +1,26 @@
-# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ str_to_int,
+)
+
class CrackedIE(InfoExtractor):
- _VALID_URL = r'http?://.*?\.cracked\.com/video_+(?P<id>.*)_.*'
+ _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
_TEST = {
- 'url': 'http://www.cracked.com/video_18803_4-social-criticisms-hidden-in-sonic-hedgehog-games.html',
-
+ 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
+ 'md5': '4b29a5eeec292cd5eca6388c7558db9e',
'info_dict': {
- 'id': '18803',
+ 'id': '19006',
'ext': 'mp4',
- 'title': "4 Social Criticisms Hidden in 'Sonic the Hedgehog' Games | Cracked.com",
- 'height': 375,
- 'width': 666,
-
-
+ 'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
+ 'description': 'md5:3b909e752661db86007d10e5ec2df769',
+ 'timestamp': 1405659600,
+ 'upload_date': '20140718',
}
}
@@ -26,21 +29,37 @@ class CrackedIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- title = self._search_regex(r'<title>(.*?)</title>',webpage,'title')
- video_url = self._search_regex(r'var CK_vidSrc = "+(.*)"',webpage,'url')
- width = self._search_regex(r'width="(.*?)"',webpage,'width')
- height = re.findall(r'height="(.*?)"',webpage)[1]
+ video_url = self._html_search_regex(
+ [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL')
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
+ if timestamp:
+ timestamp = parse_iso8601(timestamp[:-6])
- return {
- 'url':video_url,
- 'id': video_id,
- 'ext':'mp4',
- 'title':title,
- 'height':int(height),
- 'width':int(width)
+ view_count = str_to_int(self._html_search_regex(
+ r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._html_search_regex(
+ r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False))
+ m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
+ if m:
+ width = int(m.group('width'))
+ height = int(m.group('height'))
+ else:
+ width = height = None
+ return {
+ 'id': video_id,
+ 'url':video_url,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'height': height,
+ 'width': width,
} \ No newline at end of file
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py
new file mode 100644
index 000000000..cb8e06822
--- /dev/null
+++ b/youtube_dl/extractor/dfb.py
@@ -0,0 +1,44 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class DFBIE(InfoExtractor):
+ IE_NAME = 'tv.dfb.de'
+ _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/',
+ # The md5 is different each time
+ 'info_dict': {
+ 'id': '9070',
+ 'ext': 'flv',
+ 'title': 'Highlights des Empfangs in Berlin',
+ 'upload_date': '20140716',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ player_info = self._download_xml(
+ 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,
+ video_id)
+ video_info = player_info.find('video')
+
+ f4m_info = self._download_xml(video_info.find('url').text, video_id)
+ token_el = f4m_info.find('token')
+ manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
+
+ return {
+ 'id': video_id,
+ 'title': video_info.find('title').text,
+ 'url': manifest_url,
+ 'ext': 'flv',
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]),
+ }
diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py
index d26145db1..6d73c8a4a 100644
--- a/youtube_dl/extractor/firedrive.py
+++ b/youtube_dl/extractor/firedrive.py
@@ -8,7 +8,6 @@ from ..utils import (
ExtractorError,
compat_urllib_parse,
compat_urllib_request,
- determine_ext,
)
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index f3e0f38b7..1fbe6d175 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -48,7 +48,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr'
- _VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
+ _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@@ -211,7 +211,7 @@ class GenerationQuoiIE(InfoExtractor):
class CultureboxIE(FranceTVBaseInfoExtractor):
IE_NAME = 'culturebox.francetvinfo.fr'
- _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
+ _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = {
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f97b59845..9db27f9aa 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -402,7 +402,7 @@ class GenericIE(InfoExtractor):
elif default_search == 'error':
raise ExtractorError(
('%r is not a valid URL. '
- 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
+ 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
) % (url, url), expected=True)
else:
assert ':' in default_search
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 2c100d424..1ea1bbab4 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -28,11 +28,13 @@ class LivestreamIE(InfoExtractor):
}
def _extract_video_info(self, video_data):
- video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url')
+ video_url = (
+ video_data.get('progressive_url_hd') or
+ video_data.get('progressive_url')
+ )
return {
'id': compat_str(video_data['id']),
'url': video_url,
- 'ext': 'mp4',
'title': video_data['caption'],
'thumbnail': video_data['thumbnail_url'],
'upload_date': video_data['updated_at'].replace('-', '')[:8],
@@ -50,7 +52,8 @@ class LivestreamIE(InfoExtractor):
r'window.config = ({.*?});', webpage, 'window config')
info = json.loads(config_json)['event']
videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data'] if video_data['type'] == 'video']
+ for video_data in info['feed']['data']
+ if video_data['type'] == 'video']
return self.playlist_result(videos, info['id'], info['full_name'])
else:
og_video = self._og_search_video_url(webpage, 'player url')
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py
new file mode 100644
index 000000000..c28be3a7d
--- /dev/null
+++ b/youtube_dl/extractor/mlb.py
@@ -0,0 +1,102 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ find_xpath_attr,
+)
+
+
+class MLBIE(InfoExtractor):
+ _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
+ 'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+ 'info_dict': {
+ 'id': '34496663',
+ 'ext': 'mp4',
+ 'title': 'Stanton prepares for Derby',
+ 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
+ 'duration': 46,
+ 'timestamp': 1405105800,
+ 'upload_date': '20140711',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
+ 'md5': '0e6e73d509321e142409b695eadd541f',
+ 'info_dict': {
+ 'id': '34578115',
+ 'ext': 'mp4',
+ 'title': 'Cespedes repeats as Derby champ',
+ 'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
+ 'duration': 488,
+ 'timestamp': 1405399936,
+ 'upload_date': '20140715',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ 'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
+ 'md5': 'b8fd237347b844365d74ea61d4245967',
+ 'info_dict': {
+ 'id': '34577915',
+ 'ext': 'mp4',
+ 'title': 'Bautista on Home Run Derby',
+ 'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
+ 'duration': 52,
+ 'timestamp': 1405390722,
+ 'upload_date': '20140715',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ detail = self._download_xml(
+ 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
+ % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
+
+ title = detail.find('./headline').text
+ description = detail.find('./big-blurb').text
+ duration = parse_duration(detail.find('./duration').text)
+ timestamp = parse_iso8601(detail.attrib['date'][:-5])
+
+ thumbnail = find_xpath_attr(
+ detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text
+
+ formats = []
+ for media_url in detail.findall('./url'):
+ playback_scenario = media_url.attrib['playback_scenario']
+ fmt = {
+ 'url': media_url.text,
+ 'format_id': playback_scenario,
+ }
+ m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
+ if m:
+ fmt.update({
+ 'vbr': int(m.group('vbr')) * 1000,
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ formats.append(fmt)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index fbcbe1f40..12e85a716 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -32,7 +32,7 @@ class NPOIE(InfoExtractor):
'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
video_id,
# We have to remove the javascript callback
- transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
+ transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j)
)
token_page = self._download_webpage(
'http://ida.omroep.nl/npoplayer/i.js',
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 4295cf93a..d1e12dd8d 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -35,9 +35,7 @@ class RedTubeIE(InfoExtractor):
r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
webpage, u'title')
- video_thumbnail = self._html_search_regex(
- r'playerInnerHTML.+?<img\s+src="(.+?)"',
- webpage, u'thumbnail', fatal=False)
+ video_thumbnail = self._og_search_thumbnail(webpage)
# No self-labeling, but they describe themselves as
# "Home of Videos Porno"
diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py
index 205f8a167..dce64e151 100644
--- a/youtube_dl/extractor/rtbf.py
+++ b/youtube_dl/extractor/rtbf.py
@@ -30,7 +30,7 @@ class RTBFIE(InfoExtractor):
page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
data = json.loads(self._html_search_regex(
- r'<div class="js-player-embed" data-video="([^"]+)"', page, 'data video'))['data']
+ r'<div class="js-player-embed(?: player-embed)?" data-video="([^"]+)"', page, 'data video'))['data']
video_url = data.get('downloadUrl') or data.get('url')
diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py
new file mode 100644
index 000000000..172cc1275
--- /dev/null
+++ b/youtube_dl/extractor/sapo.py
@@ -0,0 +1,119 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ unified_strdate,
+)
+
+
+class SapoIE(InfoExtractor):
+ IE_DESC = 'SAPO Vídeos'
+ _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
+
+ _TESTS = [
+ {
+ 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
+ 'md5': '79ee523f6ecb9233ac25075dee0eda83',
+ 'note': 'SD video',
+ 'info_dict': {
+ 'id': 'UBz95kOtiWYUMTA5Ghfi',
+ 'ext': 'mp4',
+ 'title': 'Benfica - Marcas na Hitória',
+ 'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
+ 'duration': 264,
+ 'uploader': 'tiago_1988',
+ 'upload_date': '20080229',
+ 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
+ },
+ },
+ {
+ 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
+ 'md5': '90a2f283cfb49193fe06e861613a72aa',
+ 'note': 'HD video',
+ 'info_dict': {
+ 'id': 'IyusNAZ791ZdoCY5H5IF',
+ 'ext': 'mp4',
+ 'title': 'Codebits VII - Report',
+ 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
+ 'duration': 144,
+ 'uploader': 'codebits',
+ 'upload_date': '20140427',
+ 'categories': ['codebits', 'codebits2014'],
+ },
+ },
+ {
+ 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
+ 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
+ 'note': 'v2 video',
+ 'info_dict': {
+ 'id': 'yLqjzPtbTimsn2wWBKHz',
+ 'ext': 'mp4',
+ 'title': 'Hipnose Condicionativa 4',
+ 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
+ 'duration': 692,
+ 'uploader': 'sapozen',
+ 'upload_date': '20090609',
+ 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
+ },
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ item = self._download_xml(
+ 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
+
+ title = item.find('./title').text
+ description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
+ thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
+ duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
+ uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
+ upload_date = unified_strdate(item.find('./pubDate').text)
+ view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
+ comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)
+ tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
+ categories = tags.split() if tags else []
+ age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0
+
+ video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
+ video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')
+
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': 'sd',
+ 'width': int(video_size[0]),
+ 'height': int(video_size[1]),
+ }]
+
+ if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
+ formats.append({
+ 'url': re.sub(r'/mov/1$', '/mov/39', video_url),
+ 'ext': 'mp4',
+ 'format_id': 'hd',
+ 'width': 1280,
+ 'height': 720,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py
index 8477840fc..81ba169fb 100644
--- a/youtube_dl/extractor/tenplay.py
+++ b/youtube_dl/extractor/tenplay.py
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6123e1256..072e711c2 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1,19 +1,17 @@
# coding: utf-8
-import collections
import errno
import io
import itertools
import json
import os.path
import re
-import struct
import traceback
-import zlib
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
from ..utils import (
compat_chr,
compat_parse_qs,
@@ -347,8 +345,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url, slen):
- id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
- player_url)
+ id_m = re.match(
+ r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$',
+ player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
@@ -449,417 +448,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return lambda s: initial_function([s])
def _parse_sig_swf(self, file_contents):
- if file_contents[1:3] != b'WS':
- raise ExtractorError(
- u'Not an SWF file; header is %r' % file_contents[:3])
- if file_contents[:1] == b'C':
- content = zlib.decompress(file_contents[8:])
- else:
- raise NotImplementedError(u'Unsupported compression format %r' %
- file_contents[:1])
-
- def extract_tags(content):
- pos = 0
- while pos < len(content):
- header16 = struct.unpack('<H', content[pos:pos+2])[0]
- pos += 2
- tag_code = header16 >> 6
- tag_len = header16 & 0x3f
- if tag_len == 0x3f:
- tag_len = struct.unpack('<I', content[pos:pos+4])[0]
- pos += 4
- assert pos+tag_len <= len(content)
- yield (tag_code, content[pos:pos+tag_len])
- pos += tag_len
-
- code_tag = next(tag
- for tag_code, tag in extract_tags(content)
- if tag_code == 82)
- p = code_tag.index(b'\0', 4) + 1
- code_reader = io.BytesIO(code_tag[p:])
-
- # Parse ABC (AVM2 ByteCode)
- def read_int(reader=None):
- if reader is None:
- reader = code_reader
- res = 0
- shift = 0
- for _ in range(5):
- buf = reader.read(1)
- assert len(buf) == 1
- b = struct.unpack('<B', buf)[0]
- res = res | ((b & 0x7f) << shift)
- if b & 0x80 == 0:
- break
- shift += 7
- return res
-
- def u30(reader=None):
- res = read_int(reader)
- assert res & 0xf0000000 == 0
- return res
- u32 = read_int
-
- def s32(reader=None):
- v = read_int(reader)
- if v & 0x80000000 != 0:
- v = - ((v ^ 0xffffffff) + 1)
- return v
-
- def read_string(reader=None):
- if reader is None:
- reader = code_reader
- slen = u30(reader)
- resb = reader.read(slen)
- assert len(resb) == slen
- return resb.decode('utf-8')
-
- def read_bytes(count, reader=None):
- if reader is None:
- reader = code_reader
- resb = reader.read(count)
- assert len(resb) == count
- return resb
-
- def read_byte(reader=None):
- resb = read_bytes(1, reader=reader)
- res = struct.unpack('<B', resb)[0]
- return res
-
- # minor_version + major_version
- read_bytes(2 + 2)
-
- # Constant pool
- int_count = u30()
- for _c in range(1, int_count):
- s32()
- uint_count = u30()
- for _c in range(1, uint_count):
- u32()
- double_count = u30()
- read_bytes((double_count-1) * 8)
- string_count = u30()
- constant_strings = [u'']
- for _c in range(1, string_count):
- s = read_string()
- constant_strings.append(s)
- namespace_count = u30()
- for _c in range(1, namespace_count):
- read_bytes(1) # kind
- u30() # name
- ns_set_count = u30()
- for _c in range(1, ns_set_count):
- count = u30()
- for _c2 in range(count):
- u30()
- multiname_count = u30()
- MULTINAME_SIZES = {
- 0x07: 2, # QName
- 0x0d: 2, # QNameA
- 0x0f: 1, # RTQName
- 0x10: 1, # RTQNameA
- 0x11: 0, # RTQNameL
- 0x12: 0, # RTQNameLA
- 0x09: 2, # Multiname
- 0x0e: 2, # MultinameA
- 0x1b: 1, # MultinameL
- 0x1c: 1, # MultinameLA
- }
- multinames = [u'']
- for _c in range(1, multiname_count):
- kind = u30()
- assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
- if kind == 0x07:
- u30() # namespace_idx
- name_idx = u30()
- multinames.append(constant_strings[name_idx])
- else:
- multinames.append('[MULTINAME kind: %d]' % kind)
- for _c2 in range(MULTINAME_SIZES[kind]):
- u30()
-
- # Methods
- method_count = u30()
- MethodInfo = collections.namedtuple(
- 'MethodInfo',
- ['NEED_ARGUMENTS', 'NEED_REST'])
- method_infos = []
- for method_id in range(method_count):
- param_count = u30()
- u30() # return type
- for _ in range(param_count):
- u30() # param type
- u30() # name index (always 0 for youtube)
- flags = read_byte()
- if flags & 0x08 != 0:
- # Options present
- option_count = u30()
- for c in range(option_count):
- u30() # val
- read_bytes(1) # kind
- if flags & 0x80 != 0:
- # Param names present
- for _ in range(param_count):
- u30() # param name
- mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
- method_infos.append(mi)
-
- # Metadata
- metadata_count = u30()
- for _c in range(metadata_count):
- u30() # name
- item_count = u30()
- for _c2 in range(item_count):
- u30() # key
- u30() # value
-
- def parse_traits_info():
- trait_name_idx = u30()
- kind_full = read_byte()
- kind = kind_full & 0x0f
- attrs = kind_full >> 4
- methods = {}
- if kind in [0x00, 0x06]: # Slot or Const
- u30() # Slot id
- u30() # type_name_idx
- vindex = u30()
- if vindex != 0:
- read_byte() # vkind
- elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
- u30() # disp_id
- method_idx = u30()
- methods[multinames[trait_name_idx]] = method_idx
- elif kind == 0x04: # Class
- u30() # slot_id
- u30() # classi
- elif kind == 0x05: # Function
- u30() # slot_id
- function_idx = u30()
- methods[function_idx] = multinames[trait_name_idx]
- else:
- raise ExtractorError(u'Unsupported trait kind %d' % kind)
-
- if attrs & 0x4 != 0: # Metadata present
- metadata_count = u30()
- for _c3 in range(metadata_count):
- u30() # metadata index
-
- return methods
-
- # Classes
+ swfi = SWFInterpreter(file_contents)
TARGET_CLASSNAME = u'SignatureDecipher'
- searched_idx = multinames.index(TARGET_CLASSNAME)
- searched_class_id = None
- class_count = u30()
- for class_id in range(class_count):
- name_idx = u30()
- if name_idx == searched_idx:
- # We found the class we're looking for!
- searched_class_id = class_id
- u30() # super_name idx
- flags = read_byte()
- if flags & 0x08 != 0: # Protected namespace is present
- u30() # protected_ns_idx
- intrf_count = u30()
- for _c2 in range(intrf_count):
- u30()
- u30() # iinit
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- if searched_class_id is None:
- raise ExtractorError(u'Target class %r not found' %
- TARGET_CLASSNAME)
-
- method_names = {}
- method_idxs = {}
- for class_id in range(class_count):
- u30() # cinit
- trait_count = u30()
- for _c2 in range(trait_count):
- trait_methods = parse_traits_info()
- if class_id == searched_class_id:
- method_names.update(trait_methods.items())
- method_idxs.update(dict(
- (idx, name)
- for name, idx in trait_methods.items()))
-
- # Scripts
- script_count = u30()
- for _c in range(script_count):
- u30() # init
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- # Method bodies
- method_body_count = u30()
- Method = collections.namedtuple('Method', ['code', 'local_count'])
- methods = {}
- for _c in range(method_body_count):
- method_idx = u30()
- u30() # max_stack
- local_count = u30()
- u30() # init_scope_depth
- u30() # max_scope_depth
- code_length = u30()
- code = read_bytes(code_length)
- if method_idx in method_idxs:
- m = Method(code, local_count)
- methods[method_idxs[method_idx]] = m
- exception_count = u30()
- for _c2 in range(exception_count):
- u30() # from
- u30() # to
- u30() # target
- u30() # exc_type
- u30() # var_name
- trait_count = u30()
- for _c2 in range(trait_count):
- parse_traits_info()
-
- assert p + code_reader.tell() == len(code_tag)
- assert len(methods) == len(method_idxs)
-
- method_pyfunctions = {}
-
- def extract_function(func_name):
- if func_name in method_pyfunctions:
- return method_pyfunctions[func_name]
- if func_name not in methods:
- raise ExtractorError(u'Cannot find function %r' % func_name)
- m = methods[func_name]
-
- def resfunc(args):
- registers = ['(this)'] + list(args) + [None] * m.local_count
- stack = []
- coder = io.BytesIO(m.code)
- while True:
- opcode = struct.unpack('!B', coder.read(1))[0]
- if opcode == 36: # pushbyte
- v = struct.unpack('!B', coder.read(1))[0]
- stack.append(v)
- elif opcode == 44: # pushstring
- idx = u30(coder)
- stack.append(constant_strings[idx])
- elif opcode == 48: # pushscope
- # We don't implement the scope register, so we'll just
- # ignore the popped value
- stack.pop()
- elif opcode == 70: # callproperty
- index = u30(coder)
- mname = multinames[index]
- arg_count = u30(coder)
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- if mname == u'split':
- assert len(args) == 1
- assert isinstance(args[0], compat_str)
- assert isinstance(obj, compat_str)
- if args[0] == u'':
- res = list(obj)
- else:
- res = obj.split(args[0])
- stack.append(res)
- elif mname == u'slice':
- assert len(args) == 1
- assert isinstance(args[0], int)
- assert isinstance(obj, list)
- res = obj[args[0]:]
- stack.append(res)
- elif mname == u'join':
- assert len(args) == 1
- assert isinstance(args[0], compat_str)
- assert isinstance(obj, list)
- res = args[0].join(obj)
- stack.append(res)
- elif mname in method_pyfunctions:
- stack.append(method_pyfunctions[mname](args))
- else:
- raise NotImplementedError(
- u'Unsupported property %r on %r'
- % (mname, obj))
- elif opcode == 72: # returnvalue
- res = stack.pop()
- return res
- elif opcode == 79: # callpropvoid
- index = u30(coder)
- mname = multinames[index]
- arg_count = u30(coder)
- args = list(reversed(
- [stack.pop() for _ in range(arg_count)]))
- obj = stack.pop()
- if mname == u'reverse':
- assert isinstance(obj, list)
- obj.reverse()
- else:
- raise NotImplementedError(
- u'Unsupported (void) property %r on %r'
- % (mname, obj))
- elif opcode == 93: # findpropstrict
- index = u30(coder)
- mname = multinames[index]
- res = extract_function(mname)
- stack.append(res)
- elif opcode == 97: # setproperty
- index = u30(coder)
- value = stack.pop()
- idx = stack.pop()
- obj = stack.pop()
- assert isinstance(obj, list)
- assert isinstance(idx, int)
- obj[idx] = value
- elif opcode == 98: # getlocal
- index = u30(coder)
- stack.append(registers[index])
- elif opcode == 99: # setlocal
- index = u30(coder)
- value = stack.pop()
- registers[index] = value
- elif opcode == 102: # getproperty
- index = u30(coder)
- pname = multinames[index]
- if pname == u'length':
- obj = stack.pop()
- assert isinstance(obj, list)
- stack.append(len(obj))
- else: # Assume attribute access
- idx = stack.pop()
- assert isinstance(idx, int)
- obj = stack.pop()
- assert isinstance(obj, list)
- stack.append(obj[idx])
- elif opcode == 128: # coerce
- u30(coder)
- elif opcode == 133: # coerce_s
- assert isinstance(stack[-1], (type(None), compat_str))
- elif opcode == 164: # modulo
- value2 = stack.pop()
- value1 = stack.pop()
- res = value1 % value2
- stack.append(res)
- elif opcode == 208: # getlocal_0
- stack.append(registers[0])
- elif opcode == 209: # getlocal_1
- stack.append(registers[1])
- elif opcode == 210: # getlocal_2
- stack.append(registers[2])
- elif opcode == 211: # getlocal_3
- stack.append(registers[3])
- elif opcode == 214: # setlocal_2
- registers[2] = stack.pop()
- elif opcode == 215: # setlocal_3
- registers[3] = stack.pop()
- else:
- raise NotImplementedError(
- u'Unsupported opcode %d' % opcode)
-
- method_pyfunctions[func_name] = resfunc
- return resfunc
-
- initial_function = extract_function(u'decipher')
+ searched_class = swfi.extract_class(TARGET_CLASSNAME)
+ initial_function = swfi.extract_function(searched_class, u'decipher')
return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
@@ -1014,14 +606,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
- data = compat_urllib_parse.urlencode({'video_id': video_id,
- 'el': 'player_embedded',
- 'gl': 'US',
- 'hl': 'en',
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'asv': 3,
- 'sts':'1588',
- })
+ data = compat_urllib_parse.urlencode({
+ 'video_id': video_id,
+ 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+ 'sts':'16268',
+ })
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
video_info_webpage = self._download_webpage(video_info_url, video_id,
note=False,
@@ -1220,31 +809,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
encrypted_sig = url_data['s'][0]
+
+ if not age_gate:
+ jsplayer_url_json = self._search_regex(
+ r'"assets":.+?"js":\s*("[^"]+")',
+ video_webpage, u'JS player URL')
+ player_url = json.loads(jsplayer_url_json)
+ if player_url is None:
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, u'age gate player URL')
+ player_url = json.loads(player_url_json)
+
if self._downloader.params.get('verbose'):
- if age_gate:
- if player_url is None:
- player_version = 'unknown'
- else:
+ if player_url is None:
+ player_version = 'unknown'
+ player_desc = 'unknown'
+ else:
+ if player_url.endswith('swf'):
player_version = self._search_regex(
- r'-(.+)\.swf$', player_url,
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
u'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- r'html5player-(.+?)\.js', video_webpage,
- 'html5 player', fatal=False)
- player_desc = u'html5 player %s' % player_version
+ player_desc = 'flash player %s' % player_version
+ else:
+ player_version = self._search_regex(
+ r'html5player-(.+?)\.js', video_webpage,
+ 'html5 player', fatal=False)
+ player_desc = u'html5 player %s' % player_version
parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
- if not age_gate:
- jsplayer_url_json = self._search_regex(
- r'"assets":.+?"js":\s*("[^"]+")',
- video_webpage, u'JS player URL')
- player_url = json.loads(jsplayer_url_json)
-
signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
url += '&signature=' + signature