aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CONTRIBUTING.md2
-rw-r--r--README.md2
-rw-r--r--docs/supportedsites.md21
-rw-r--r--test/test_subtitles.py13
-rw-r--r--youtube_dl/extractor/__init__.py20
-rw-r--r--youtube_dl/extractor/abc.py5
-rw-r--r--youtube_dl/extractor/adultswim.py6
-rw-r--r--youtube_dl/extractor/aftenposten.py23
-rw-r--r--youtube_dl/extractor/amp.py84
-rw-r--r--youtube_dl/extractor/appletrailers.py86
-rw-r--r--youtube_dl/extractor/arte.py20
-rw-r--r--youtube_dl/extractor/atresplayer.py123
-rw-r--r--youtube_dl/extractor/audimedia.py2
-rw-r--r--youtube_dl/extractor/audiomack.py2
-rw-r--r--youtube_dl/extractor/bleacherreport.py106
-rw-r--r--youtube_dl/extractor/bliptv.py290
-rw-r--r--youtube_dl/extractor/br.py127
-rw-r--r--youtube_dl/extractor/canal13cl.py48
-rw-r--r--youtube_dl/extractor/cinemassacre.py18
-rw-r--r--youtube_dl/extractor/comcarcoff.py24
-rw-r--r--youtube_dl/extractor/daum.py18
-rw-r--r--youtube_dl/extractor/dramafever.py69
-rw-r--r--youtube_dl/extractor/faz.py36
-rw-r--r--youtube_dl/extractor/flickr.py112
-rw-r--r--youtube_dl/extractor/foxnews.py73
-rw-r--r--youtube_dl/extractor/franceinter.py7
-rw-r--r--youtube_dl/extractor/generic.py18
-rw-r--r--youtube_dl/extractor/googledrive.py88
-rw-r--r--youtube_dl/extractor/imgur.py51
-rw-r--r--youtube_dl/extractor/instagram.py2
-rw-r--r--youtube_dl/extractor/jwplatform.py71
-rw-r--r--youtube_dl/extractor/makertv.py32
-rw-r--r--youtube_dl/extractor/pbs.py2
-rw-r--r--youtube_dl/extractor/periscope.py3
-rw-r--r--youtube_dl/extractor/soompi.py146
-rw-r--r--youtube_dl/extractor/tele13.py81
-rw-r--r--youtube_dl/extractor/twentyfourvideo.py39
-rw-r--r--youtube_dl/extractor/vgtv.py166
-rw-r--r--youtube_dl/extractor/viki.py24
-rw-r--r--youtube_dl/extractor/xstream.py14
-rw-r--r--youtube_dl/version.py2
41 files changed, 1131 insertions, 945 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f3fe0d432..d15267d7e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -28,7 +28,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
- How it could be fixed
- How your proposed solution would look like
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
diff --git a/README.md b/README.md
index 7002f45e0..3a4707227 100644
--- a/README.md
+++ b/README.md
@@ -830,7 +830,7 @@ So please elaborate on what feature you are requesting, or what bug you want to
- How it could be fixed
- How your proposed solution would look like
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a commiter myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 8253335e3..1a5c7cde9 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -23,7 +23,6 @@
- **AdobeTVShow**
- **AdobeTVVideo**
- **AdultSwim**
- - **Aftenposten**
- **Aftonbladet**
- **AirMozilla**
- **AlJazeera**
@@ -34,7 +33,8 @@
- **Aparat**
- **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報
- - **AppleTrailers**
+ - **appletrailers**
+ - **appletrailers:section**
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
@@ -65,9 +65,9 @@
- **Bet**
- **Bild**: Bild.de
- **BiliBili**
+ - **BleacherReport**
+ - **BleacherReportCMS**
- **blinkx**
- - **blip.tv:user**
- - **BlipTV**
- **Bloomberg**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk Mediathek
@@ -80,7 +80,6 @@
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
- - **Canal13cl**
- **canalc2.tv**
- **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv
- **CBS**
@@ -210,7 +209,9 @@
- **GodTube**
- **GoldenMoustache**
- **Golem**
+ - **GoogleDrive**
- **Goshgay**
+ - **GPUTechConf**
- **Groupon**
- **Hark**
- **HearThisAt**
@@ -252,6 +253,7 @@
- **Jove**
- **jpopsuki.tv**
- **Jukebox**
+ - **JWPlatform**
- **Kaltura**
- **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
@@ -292,6 +294,7 @@
- **m6**
- **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
+ - **MakerTV**
- **Malemotion**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
@@ -499,8 +502,6 @@
- **SnagFilmsEmbed**
- **Snotr**
- **Sohu**
- - **soompi**
- - **soompi:show**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
@@ -551,6 +552,7 @@
- **TechTalks**
- **techtv.mit.edu**
- **ted**
+ - **Tele13**
- **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf**
@@ -573,6 +575,7 @@
- **TMZ**
- **TMZArticle**
- **TNAFlix**
+ - **toggle**
- **tou.tv**
- **Toypics**: Toypics user profile
- **ToypicsUser**: Toypics user profile
@@ -622,7 +625,7 @@
- **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- - **VGTV**: VGTV and BTTV
+ - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **Vice**
- **Viddler**
@@ -711,6 +714,7 @@
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
- **youtube:playlist**: YouTube.com playlists
+ - **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first
@@ -718,7 +722,6 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:user:playlists**: YouTube.com user playlists
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 75f0ea75f..9ed9fe622 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -11,7 +11,6 @@ from test.helper import FakeYDL, md5
from youtube_dl.extractor import (
- BlipTVIE,
YoutubeIE,
DailymotionIE,
TEDIE,
@@ -145,18 +144,6 @@ class TestTedSubtitles(BaseTestSubtitles):
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
-class TestBlipTVSubtitles(BaseTestSubtitles):
- url = 'http://blip.tv/a/a-6603250'
- IE = BlipTVIE
-
- def test_allsubtitles(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['allsubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), '5b75c300af65fe4476dff79478bb93e4')
-
-
class TestVimeoSubtitles(BaseTestSubtitles):
url = 'http://vimeo.com/76979871'
IE = VimeoIE
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 042b1e921..365c0b86f 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -15,7 +15,6 @@ from .adobetv import (
AdobeTVVideoIE,
)
from .adultswim import AdultSwimIE
-from .aftenposten import AftenpostenIE
from .aftonbladet import AftonbladetIE
from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE
@@ -26,7 +25,10 @@ from .aol import AolIE
from .allocine import AllocineIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
-from .appletrailers import AppleTrailersIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
from .archiveorg import ArchiveOrgIE
from .ard import (
ARDIE,
@@ -61,8 +63,11 @@ from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
from .blinkx import BlinkxIE
-from .bliptv import BlipTVIE, BlipTVUserIE
from .bloomberg import BloombergIE
from .bpb import BpbIE
from .br import BRIE
@@ -78,7 +83,6 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
-from .canal13cl import Canal13clIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .cbs import CBSIE
@@ -232,6 +236,7 @@ from .globo import (
from .godtube import GodTubeIE
from .goldenmoustache import GoldenMoustacheIE
from .golem import GolemIE
+from .googledrive import GoogleDriveIE
from .googleplus import GooglePlusIE
from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE
@@ -282,6 +287,7 @@ from .jadorecettepub import JadoreCettePubIE
from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .jukebox import JukeboxIE
+from .jwplatform import JWPlatformIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
@@ -336,6 +342,7 @@ from .lynda import (
from .m6 import M6IE
from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE
+from .makertv import MakerTVIE
from .malemotion import MalemotionIE
from .mdr import MDRIE
from .metacafe import MetacafeIE
@@ -586,10 +593,6 @@ from .snagfilms import (
)
from .snotr import SnotrIE
from .sohu import SohuIE
-from .soompi import (
- SoompiIE,
- SoompiShowIE,
-)
from .soundcloud import (
SoundcloudIE,
SoundcloudSetIE,
@@ -648,6 +651,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
+from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index c0e5d1abf..6a29e587f 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -23,6 +23,7 @@ class ABCIE(InfoExtractor):
'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
},
+ 'skip': 'this video has expired',
}, {
'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
'md5': 'db2a5369238b51f9811ad815b69dc086',
@@ -36,6 +37,7 @@ class ABCIE(InfoExtractor):
'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
},
'add_ie': ['Youtube'],
+ 'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
@@ -58,6 +60,9 @@ class ABCIE(InfoExtractor):
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
webpage)
if mobj is None:
+ expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
+ if expired:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 3ae618e71..bf21a6887 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -68,7 +68,7 @@ class AdultSwimIE(InfoExtractor):
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
@@ -79,6 +79,10 @@ class AdultSwimIE(InfoExtractor):
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
}]
@staticmethod
diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py
deleted file mode 100644
index 0c00acfb5..000000000
--- a/youtube_dl/extractor/aftenposten.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class AftenpostenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
- 'md5': 'fd828cd29774a729bf4d4425fe192972',
- 'info_dict': {
- 'id': '21039',
- 'ext': 'mov',
- 'title': 'TRAILER: "Sweatshop" - I can´t take any more',
- 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
- 'timestamp': 1416927969,
- 'upload_date': '20141125',
- }
- }
-
- def _real_extract(self, url):
- return self.url_result('xstream:ap:%s' % self._match_id(url), 'Xstream')
diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py
new file mode 100644
index 000000000..dcc3c97f1
--- /dev/null
+++ b/youtube_dl/extractor/amp.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class AMPIE(InfoExtractor):
+ # parse Akamai Adaptive Media Player feed
+ def _extract_feed_info(self, url):
+ item = self._download_json(
+ url, None, 'Downloading Akamai AMP feed',
+ 'Unable to download Akamai AMP feed')['channel']['item']
+
+ video_id = item['guid']
+
+ def get_media_node(name, default=None):
+ media_name = 'media-%s' % name
+ media_group = item.get('media-group') or item
+ return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
+
+ thumbnails = []
+ media_thumbnail = get_media_node('thumbnail')
+ if media_thumbnail:
+ if isinstance(media_thumbnail, dict):
+ media_thumbnail = [media_thumbnail]
+ for thumbnail_data in media_thumbnail:
+ thumbnail = thumbnail_data['@attributes']
+ thumbnails.append({
+ 'url': self._proto_relative_url(thumbnail['url'], 'http:'),
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ subtitles = {}
+ media_subtitle = get_media_node('subTitle')
+ if media_subtitle:
+ if isinstance(media_subtitle, dict):
+ media_subtitle = [media_subtitle]
+ for subtitle_data in media_subtitle:
+ subtitle = subtitle_data['@attributes']
+ lang = subtitle.get('lang') or 'en'
+ subtitles[lang] = [{'url': subtitle['href']}]
+
+ formats = []
+ media_content = get_media_node('content')
+ if isinstance(media_content, dict):
+ media_content = [media_content]
+ for media_data in media_content:
+ media = media_data['@attributes']
+ media_type = media['type']
+ if media_type == 'video/f4m':
+ f4m_formats = self._extract_f4m_formats(
+ media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
+ video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ elif media_type == 'application/x-mpegURL':
+ m3u8_formats = self._extract_m3u8_formats(
+ media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ else:
+ formats.append({
+ 'format_id': media_data['media-category']['@attributes']['label'],
+ 'url': media['url'],
+ 'tbr': int_or_none(media.get('bitrate')),
+ 'filesize': int_or_none(media.get('fileSize')),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': get_media_node('title'),
+ 'description': get_media_node('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(item.get('pubDate'), ' '),
+ 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index f68dc3236..62ed0c918 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -11,6 +11,7 @@ from ..utils import (
class AppleTrailersIE(InfoExtractor):
+ IE_NAME = 'appletrailers'
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
_TESTS = [{
'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
@@ -64,6 +65,12 @@ class AppleTrailersIE(InfoExtractor):
},
]
}, {
+ 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+ 'info_dict': {
+ 'id': 'blackthorn',
+ },
+ 'playlist_mincount': 2,
+ }, {
'url': 'http://trailers.apple.com/ca/metropole/autrui/',
'only_matching': True,
}]
@@ -79,7 +86,7 @@ class AppleTrailersIE(InfoExtractor):
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
- s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+ s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# like: http://trailers.apple.com/trailers/wb/gravity/
@@ -96,6 +103,9 @@ class AppleTrailersIE(InfoExtractor):
trailer_info_json = self._search_regex(self._JSON_RE,
on_click, 'trailer info')
trailer_info = json.loads(trailer_info_json)
+ first_url = trailer_info.get('url')
+ if not first_url:
+ continue
title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src']
@@ -107,7 +117,6 @@ class AppleTrailersIE(InfoExtractor):
if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
- first_url = trailer_info['url']
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
@@ -144,3 +153,76 @@ class AppleTrailersIE(InfoExtractor):
'id': movie,
'entries': playlist,
}
+
+
+class AppleTrailersSectionIE(InfoExtractor):
+ IE_NAME = 'appletrailers:section'
+ _SECTIONS = {
+ 'justadded': {
+ 'feed_path': 'just_added',
+ 'title': 'Just Added',
+ },
+ 'exclusive': {
+ 'feed_path': 'exclusive',
+ 'title': 'Exclusive',
+ },
+ 'justhd': {
+ 'feed_path': 'just_hd',
+ 'title': 'Just HD',
+ },
+ 'mostpopular': {
+ 'feed_path': 'most_pop',
+ 'title': 'Most Popular',
+ },
+ 'moviestudios': {
+ 'feed_path': 'studios',
+ 'title': 'Movie Studios',
+ },
+ }
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/#section=justadded',
+ 'info_dict': {
+ 'title': 'Just Added',
+ 'id': 'justadded',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=exclusive',
+ 'info_dict': {
+ 'title': 'Exclusive',
+ 'id': 'exclusive',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=justhd',
+ 'info_dict': {
+ 'title': 'Just HD',
+ 'id': 'justhd',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=mostpopular',
+ 'info_dict': {
+ 'title': 'Most Popular',
+ 'id': 'mostpopular',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=moviestudios',
+ 'info_dict': {
+ 'title': 'Movie Studios',
+ 'id': 'moviestudios',
+ },
+ 'playlist_mincount': 80,
+ }]
+
+ def _real_extract(self, url):
+ section = self._match_id(url)
+ section_data = self._download_json(
+ 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+ section)
+ entries = [
+ self.url_result('http://trailers.apple.com' + e['location'])
+ for e in section_data]
+ return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 2a00da3ee..10301a8ea 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -68,9 +68,13 @@ class ArteTVPlus7IE(InfoExtractor):
def _extract_url_info(cls, url):
mobj = re.match(cls._VALID_URL, url)
lang = mobj.group('lang')
- # This is not a real id, it can be for example AJT for the news
- # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
- video_id = mobj.group('id')
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ if 'vid' in query:
+ video_id = query['vid'][0]
+ else:
+ # This is not a real id, it can be for example AJT for the news
+ # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
+ video_id = mobj.group('id')
return video_id, lang
def _real_extract(self, url):
@@ -79,9 +83,15 @@ class ArteTVPlus7IE(InfoExtractor):
return self._extract_from_webpage(webpage, video_id, lang)
def _extract_from_webpage(self, webpage, video_id, lang):
+ patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
+ ids = (video_id, '')
+ # some pages contain multiple videos (like
+ # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
+ # so we first try to look for json URLs that contain the video id from
+ # the 'vid' parameter.
+ patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
json_url = self._html_search_regex(
- [r'arte_vp_url=["\'](.*?)["\']', r'data-url=["\']([^"]+)["\']'],
- webpage, 'json vp url', default=None)
+ patterns, webpage, 'json vp url', default=None)
if not json_url:
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index 50e47ba0a..7ac3044c7 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -2,6 +2,8 @@ from __future__ import unicode_literals
import time
import hmac
+import hashlib
+import re
from .common import InfoExtractor
from ..compat import (
@@ -32,6 +34,19 @@ class AtresPlayerIE(InfoExtractor):
'duration': 5527.6,
'thumbnail': 're:^https?://.*\.jpg$',
},
+ 'skip': 'This video is only available for registered users'
+ },
+ {
+ 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
+ 'md5': '0d0e918533bbd4b263f2de4d197d4aac',
+ 'info_dict': {
+ 'id': 'capitulo-112-david-bustamante',
+ 'ext': 'flv',
+ 'title': 'David Bustamante',
+ 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
+ 'duration': 1439.0,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
},
{
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
@@ -50,6 +65,13 @@ class AtresPlayerIE(InfoExtractor):
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
+ _ERRORS = {
+ 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
+ 'DELETED': 'This video has expired and is no longer available for online streaming.',
+ 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
+ # 'PREMIUM': 'PREMIUM',
+ }
+
def _real_initialize(self):
self._login()
@@ -83,58 +105,81 @@ class AtresPlayerIE(InfoExtractor):
episode_id = self._search_regex(
r'episode="([^"]+)"', webpage, 'episode id')
+ request = sanitized_Request(
+ self._PLAYER_URL_TEMPLATE % episode_id,
+ headers={'User-Agent': self._USER_AGENT})
+ player = self._download_json(request, episode_id, 'Downloading player JSON')
+
+ episode_type = player.get('typeOfEpisode')
+ error_message = self._ERRORS.get(episode_type)
+ if error_message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
+ formats = []
+ video_url = player.get('urlVideo')
+ if video_url:
+ format_info = {
+ 'url': video_url,
+ 'format_id': 'http',
+ }
+ mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
+ if mobj:
+ format_info.update({
+ 'width': int_or_none(mobj.group('width')),
+ 'height': int_or_none(mobj.group('height')),
+ 'tbr': int_or_none(mobj.group('bitrate')),
+ })
+ formats.append(format_info)
+
+ m3u8_url = player.get('urlVideoHls')
+ if m3u8_url:
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, episode_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+
timestamp = int_or_none(self._download_webpage(
self._TIME_API_URL,
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
token = hmac.new(
self._MAGIC.encode('ascii'),
- (episode_id + timestamp_shifted).encode('utf-8')
+ (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
).hexdigest()
- formats = []
- for fmt in ['windows', 'android_tablet']:
- request = sanitized_Request(
- self._URL_VIDEO_TEMPLATE.format(fmt, episode_id, timestamp_shifted, token))
- request.add_header('User-Agent', self._USER_AGENT)
-
- fmt_json = self._download_json(
- request, video_id, 'Downloading %s video JSON' % fmt)
-
- result = fmt_json.get('resultDes')
- if result.lower() != 'ok':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, result), expected=True)
-
- for format_id, video_url in fmt_json['resultObject'].items():
- if format_id == 'token' or not video_url.startswith('http'):
- continue
- if video_url.endswith('/Manifest'):
- if 'geodeswowsmpra3player' in video_url:
- f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
- f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
- # this videos are protected by DRM, the f4m downloader doesn't support them
- continue
- else:
- f4m_url = video_url[:-9] + '/manifest.f4m'
- formats.extend(self._extract_f4m_formats(f4m_url, video_id))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': 'android-%s' % format_id,
- 'preference': 1,
- })
- self._sort_formats(formats)
+ request = sanitized_Request(
+ self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
+ headers={'User-Agent': self._USER_AGENT})
- player = self._download_json(
- self._PLAYER_URL_TEMPLATE % episode_id,
- episode_id)
+ fmt_json = self._download_json(
+ request, video_id, 'Downloading windows video JSON')
+
+ result = fmt_json.get('resultDes')
+ if result.lower() != 'ok':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, result), expected=True)
+
+ for format_id, video_url in fmt_json['resultObject'].items():
+ if format_id == 'token' or not video_url.startswith('http'):
+ continue
+ if 'geodeswowsmpra3player' in video_url:
+ f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+ f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+ # this videos are protected by DRM, the f4m downloader doesn't support them
+ continue
+ else:
+ f4m_url = video_url[:-9] + '/manifest.f4m'
+ f4m_formats = self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ self._sort_formats(formats)
path_data = player.get('pathData')
episode = self._download_xml(
- self._EPISODE_URL_TEMPLATE % path_data,
- video_id, 'Downloading episode XML')
+ self._EPISODE_URL_TEMPLATE % path_data, video_id,
+ 'Downloading episode XML')
duration = float_or_none(xpath_text(
episode, './media/asset/info/technical/contentDuration', 'duration'))
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py
index b0b089dee..4382a302b 100644
--- a/youtube_dl/extractor/audimedia.py
+++ b/youtube_dl/extractor/audimedia.py
@@ -15,7 +15,7 @@ class AudiMediaIE(InfoExtractor):
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
- 'id': '1564',
+ 'id': '1565',
'ext': 'mp4',
'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
'description': 'md5:60e5d30a78ced725f7b8d34370762941',
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 693ba22c6..3eed91279 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -56,7 +56,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
- raise ExtractorError('Invalid url %s', url)
+ raise ExtractorError('Invalid url %s' % url)
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py
new file mode 100644
index 000000000..38bda3af5
--- /dev/null
+++ b/youtube_dl/extractor/bleacherreport.py
@@ -0,0 +1,106 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .amp import AMPIE
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class BleacherReportIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
+ 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
+ 'info_dict': {
+ 'id': '2496438',
+ 'ext': 'mp4',
+ 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
+ 'uploader_id': 3992341,
+ 'description': 'CFB, ACC, Florida State',
+ 'timestamp': 1434380212,
+ 'upload_date': '20150615',
+ 'uploader': 'Team Stream Now ',
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
+ 'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50',
+ 'info_dict': {
+ 'id': '2586817',
+ 'ext': 'mp4',
+ 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
+ 'timestamp': 1446839961,
+ 'uploader': 'Sean Fay',
+ 'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
+ 'uploader_id': 6466954,
+ 'upload_date': '20151011',
+ },
+ 'add_ie': ['Youtube'],
+ }]
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
+
+ thumbnails = []
+ primary_photo = article_data.get('primaryPhoto')
+ if primary_photo:
+ thumbnails = [{
+ 'url': primary_photo['url'],
+ 'width': primary_photo.get('width'),
+ 'height': primary_photo.get('height'),
+ }]
+
+ info = {
+ '_type': 'url_transparent',
+ 'id': article_id,
+ 'title': article_data['title'],
+ 'uploader': article_data.get('author', {}).get('name'),
+ 'uploader_id': article_data.get('authorId'),
+ 'timestamp': parse_iso8601(article_data.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'comment_count': int_or_none(article_data.get('commentsCount')),
+ 'view_count': int_or_none(article_data.get('hitCount')),
+ }
+
+ video = article_data.get('video')
+ if video:
+ video_type = video['type']
+ if video_type == 'cms.bleacherreport.com':
+ info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
+ elif video_type == 'ooyala.com':
+ info['url'] = 'ooyala:%s' % video['id']
+ elif video_type == 'youtube.com':
+ info['url'] = video['id']
+ elif video_type == 'vine.co':
+ info['url'] = 'https://vine.co/v/%s' % video['id']
+ else:
+ info['url'] = video_type + video['id']
+ return info
+ else:
+ raise ExtractorError('no video in the article', expected=True)
+
+
+class BleacherReportCMSIE(AMPIE):
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
+ _TESTS = [{
+ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'md5': '8c2c12e3af7805152675446c905d159b',
+ 'info_dict': {
+ 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'ext': 'flv',
+ 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
+ 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
+ info['id'] = video_id
+ return info
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
deleted file mode 100644
index 35375f7b1..000000000
--- a/youtube_dl/extractor/bliptv.py
+++ /dev/null
@@ -1,290 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-from ..compat import compat_urlparse
-from ..utils import (
- clean_html,
- int_or_none,
- parse_iso8601,
- sanitized_Request,
- unescapeHTML,
- xpath_text,
- xpath_with_ns,
-)
-
-
-class BlipTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
-
- _TESTS = [
- {
- 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352',
- 'md5': '80baf1ec5c3d2019037c1c707d676b9f',
- 'info_dict': {
- 'id': '5779306',
- 'ext': 'm4v',
- 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3',
- 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596',
- 'timestamp': 1323138843,
- 'upload_date': '20111206',
- 'uploader': 'cbr',
- 'uploader_id': '679425',
- 'duration': 81,
- }
- },
- {
- # https://github.com/rg3/youtube-dl/pull/2274
- 'note': 'Video with subtitles',
- 'url': 'http://blip.tv/play/h6Uag5OEVgI.html',
- 'md5': '309f9d25b820b086ca163ffac8031806',
- 'info_dict': {
- 'id': '6586561',
- 'ext': 'mp4',
- 'title': 'Red vs. Blue Season 11 Episode 1',
- 'description': 'One-Zero-One',
- 'timestamp': 1371261608,
- 'upload_date': '20130615',
- 'uploader': 'redvsblue',
- 'uploader_id': '792887',
- 'duration': 279,
- }
- },
- {
- # https://bugzilla.redhat.com/show_bug.cgi?id=967465
- 'url': 'http://a.blip.tv/api.swf#h6Uag5KbVwI',
- 'md5': '314e87b1ebe7a48fcbfdd51b791ce5a6',
- 'info_dict': {
- 'id': '6573122',
- 'ext': 'mov',
- 'upload_date': '20130520',
- 'description': 'Two hapless space marines argue over what to do when they realize they have an astronomically huge problem on their hands.',
- 'title': 'Red vs. Blue Season 11 Trailer',
- 'timestamp': 1369029609,
- 'uploader': 'redvsblue',
- 'uploader_id': '792887',
- }
- },
- {
- 'url': 'http://blip.tv/play/gbk766dkj4Yn',
- 'md5': 'fe0a33f022d49399a241e84a8ea8b8e3',
- 'info_dict': {
- 'id': '1749452',
- 'ext': 'mp4',
- 'upload_date': '20090208',
- 'description': 'Witness the first appearance of the Nostalgia Critic character, as Doug reviews the movie Transformers.',
- 'title': 'Nostalgia Critic: Transformers',
- 'timestamp': 1234068723,
- 'uploader': 'NostalgiaCritic',
- 'uploader_id': '246467',
- }
- },
- {
- # https://github.com/rg3/youtube-dl/pull/4404
- 'note': 'Audio only',
- 'url': 'http://blip.tv/hilarios-productions/weekly-manga-recap-kingdom-7119982',
- 'md5': '76c0a56f24e769ceaab21fbb6416a351',
- 'info_dict': {
- 'id': '7103299',
- 'ext': 'flv',
- 'title': 'Weekly Manga Recap: Kingdom',
- 'description': 'And then Shin breaks the enemy line, and he&apos;s all like HWAH! And then he slices a guy and it&apos;s all like FWASHING! And... it&apos;s really hard to describe the best parts of this series without breaking down into sound effects, okay?',
- 'timestamp': 1417660321,
- 'upload_date': '20141204',
- 'uploader': 'The Rollo T',
- 'uploader_id': '407429',
- 'duration': 7251,
- 'vcodec': 'none',
- }
- },
- {
- # missing duration
- 'url': 'http://blip.tv/rss/flash/6700880',
- 'info_dict': {
- 'id': '6684191',
- 'ext': 'm4v',
- 'title': 'Cowboy Bebop: Gateway Shuffle Review',
- 'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8',
- 'timestamp': 1386639757,
- 'upload_date': '20131210',
- 'uploader': 'sfdebris',
- 'uploader_id': '706520',
- }
- }
- ]
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
- if mobj:
- return 'http://blip.tv/a/a-' + mobj.group(1)
- mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
- if mobj:
- return mobj.group(1)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- lookup_id = mobj.group('lookup_id')
-
- # See https://github.com/rg3/youtube-dl/issues/857 and
- # https://github.com/rg3/youtube-dl/issues/4197
- if lookup_id:
- urlh = self._request_webpage(
- 'http://blip.tv/play/%s' % lookup_id, lookup_id, 'Resolving lookup id')
- url = compat_urlparse.urlparse(urlh.geturl())
- qs = compat_urlparse.parse_qs(url.query)
- mobj = re.match(self._VALID_URL, qs['file'][0])
-
- video_id = mobj.group('id')
-
- rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS')
-
- def _x(p):
- return xpath_with_ns(p, {
- 'blip': 'http://blip.tv/dtd/blip/1.0',
- 'media': 'http://search.yahoo.com/mrss/',
- 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
- })
-
- item = rss.find('channel/item')
-
- video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id
- title = xpath_text(item, 'title', 'title', fatal=True)
- description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description'))
- timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp'))
- uploader = xpath_text(item, _x('blip:user'), 'uploader')
- uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id')
- duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration'))
- media_thumbnail = item.find(_x('media:thumbnail'))
- thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None
- else xpath_text(item, 'image', 'thumbnail'))
- categories = [category.text for category in item.findall('category') if category is not None]
-
- formats = []
- subtitles_urls = {}
-
- media_group = item.find(_x('media:group'))
- for media_content in media_group.findall(_x('media:content')):
- url = media_content.get('url')
- role = media_content.get(_x('blip:role'))
- msg = self._download_webpage(
- url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url',
- video_id, 'Resolving URL for %s' % role)
- real_url = compat_urlparse.parse_qs(msg.strip())['message'][0]
-
- media_type = media_content.get('type')
- if media_type == 'text/srt' or url.endswith('.srt'):
- LANGS = {
- 'english': 'en',
- }
- lang = role.rpartition('-')[-1].strip().lower()
- langcode = LANGS.get(lang, lang)
- subtitles_urls[langcode] = url
- elif media_type.startswith('video/'):
- formats.append({
- 'url': real_url,
- 'format_id': role,
- 'format_note': media_type,
- 'vcodec': media_content.get(_x('blip:vcodec')) or 'none',
- 'acodec': media_content.get(_x('blip:acodec')),
- 'filesize': media_content.get('filesize'),
- 'width': int_or_none(media_content.get('width')),
- 'height': int_or_none(media_content.get('height')),
- })
- self._check_formats(formats, video_id)
- self._sort_formats(formats)
-
- subtitles = self.extract_subtitles(video_id, subtitles_urls)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'categories': categories,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _get_subtitles(self, video_id, subtitles_urls):
- subtitles = {}
- for lang, url in subtitles_urls.items():
- # For some weird reason, blip.tv serves a video instead of subtitles
- # when we request with a common UA
- req = sanitized_Request(url)
- req.add_header('User-Agent', 'youtube-dl')
- subtitles[lang] = [{
- # The extension is 'srt' but it's actually an 'ass' file
- 'ext': 'ass',
- 'data': self._download_webpage(req, None, note=False),
- }]
- return subtitles
-
-
-class BlipTVUserIE(InfoExtractor):
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?blip\.tv/)|bliptvuser:)(?!api\.swf)([^/]+)/*$'
- _PAGE_SIZE = 12
- IE_NAME = 'blip.tv:user'
- _TEST = {
- 'url': 'http://blip.tv/actone',
- 'info_dict': {
- 'id': 'actone',
- 'title': 'Act One: The Series',
- },
- 'playlist_count': 5,
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- username = mobj.group(1)
-
- page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1'
-
- page = self._download_webpage(url, username, 'Downloading user page')
- mobj = re.search(r'data-users-id="([^"]+)"', page)
- page_base = page_base % mobj.group(1)
- title = self._og_search_title(page)
-
- # Download video ids using BlipTV Ajax calls. Result size per
- # query is limited (currently to 12 videos) so we need to query
- # page by page until there are no video ids - it means we got
- # all of them.
-
- video_ids = []
- pagenum = 1
-
- while True:
- url = page_base + "&page=" + str(pagenum)
- page = self._download_webpage(
- url, username, 'Downloading video ids from page %d' % pagenum)
-
- # Extract video identifiers
- ids_in_page = []
-
- for mobj in re.finditer(r'href="/([^"]+)"', page):
- if mobj.group(1) not in ids_in_page:
- ids_in_page.append(unescapeHTML(mobj.group(1)))
-
- video_ids.extend(ids_in_page)
-
- # A little optimization - if current page is not
- # "full", ie. does not contain PAGE_SIZE video ids then
- # we can assume that this page is the last one - there
- # are no more ids on further pages - no need to query
- # again.
-
- if len(ids_in_page) < self._PAGE_SIZE:
- break
-
- pagenum += 1
-
- urls = ['http://blip.tv/%s' % video_id for video_id in video_ids]
- url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls]
- return self.playlist_result(
- url_entries, playlist_title=title, playlist_id=username)
diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py
index 66e394e10..e66854538 100644
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dl/extractor/br.py
@@ -1,18 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
+ xpath_element,
+ xpath_text,
)
class BRIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
- _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
- _BASE_URL = 'http://www.br.de'
+ _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
_TESTS = [
{
@@ -22,7 +25,7 @@ class BRIE(InfoExtractor):
'id': '48f656ef-287e-486f-be86-459122db22cc',
'ext': 'mp4',
'title': 'Die böse Überraschung',
- 'description': 'Betriebliche Altersvorsorge: Die böse Überraschung',
+ 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
'duration': 180,
'uploader': 'Reinhard Weber',
'upload_date': '20150422',
@@ -30,23 +33,23 @@ class BRIE(InfoExtractor):
},
{
'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
- 'md5': 'a44396d73ab6a68a69a568fae10705bb',
+ 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
'info_dict': {
'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Manfred Schreiber ist tot',
- 'description': 'Abendschau kompakt: Manfred Schreiber ist tot',
+ 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
'duration': 26,
}
},
{
- 'url': 'http://www.br.de/radio/br-klassik/sendungen/allegro/premiere-urauffuehrung-the-land-2015-dance-festival-muenchen-100.html',
+ 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
'info_dict': {
'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
'ext': 'aac',
'title': 'Kurzweilig und sehr bewegend',
- 'description': '"The Land" von Peeping Tom: Kurzweilig und sehr bewegend',
+ 'description': 'md5:0351996e3283d64adeb38ede91fac54e',
'duration': 296,
}
},
@@ -57,7 +60,7 @@ class BRIE(InfoExtractor):
'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
'ext': 'mp4',
'title': 'Umweltbewusster Häuslebauer',
- 'description': 'Uwe Erdelt: Umweltbewusster Häuslebauer',
+ 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
'duration': 116,
}
},
@@ -68,7 +71,7 @@ class BRIE(InfoExtractor):
'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
'ext': 'mp4',
'title': 'Folge 1 - Metaphysik',
- 'description': 'Kant für Anfänger: Folge 1 - Metaphysik',
+ 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
'duration': 893,
'uploader': 'Eva Maria Steimle',
'upload_date': '20140117',
@@ -77,28 +80,31 @@ class BRIE(InfoExtractor):
]
def _real_extract(self, url):
- display_id = self._match_id(url)
+ base_url, display_id = re.search(self._VALID_URL, url).groups()
page = self._download_webpage(url, display_id)
xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
- xml = self._download_xml(self._BASE_URL + xml_url, None)
+ xml = self._download_xml(base_url + xml_url, display_id)
medias = []
for xml_media in xml.findall('video') + xml.findall('audio'):
+ media_id = xml_media.get('externalId')
media = {
- 'id': xml_media.get('externalId'),
- 'title': xml_media.find('title').text,
- 'duration': parse_duration(xml_media.find('duration').text),
- 'formats': self._extract_formats(xml_media.find('assets')),
- 'thumbnails': self._extract_thumbnails(xml_media.find('teaserImage/variants')),
- 'description': ' '.join(xml_media.find('shareTitle').text.splitlines()),
- 'webpage_url': xml_media.find('permalink').text
+ 'id': media_id,
+ 'title': xpath_text(xml_media, 'title', 'title', True),
+ 'duration': parse_duration(xpath_text(xml_media, 'duration')),
+ 'formats': self._extract_formats(xpath_element(
+ xml_media, 'assets'), media_id),
+ 'thumbnails': self._extract_thumbnails(xpath_element(
+ xml_media, 'teaserImage/variants'), base_url),
+ 'description': xpath_text(xml_media, 'desc'),
+ 'webpage_url': xpath_text(xml_media, 'permalink'),
+ 'uploader': xpath_text(xml_media, 'author'),
}
- if xml_media.find('author').text:
- media['uploader'] = xml_media.find('author').text
- if xml_media.find('broadcastDate').text:
- media['upload_date'] = ''.join(reversed(xml_media.find('broadcastDate').text.split('.')))
+ broadcast_date = xpath_text(xml_media, 'broadcastDate')
+ if broadcast_date:
+ media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
medias.append(media)
if len(medias) > 1:
@@ -109,35 +115,58 @@ class BRIE(InfoExtractor):
raise ExtractorError('No media entries found')
return medias[0]
- def _extract_formats(self, assets):
-
- def text_or_none(asset, tag):
- elem = asset.find(tag)
- return None if elem is None else elem.text
-
- formats = [{
- 'url': text_or_none(asset, 'downloadUrl'),
- 'ext': text_or_none(asset, 'mediaType'),
- 'format_id': asset.get('type'),
- 'width': int_or_none(text_or_none(asset, 'frameWidth')),
- 'height': int_or_none(text_or_none(asset, 'frameHeight')),
- 'tbr': int_or_none(text_or_none(asset, 'bitrateVideo')),
- 'abr': int_or_none(text_or_none(asset, 'bitrateAudio')),
- 'vcodec': text_or_none(asset, 'codecVideo'),
- 'acodec': text_or_none(asset, 'codecAudio'),
- 'container': text_or_none(asset, 'mediaType'),
- 'filesize': int_or_none(text_or_none(asset, 'size')),
- } for asset in assets.findall('asset')
- if asset.find('downloadUrl') is not None]
-
+ def _extract_formats(self, assets, media_id):
+ formats = []
+ for asset in assets.findall('asset'):
+ format_url = xpath_text(asset, ['downloadUrl', 'url'])
+ asset_type = asset.get('type')
+ if asset_type == 'HDS':
+ f4m_formats = self._extract_f4m_formats(
+ format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ elif asset_type == 'HLS':
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ else:
+ format_info = {
+ 'ext': xpath_text(asset, 'mediaType'),
+ 'width': int_or_none(xpath_text(asset, 'frameWidth')),
+ 'height': int_or_none(xpath_text(asset, 'frameHeight')),
+ 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
+ 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
+ 'vcodec': xpath_text(asset, 'codecVideo'),
+ 'acodec': xpath_text(asset, 'codecAudio'),
+ 'container': xpath_text(asset, 'mediaType'),
+ 'filesize': int_or_none(xpath_text(asset, 'size')),
+ }
+ format_url = self._proto_relative_url(format_url)
+ if format_url:
+ http_format_info = format_info.copy()
+ http_format_info.update({
+ 'url': format_url,
+ 'format_id': 'http-%s' % asset_type,
+ })
+ formats.append(http_format_info)
+ server_prefix = xpath_text(asset, 'serverPrefix')
+ if server_prefix:
+ rtmp_format_info = format_info.copy()
+ rtmp_format_info.update({
+ 'url': server_prefix,
+ 'play_path': xpath_text(asset, 'fileName'),
+ 'format_id': 'rtmp-%s' % asset_type,
+ })
+ formats.append(rtmp_format_info)
self._sort_formats(formats)
return formats
- def _extract_thumbnails(self, variants):
+ def _extract_thumbnails(self, variants, base_url):
thumbnails = [{
- 'url': self._BASE_URL + variant.find('url').text,
- 'width': int_or_none(variant.find('width').text),
- 'height': int_or_none(variant.find('height').text),
- } for variant in variants.findall('variant')]
+ 'url': base_url + xpath_text(variant, 'url'),
+ 'width': int_or_none(xpath_text(variant, 'width')),
+ 'height': int_or_none(xpath_text(variant, 'height')),
+ } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails
diff --git a/youtube_dl/extractor/canal13cl.py b/youtube_dl/extractor/canal13cl.py
deleted file mode 100644
index 93241fefe..000000000
--- a/youtube_dl/extractor/canal13cl.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class Canal13clIE(InfoExtractor):
- _VALID_URL = r'^http://(?:www\.)?13\.cl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
- _TEST = {
- 'url': 'http://www.13.cl/t13/nacional/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
- 'md5': '4cb1fa38adcad8fea88487a078831755',
- 'info_dict': {
- 'id': '1403022125',
- 'display_id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
- 'ext': 'mp4',
- 'title': 'El "círculo de hierro" de Michelle Bachelet en su regreso a La Moneda',
- 'description': '(Foto: Agencia Uno) En nueve días más, Michelle Bachelet va a asumir por segunda vez como presidenta de la República. Entre aquellos que la acompañarán hay caras que se repiten y otras que se consolidan en su entorno de colaboradores más cercanos.',
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id')
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._html_search_meta(
- 'twitter:title', webpage, 'title', fatal=True)
- description = self._html_search_meta(
- 'twitter:description', webpage, 'description')
- url = self._html_search_regex(
- r'articuloVideo = \"(.*?)\"', webpage, 'url')
- real_id = self._search_regex(
- r'[^0-9]([0-9]{7,})[^0-9]', url, 'id', default=display_id)
- thumbnail = self._html_search_regex(
- r'articuloImagen = \"(.*?)\"', webpage, 'thumbnail')
-
- return {
- 'id': real_id,
- 'display_id': display_id,
- 'url': url,
- 'title': title,
- 'description': description,
- 'ext': 'mp4',
- 'thumbnail': thumbnail,
- }
diff --git a/youtube_dl/extractor/cinemassacre.py b/youtube_dl/extractor/cinemassacre.py
index fd1770dac..6d9cd8abd 100644
--- a/youtube_dl/extractor/cinemassacre.py
+++ b/youtube_dl/extractor/cinemassacre.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import ExtractorError
-from .bliptv import BlipTVIE
from .screenwavemedia import ScreenwaveMediaIE
@@ -34,18 +33,17 @@ class CinemassacreIE(InfoExtractor):
},
},
{
- # blip.tv embedded video
+ # Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
- 'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
+ 'md5': 'df4cf8a1dcedaec79a73d96d83b99023',
'info_dict': {
- 'id': '4065369',
- 'ext': 'flv',
+ 'id': 'OEVzPCY2T-g',
+ 'ext': 'mp4',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207',
- 'uploader': 'cinemassacre',
- 'uploader_id': '250778',
- 'timestamp': 1283233867,
- 'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
+ 'uploader': 'Cinemassacre',
+ 'uploader_id': 'JamesNintendoNerd',
+ 'description': 'md5:784734696c2b8b7f4b8625cc799e07f6',
}
},
{
@@ -89,8 +87,6 @@ class CinemassacreIE(InfoExtractor):
],
webpage, 'player data URL', default=None, group='url')
if not playerdata_url:
- playerdata_url = BlipTVIE._extract_url(webpage)
- if not playerdata_url:
raise ExtractorError('Unable to find player data')
video_title = self._html_search_regex(
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
index 81f3d7697..2efa200b5 100644
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -1,10 +1,12 @@
# encoding: utf-8
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+)
class ComCarCoffIE(InfoExtractor):
@@ -16,6 +18,7 @@ class ComCarCoffIE(InfoExtractor):
'ext': 'mp4',
'upload_date': '20141127',
'timestamp': 1417107600,
+ 'duration': 1232,
'title': 'Happy Thanksgiving Miranda',
'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
@@ -31,9 +34,10 @@ class ComCarCoffIE(InfoExtractor):
display_id = 'comediansincarsgettingcoffee.com'
webpage = self._download_webpage(url, display_id)
- full_data = json.loads(self._search_regex(
- r'<script type="application/json" id="videoData">(?P<json>.+?)</script>',
- webpage, 'full data json'))
+ full_data = self._parse_json(
+ self._search_regex(
+ r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
+ display_id)['videoData']
video_id = full_data['activeVideo']['video']
video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
@@ -45,12 +49,18 @@ class ComCarCoffIE(InfoExtractor):
formats = self._extract_m3u8_formats(
video_data['mediaUrl'], video_id, ext='mp4')
+ timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
+ video_data.get('pubDate'))
+ duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
+ video_data.get('duration'))
+
return {
'id': video_id,
'display_id': display_id,
'title': video_data['title'],
'description': video_data.get('description'),
- 'timestamp': parse_iso8601(video_data.get('pubDate')),
+ 'timestamp': timestamp,
+ 'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index 934da765e..9a94cf361 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -25,6 +25,18 @@ class DaumIE(InfoExtractor):
'duration': 3868,
},
}, {
+ # Test for https://github.com/rg3/youtube-dl/issues/7949
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=M1O35s8HPOo0&clipid=73147290',
+ 'md5': 'c92d78bcee4424451f1667f275c1dc97',
+ 'info_dict': {
+ 'id': '73147290',
+ 'ext': 'mp4',
+ 'title': '싸이 - 나팔바지 [유희열의 스케치북] 299회 20151218',
+ 'description': '싸이 - 나팔바지',
+ 'upload_date': '20151219',
+ 'duration': 232,
+ },
+ }, {
'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
'only_matching': True,
}, {
@@ -37,9 +49,11 @@ class DaumIE(InfoExtractor):
video_id = mobj.group('id')
canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
webpage = self._download_webpage(canonical_url, video_id)
+ og_url = self._og_search_url(webpage, default=None) or self._search_regex(
+ r'<link[^>]+rel=(["\'])canonical\1[^>]+href=(["\'])(?P<url>.+?)\2',
+ webpage, 'canonical url', group='url')
full_id = self._search_regex(
- r'src=["\']http://videofarm\.daum\.net/controller/video/viewer/Video\.html\?.*?vid=(.+?)[&"\']',
- webpage, 'full id')
+ r'tvpot\.daum\.net/v/([^/]+)', og_url, 'full id')
query = compat_urllib_parse.urlencode({'vid': full_id})
info = self._download_xml(
'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index d836c1a6c..60ed438f8 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import itertools
-from .common import InfoExtractor
+from .amp import AMPIE
from ..compat import (
compat_HTTPError,
compat_urllib_parse,
@@ -12,14 +12,11 @@ from ..compat import (
from ..utils import (
ExtractorError,
clean_html,
- determine_ext,
- int_or_none,
- parse_iso8601,
sanitized_Request,
)
-class DramaFeverBaseIE(InfoExtractor):
+class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
@@ -80,60 +77,25 @@ class DramaFeverIE(DramaFeverBaseIE):
'timestamp': 1404336058,
'upload_date': '20140702',
'duration': 343,
- }
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
video_id = self._match_id(url).replace('/', '.')
try:
- feed = self._download_json(
- 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id,
- video_id, 'Downloading episode JSON')['channel']['item']
+ info = self._extract_feed_info(
+ 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
raise ExtractorError(
'Currently unavailable in your country.', expected=True)
raise
- media_group = feed.get('media-group', {})
-
- formats = []
- for media_content in media_group['media-content']:
- src = media_content.get('@attributes', {}).get('url')
- if not src:
- continue
- ext = determine_ext(src)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src, video_id, f4m_id='hds'))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id='hls'))
- else:
- formats.append({
- 'url': src,
- })
- self._sort_formats(formats)
-
- title = media_group.get('media-title')
- description = media_group.get('media-description')
- duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration'))
- thumbnail = self._proto_relative_url(
- media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url'))
- timestamp = parse_iso8601(feed.get('pubDate'), ' ')
-
- subtitles = {}
- for media_subtitle in media_group.get('media-subTitle', []):
- lang = media_subtitle.get('@attributes', {}).get('lang')
- href = media_subtitle.get('@attributes', {}).get('href')
- if not lang or not href:
- continue
- subtitles[lang] = [{
- 'ext': 'ttml',
- 'url': href,
- }]
-
series_id, episode_number = video_id.split('.')
episode_info = self._download_json(
# We only need a single episode info, so restricting page size to one episode
@@ -146,21 +108,12 @@ class DramaFeverIE(DramaFeverBaseIE):
if value:
subfile = value[0].get('subfile') or value[0].get('new_subfile')
if subfile and subfile != 'http://www.dramafever.com/st/':
- subtitles.setdefault('English', []).append({
+ info['subtitiles'].setdefault('English', []).append({
'ext': 'srt',
'url': subfile,
})
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
+ return info
class DramaFeverSeriesIE(DramaFeverBaseIE):
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index d9a868119..6f9b003c2 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -2,6 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import (
+ xpath_element,
+ xpath_text,
+ int_or_none,
+)
class FazIE(InfoExtractor):
@@ -37,31 +42,32 @@ class FazIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ description = self._og_search_description(webpage)
config_xml_url = self._search_regex(
- r'(?:var\s+)?videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
+ r'videoXMLURL\s*=\s*"([^"]+)', webpage, 'config xml url')
config = self._download_xml(
config_xml_url, video_id, 'Downloading config xml')
- encodings = config.find('ENCODINGS')
+ encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)
formats = []
for pref, code in enumerate(['LOW', 'HIGH', 'HQ']):
- encoding = encodings.find(code)
- if encoding is None:
- continue
- encoding_url = encoding.find('FILENAME').text
- formats.append({
- 'url': encoding_url,
- 'format_id': code.lower(),
- 'quality': pref,
- })
+ encoding = xpath_element(encodings, code)
+ if encoding is not None:
+ encoding_url = xpath_text(encoding, 'FILENAME')
+ if encoding_url:
+ formats.append({
+ 'url': encoding_url,
+ 'format_id': code.lower(),
+ 'quality': pref,
+ 'tbr': int_or_none(xpath_text(encoding, 'AVERAGEBITRATE')),
+ })
self._sort_formats(formats)
- descr = self._html_search_regex(
- r'<p class="Content Copy">(.*?)</p>', webpage, 'description', fatal=False)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
- 'description': descr,
- 'thumbnail': config.find('STILL/STILL_BIG').text,
+ 'description': description.strip() if description else None,
+ 'thumbnail': xpath_text(config, 'STILL/STILL_BIG'),
+ 'duration': int_or_none(xpath_text(config, 'DURATION')),
}
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dl/extractor/flickr.py
index 91cd46e76..18f439df9 100644
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dl/extractor/flickr.py
@@ -1,67 +1,93 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
from ..utils import (
ExtractorError,
- find_xpath_attr,
- sanitized_Request,
+ int_or_none,
+ qualities,
)
class FlickrIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?flickr\.com/photos/[\w\-_@]+/(?P<id>\d+)'
_TEST = {
'url': 'http://www.flickr.com/photos/forestwander-nature-pictures/5645318632/in/photostream/',
- 'md5': '6fdc01adbc89d72fc9c4f15b4a4ba87b',
+ 'md5': '164fe3fa6c22e18d448d4d5af2330f31',
'info_dict': {
'id': '5645318632',
- 'ext': 'mp4',
- "description": "Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.",
- "uploader_id": "forestwander-nature-pictures",
- "title": "Dark Hollow Waterfalls"
+ 'ext': 'mpg',
+ 'description': 'Waterfalls in the Springtime at Dark Hollow Waterfalls. These are located just off of Skyline Drive in Virginia. They are only about 6/10 of a mile hike but it is a pretty steep hill and a good climb back up.',
+ 'title': 'Dark Hollow Waterfalls',
+ 'duration': 19,
+ 'timestamp': 1303528740,
+ 'upload_date': '20110423',
+ 'uploader_id': '10922353@N03',
+ 'uploader': 'Forest Wander',
+ 'comment_count': int,
+ 'view_count': int,
+ 'tags': list,
}
}
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ _API_BASE_URL = 'https://api.flickr.com/services/rest?'
- video_id = mobj.group('id')
- video_uploader_id = mobj.group('uploader_id')
- webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
- req = sanitized_Request(webpage_url)
- req.add_header(
- 'User-Agent',
- # it needs a more recent version
- 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
- webpage = self._download_webpage(req, video_id)
+ def _call_api(self, method, video_id, api_key, note, secret=None):
+ query = {
+ 'photo_id': video_id,
+ 'method': 'flickr.%s' % method,
+ 'api_key': api_key,
+ 'format': 'json',
+ 'nojsoncallback': 1,
+ }
+ if secret:
+ query['secret'] = secret
+ data = self._download_json(self._API_BASE_URL + compat_urllib_parse.urlencode(query), video_id, note)
+ if data['stat'] != 'ok':
+ raise ExtractorError(data['message'])
+ return data
- secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
- first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
- first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
+ api_key = self._download_json(
+ 'https://www.flickr.com/hermes_error_beacon.gne', video_id,
+ 'Downloading api key')['site_key']
- node_id = find_xpath_attr(
- first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
- 'id').text
+ video_info = self._call_api(
+ 'photos.getInfo', video_id, api_key, 'Downloading video info')['photo']
+ if video_info['media'] == 'video':
+ streams = self._call_api(
+ 'video.getStreamInfo', video_id, api_key,
+ 'Downloading streams info', video_info['secret'])['streams']
- second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
- second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
+ preference = qualities(
+ ['288p', 'iphone_wifi', '100', '300', '700', '360p', 'appletv', '720p', '1080p', 'orig'])
- self.report_extraction(video_id)
+ formats = []
+ for stream in streams['stream']:
+ stream_type = str(stream.get('type'))
+ formats.append({
+ 'format_id': stream_type,
+ 'url': stream['_content'],
+ 'preference': preference(stream_type),
+ })
+ self._sort_formats(formats)
- stream = second_xml.find('.//STREAM')
- if stream is None:
- raise ExtractorError('Unable to extract video url')
- video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
+ owner = video_info.get('owner', {})
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader_id': video_uploader_id,
- }
+ return {
+ 'id': video_id,
+ 'title': video_info['title']['_content'],
+ 'description': video_info.get('description', {}).get('_content'),
+ 'formats': formats,
+ 'timestamp': int_or_none(video_info.get('dateuploaded')),
+ 'duration': int_or_none(video_info.get('video', {}).get('duration')),
+ 'uploader_id': owner.get('nsid'),
+ 'uploader': owner.get('realname'),
+ 'comment_count': int_or_none(video_info.get('comments', {}).get('_content')),
+ 'view_count': int_or_none(video_info.get('views')),
+ 'tags': [tag.get('_content') for tag in video_info.get('tags', {}).get('tag', [])]
+ }
+ else:
+ raise ExtractorError('not a video', expected=True)
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py
index 3a4a59135..318ac013d 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dl/extractor/foxnews.py
@@ -2,14 +2,10 @@ from __future__ import unicode_literals
import re
-from .common import InfoExtractor
-from ..utils import (
- parse_iso8601,
- int_or_none,
-)
+from .amp import AMPIE
-class FoxNewsIE(InfoExtractor):
+class FoxNewsIE(AMPIE):
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
@@ -20,10 +16,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3937480',
'ext': 'flv',
'title': 'Frozen in Time',
- 'description': 'Doctors baffled by 16-year-old girl that is the size of a toddler',
+ 'description': '16-year-old girl is size of toddler',
'duration': 265,
- 'timestamp': 1304411491,
- 'upload_date': '20110503',
+ # 'timestamp': 1304411491,
+ # 'upload_date': '20110503',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -34,10 +30,10 @@ class FoxNewsIE(InfoExtractor):
'id': '3922535568001',
'ext': 'mp4',
'title': "Rep. Luis Gutierrez on if Obama's immigration plan is legal",
- 'description': "Congressman discusses the president's executive action",
+ 'description': "Congressman discusses president's plan",
'duration': 292,
- 'timestamp': 1417662047,
- 'upload_date': '20141204',
+ # 'timestamp': 1417662047,
+ # 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$',
},
},
@@ -52,52 +48,9 @@ class FoxNewsIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- host = mobj.group('host')
+ host, video_id = re.match(self._VALID_URL, url).groups()
- video = self._download_json(
- 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id), video_id)
-
- item = video['channel']['item']
- title = item['title']
- description = item['description']
- timestamp = parse_iso8601(item['dc-date'])
-
- media_group = item['media-group']
- duration = None
- formats = []
- for media in media_group['media-content']:
- attributes = media['@attributes']
- video_url = attributes['url']
- if video_url.endswith('.f4m'):
- formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', video_id))
- elif video_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(video_url, video_id, 'flv'))
- elif not video_url.endswith('.smil'):
- duration = int_or_none(attributes.get('duration'))
- formats.append({
- 'url': video_url,
- 'format_id': media['media-category']['@attributes']['label'],
- 'preference': 1,
- 'vbr': int_or_none(attributes.get('bitrate')),
- 'filesize': int_or_none(attributes.get('fileSize'))
- })
- self._sort_formats(formats)
-
- media_thumbnail = media_group['media-thumbnail']['@attributes']
- thumbnails = [{
- 'url': media_thumbnail['url'],
- 'width': int_or_none(media_thumbnail.get('width')),
- 'height': int_or_none(media_thumbnail.get('height')),
- }] if media_thumbnail else []
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
+ info = self._extract_feed_info(
+ 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
+ info['id'] = video_id
+ return info
diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py
index 6613ee17a..fdc51f44f 100644
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dl/extractor/franceinter.py
@@ -1,8 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import int_or_none
@@ -23,8 +21,7 @@ class FranceInterIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
@@ -33,7 +30,7 @@ class FranceInterIE(InfoExtractor):
video_url = 'http://www.franceinter.fr/' + path
title = self._html_search_regex(
- r'<span class="title">(.+?)</span>', webpage, 'title')
+ r'<span class="title-diffusion">(.+?)</span>', webpage, 'title')
description = self._html_search_regex(
r'<span class="description">(.*?)</span>',
webpage, 'description', fatal=False)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c2e8f9b62..3c3066e38 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -44,7 +44,6 @@ from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
from .senateisvp import SenateISVPIE
-from .bliptv import BlipTVIE
from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
@@ -55,6 +54,8 @@ from .snagfilms import SnagFilmsEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
+from .googledrive import GoogleDriveIE
+from .jwplatform import JWPlatformIE
class GenericIE(InfoExtractor):
@@ -1440,11 +1441,6 @@ class GenericIE(InfoExtractor):
'id': match.group('id')
}
- # Look for embedded blip.tv player
- bliptv_url = BlipTVIE._extract_url(webpage)
- if bliptv_url:
- return self.url_result(bliptv_url, 'BlipTV')
-
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
if svt_url:
@@ -1769,6 +1765,11 @@ class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
# Look for UDN embeds
mobj = re.search(
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
@@ -1796,6 +1797,11 @@ class GenericIE(InfoExtractor):
if snagfilms_url:
return self.url_result(snagfilms_url)
+ # Look for JWPlatform embeds
+ jwplatform_url = JWPlatformIE._extract_url(webpage)
+ if jwplatform_url:
+ return self.url_result(jwplatform_url, 'JWPlatform')
+
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
new file mode 100644
index 000000000..f354c9c7a
--- /dev/null
+++ b/youtube_dl/extractor/googledrive.py
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class GoogleDriveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
+ _TEST = {
+ 'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
+ 'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+ 'info_dict': {
+ 'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny.mp4',
+ 'duration': 46,
+ }
+ }
+ _FORMATS_EXT = {
+ '5': 'flv',
+ '6': 'flv',
+ '13': '3gp',
+ '17': '3gp',
+ '18': 'mp4',
+ '22': 'mp4',
+ '34': 'flv',
+ '35': 'flv',
+ '36': '3gp',
+ '37': 'mp4',
+ '38': 'mp4',
+ '43': 'webm',
+ '44': 'webm',
+ '45': 'webm',
+ '46': 'webm',
+ '59': 'mp4',
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+ webpage)
+ if mobj:
+ return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+
+ reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+ if reason:
+ raise ExtractorError(reason)
+
+ title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+ duration = int_or_none(self._search_regex(
+ r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
+ fmt_stream_map = self._search_regex(
+ r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
+ fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
+
+ formats = []
+ for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
+ fmt_id, fmt_url = fmt_stream.split('|')
+ resolution = fmt.split('/')[1]
+ width, height = resolution.split('x')
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': fmt_id,
+ 'resolution': resolution,
+ 'width': int_or_none(width),
+ 'height': int_or_none(height),
+ 'ext': self._FORMATS_EXT[fmt_id],
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index 70c8ca64e..85e9344aa 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -13,7 +13,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!gallery)(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -21,7 +21,7 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ 'description': 'Imgur: The most awesome images on the Internet.',
},
}, {
'url': 'https://imgur.com/A61SaA1',
@@ -29,8 +29,20 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$',
+ 'description': 'Imgur: The most awesome images on the Internet.',
},
+ }, {
+ 'url': 'https://imgur.com/gallery/YcAQlkx',
+ 'info_dict': {
+ 'id': 'YcAQlkx',
+ 'ext': 'mp4',
+ 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+ 'description': 'Imgur: The most awesome images on the Internet.'
+
+ }
+ }, {
+ 'url': 'http://imgur.com/topic/Funny/N8rOudd',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -100,25 +112,38 @@ class ImgurIE(InfoExtractor):
class ImgurAlbumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/gallery/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
- _TEST = {
+ _TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
},
'playlist_count': 25,
- }
+ }, {
+ 'url': 'http://imgur.com/a/j6Orj',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/topic/Aww/ll5Vk',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
album_id = self._match_id(url)
album_images = self._download_json(
'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
- album_id)['data']['images']
-
- entries = [
- self.url_result('http://imgur.com/%s' % image['hash'])
- for image in album_images if image.get('hash')]
-
- return self.playlist_result(entries, album_id)
+ album_id, fatal=False)
+
+ if album_images:
+ data = album_images.get('data')
+ if data and isinstance(data, dict):
+ images = data.get('images')
+ if images and isinstance(images, list):
+ entries = [
+ self.url_result('http://imgur.com/%s' % image['hash'])
+ for image in images if image.get('hash')]
+ return self.playlist_result(entries, album_id)
+
+ # Fallback to single video
+ return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index c158f2064..e5e16ca3b 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -47,7 +47,7 @@ class InstagramIE(InfoExtractor):
class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'https://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user'
_TEST = {
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
new file mode 100644
index 000000000..cdc095a79
--- /dev/null
+++ b/youtube_dl/extractor/jwplatform.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class JWPlatformIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _TEST = {
+ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
+ 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
+ 'info_dict': {
+ 'id': 'nPripu9l',
+ 'ext': 'mov',
+ 'title': 'Big Buck Bunny Trailer',
+ 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
+ 'upload_date': '20081127',
+ 'timestamp': 1227796140,
+ }
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<script[^>]+?src=["\'](?P<url>(?:https?:)?//content.jwplatform.com/players/[a-zA-Z0-9]{8})',
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
+ video_data = json_data['playlist'][0]
+ subtitles = {}
+ for track in video_data['tracks']:
+ if track['kind'] == 'captions':
+ subtitles[track['label']] = [{'url': self._proto_relative_url(track['file'])}]
+
+ formats = []
+ for source in video_data['sources']:
+ source_url = self._proto_relative_url(source['file'])
+ source_type = source.get('type') or ''
+ if source_type == 'application/vnd.apple.mpegurl':
+ m3u8_formats = self._extract_m3u8_formats(source_url, video_id, 'mp4', 'm3u8_native', fatal=None)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif source_type.startswith('audio'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'thumbnail': self._proto_relative_url(video_data.get('image')),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
new file mode 100644
index 000000000..3c34d4604
--- /dev/null
+++ b/youtube_dl/extractor/makertv.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MakerTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
+ _TEST = {
+ 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
+ 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
+ 'info_dict': {
+ 'id': 'Fh3QgymL9gsc',
+ 'ext': 'mp4',
+ 'title': 'Maze Runner: The Scorch Trials Official Movie Review',
+ 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
+ 'upload_date': '20150918',
+ 'timestamp': 1442549540,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'jwplatform:%s' % jwplatform_id,
+ 'ie_key': 'JWPlatform',
+ }
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 744e4a09a..97e8ffc97 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -16,7 +16,7 @@ from ..utils import (
class PBSIE(InfoExtractor):
_STATIONS = (
- (r'(?:video|www)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
+ (r'(?:video|www|player)\.pbs\.org', 'PBS: Public Broadcasting Service'), # http://www.pbs.org/
(r'video\.aptv\.org', 'APT - Alabama Public Television (WBIQ)'), # http://aptv.org/
(r'video\.gpb\.org', 'GPB/Georgia Public Broadcasting (WGTV)'), # http://www.gpb.org/
(r'video\.mpbonline\.org', 'Mississippi Public Broadcasting (WMPN)'), # http://www.mpbonline.org
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
index 63cc764bb..514e9b433 100644
--- a/youtube_dl/extractor/periscope.py
+++ b/youtube_dl/extractor/periscope.py
@@ -31,9 +31,8 @@ class PeriscopeIE(InfoExtractor):
}]
def _call_api(self, method, value):
- attribute = 'token' if len(value) > 13 else 'broadcast_id'
return self._download_json(
- 'https://api.periscope.tv/api/v2/%s?%s=%s' % (method, attribute, value), value)
+ 'https://api.periscope.tv/api/v2/%s?broadcast_id=%s' % (method, value), value)
def _real_extract(self, url):
token = self._match_id(url)
diff --git a/youtube_dl/extractor/soompi.py b/youtube_dl/extractor/soompi.py
deleted file mode 100644
index 5da66ca9e..000000000
--- a/youtube_dl/extractor/soompi.py
+++ /dev/null
@@ -1,146 +0,0 @@
-# encoding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .crunchyroll import CrunchyrollIE
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
- ExtractorError,
- int_or_none,
- remove_start,
- xpath_text,
-)
-
-
-class SoompiBaseIE(InfoExtractor):
- def _get_episodes(self, webpage, episode_filter=None):
- episodes = self._parse_json(
- self._search_regex(
- r'VIDEOS\s*=\s*(\[.+?\]);', webpage, 'episodes JSON'),
- None)
- return list(filter(episode_filter, episodes))
-
-
-class SoompiIE(SoompiBaseIE, CrunchyrollIE):
- IE_NAME = 'soompi'
- _VALID_URL = r'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://tv.soompi.com/en/watch/29235',
- 'info_dict': {
- 'id': '29235',
- 'ext': 'mp4',
- 'title': 'Episode 1096',
- 'description': '2015-05-20'
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _get_episode(self, webpage, video_id):
- return self._get_episodes(webpage, lambda x: x['id'] == video_id)[0]
-
- def _get_subtitles(self, config, video_id):
- sub_langs = {}
- for subtitle in config.findall('./{default}preload/subtitles/subtitle'):
- sub_langs[subtitle.attrib['id']] = subtitle.attrib['title']
-
- subtitles = {}
- for s in config.findall('./{default}preload/subtitle'):
- lang_code = sub_langs.get(s.attrib['id'])
- if not lang_code:
- continue
- sub_id = s.get('id')
- data = xpath_text(s, './data', 'data')
- iv = xpath_text(s, './iv', 'iv')
- if not id or not iv or not data:
- continue
- subtitle = self._decrypt_subtitles(data, iv, sub_id).decode('utf-8')
- subtitles[lang_code] = self._extract_subtitles(subtitle)
- return subtitles
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- webpage = self._download_webpage(
- url, video_id, 'Downloading episode page')
- except ExtractorError as ee:
- if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
- webpage = ee.cause.read()
- block_message = self._html_search_regex(
- r'(?s)<div class="block-message">(.+?)</div>', webpage,
- 'block message', default=None)
- if block_message:
- raise ExtractorError(block_message, expected=True)
- raise
-
- formats = []
- config = None
- for format_id in re.findall(r'\?quality=([0-9a-zA-Z]+)', webpage):
- config = self._download_xml(
- 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id, format_id),
- video_id, 'Downloading %s XML' % format_id)
- m3u8_url = xpath_text(
- config, './{default}preload/stream_info/file',
- '%s m3u8 URL' % format_id)
- if not m3u8_url:
- continue
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', m3u8_id=format_id))
- self._sort_formats(formats)
-
- episode = self._get_episode(webpage, video_id)
-
- title = episode['name']
- description = episode.get('description')
- duration = int_or_none(episode.get('duration'))
-
- thumbnails = [{
- 'id': thumbnail_id,
- 'url': thumbnail_url,
- } for thumbnail_id, thumbnail_url in episode.get('img_url', {}).items()]
-
- subtitles = self.extract_subtitles(config, video_id)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles
- }
-
-
-class SoompiShowIE(SoompiBaseIE):
- IE_NAME = 'soompi:show'
- _VALID_URL = r'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)'
- _TESTS = [{
- 'url': 'http://tv.soompi.com/en/shows/liar-game',
- 'info_dict': {
- 'id': 'liar-game',
- 'title': 'Liar Game',
- 'description': 'md5:52c02bce0c1a622a95823591d0589b66',
- },
- 'playlist_count': 14,
- }]
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, show_id, 'Downloading show page')
-
- title = remove_start(self._og_search_title(webpage), 'SoompiTV | ')
- description = self._og_search_description(webpage)
-
- entries = [
- self.url_result('http://tv.soompi.com/en/watch/%s' % episode['id'], 'Soompi')
- for episode in self._get_episodes(webpage)]
-
- return self.playlist_result(entries, show_id, title, description)
diff --git a/youtube_dl/extractor/tele13.py b/youtube_dl/extractor/tele13.py
new file mode 100644
index 000000000..a363b4d40
--- /dev/null
+++ b/youtube_dl/extractor/tele13.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ js_to_json,
+ qualities,
+ determine_ext,
+)
+
+
+class Tele13IE(InfoExtractor):
+ _VALID_URL = r'^http://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
+ _TESTS = [
+ {
+ 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'md5': '4cb1fa38adcad8fea88487a078831755',
+ 'info_dict': {
+ 'id': 'el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
+ 'ext': 'mp4',
+ 'title': 'El círculo de hierro de Michelle Bachelet en su regreso a La Moneda',
+ },
+ 'params': {
+ # HTTP Error 404: Not Found
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.t13.cl/videos/mundo/tendencias/video-captan-misteriosa-bola-fuego-cielos-bangkok',
+ 'md5': '867adf6a3b3fef932c68a71d70b70946',
+ 'info_dict': {
+ 'id': 'rOoKv2OMpOw',
+ 'ext': 'mp4',
+ 'title': 'Shooting star seen on 7-Sep-2015',
+ 'description': 'md5:7292ff2a34b2f673da77da222ae77e1e',
+ 'uploader': 'Porjai Jaturongkhakun',
+ 'upload_date': '20150906',
+ 'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw',
+ },
+ 'add_ie': ['Youtube'],
+ }
+ ]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ setup_js = self._search_regex(r"(?s)jwplayer\('player-vivo'\).setup\((\{.*?\})\)", webpage, 'setup code')
+ sources = self._parse_json(self._search_regex(r'sources\s*:\s*(\[[^\]]+\])', setup_js, 'sources'), display_id, js_to_json)
+
+ preference = qualities(['Móvil', 'SD', 'HD'])
+ formats = []
+ urls = []
+ for f in sources:
+ format_url = f['file']
+ if format_url and format_url not in urls:
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(format_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif YoutubeIE.suitable(format_url):
+ return self.url_result(format_url, 'Youtube')
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': f.get('label'),
+ 'preference': preference(f.get('label')),
+ 'ext': ext,
+ })
+ urls.append(format_url)
+ self._sort_formats(formats)
+
+ return {
+ 'id': display_id,
+ 'title': self._search_regex(r'title\s*:\s*"([^"]+)"', setup_js, 'title'),
+ 'description': self._html_search_meta('description', webpage, 'description'),
+ 'thumbnail': self._search_regex(r'image\s*:\s*"([^"]+)"', setup_js, 'thumbnail', default=None),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index c1ee1decc..e03e2dbaa 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -5,6 +5,8 @@ from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
+ xpath_attr,
+ xpath_element,
)
@@ -15,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.24video.net/video/view/1044982',
- 'md5': 'd041af8b5b4246ea466226a0d6693345',
+ 'md5': 'e09fc0901d9eaeedac872f154931deeb',
'info_dict': {
'id': '1044982',
'ext': 'mp4',
@@ -64,33 +66,24 @@ class TwentyFourVideoIE(InfoExtractor):
r'<div class="comments-title" id="comments-count">(\d+) комментари',
webpage, 'comment count', fatal=False))
- formats = []
+ # Sets some cookies
+ self._download_xml(
+ r'http://www.24video.net/video/xml/%s?mode=init' % video_id,
+ video_id, 'Downloading init XML')
- pc_video = self._download_xml(
+ video_xml = self._download_xml(
'http://www.24video.net/video/xml/%s?mode=play' % video_id,
- video_id, 'Downloading PC video URL').find('.//video')
+ video_id, 'Downloading video XML')
- formats.append({
- 'url': pc_video.attrib['url'],
- 'format_id': 'pc',
- 'quality': 1,
- })
+ video = xpath_element(video_xml, './/video', 'video', fatal=True)
- like_count = int_or_none(pc_video.get('ratingPlus'))
- dislike_count = int_or_none(pc_video.get('ratingMinus'))
- age_limit = 18 if pc_video.get('adult') == 'true' else 0
+ formats = [{
+ 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+ }]
- mobile_video = self._download_xml(
- 'http://www.24video.net/video/xml/%s' % video_id,
- video_id, 'Downloading mobile video URL').find('.//video')
-
- formats.append({
- 'url': mobile_video.attrib['url'],
- 'format_id': 'mobile',
- 'quality': 0,
- })
-
- self._sort_formats(formats)
+ like_count = int_or_none(video.get('ratingPlus'))
+ dislike_count = int_or_none(video.get('ratingMinus'))
+ age_limit = 18 if video.get('adult') == 'true' else 0
return {
'id': video_id,
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index f38a72fde..811ee197d 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -4,26 +4,48 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from .xstream import XstreamIE
from ..utils import (
ExtractorError,
float_or_none,
)
-class VGTVIE(InfoExtractor):
- IE_DESC = 'VGTV and BTTV'
+class VGTVIE(XstreamIE):
+ IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
+
+ _HOST_TO_APPNAME = {
+ 'vgtv.no': 'vgtv',
+ 'bt.no/tv': 'bttv',
+ 'aftenbladet.no/tv': 'satv',
+ 'fvn.no/fvntv': 'fvntv',
+ 'aftenposten.no/webtv': 'aptv',
+ }
+
+ _APP_NAME_TO_VENDOR = {
+ 'vgtv': 'vgtv',
+ 'bttv': 'bt',
+ 'satv': 'sa',
+ 'fvntv': 'fvn',
+ 'aptv': 'ap',
+ }
+
_VALID_URL = r'''(?x)
- (?:
- vgtv:|
- http://(?:www\.)?
+ (?:https?://(?:www\.)?
+ (?P<host>
+ %s
)
- (?P<host>vgtv|bt)
+ /
(?:
- :|
- \.no/(?:tv/)?\#!/(?:video|live)/
- )
- (?P<id>[0-9]+)
- '''
+ \#!/(?:video|live)/|
+ embed?.*id=
+ )|
+ (?P<appname>
+ %s
+ ):)
+ (?P<id>\d+)
+ ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys()))
+
_TESTS = [
{
# streamType: vod
@@ -59,17 +81,18 @@ class VGTVIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'Video is no longer available',
},
{
- # streamType: live
+ # streamType: wasLive
'url': 'http://www.vgtv.no/#!/live/113063/direkte-v75-fra-solvalla',
'info_dict': {
'id': '113063',
- 'ext': 'flv',
- 'title': 're:^DIREKTE: V75 fra Solvalla [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'ext': 'mp4',
+ 'title': 'V75 fra Solvalla 30.05.15',
'description': 'md5:b3743425765355855f88e096acc93231',
'thumbnail': 're:^https?://.*\.jpg',
- 'duration': 0,
+ 'duration': 25966,
'timestamp': 1432975582,
'upload_date': '20150530',
'view_count': int,
@@ -80,6 +103,20 @@ class VGTVIE(InfoExtractor):
},
},
{
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
+ 'md5': 'fd828cd29774a729bf4d4425fe192972',
+ 'info_dict': {
+ 'id': '21039',
+ 'ext': 'mov',
+ 'title': 'TRAILER: «SWEATSHOP» - I can´t take any more',
+ 'description': 'md5:21891f2b0dd7ec2f78d84a50e54f8238',
+ 'duration': 66,
+ 'timestamp': 1417002452,
+ 'upload_date': '20141126',
+ 'view_count': int,
+ }
+ },
+ {
'url': 'http://www.bt.no/tv/#!/video/100250/norling-dette-er-forskjellen-paa-1-divisjon-og-eliteserien',
'only_matching': True,
},
@@ -89,21 +126,27 @@ class VGTVIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
host = mobj.group('host')
-
- HOST_WEBSITES = {
- 'vgtv': 'vgtv',
- 'bt': 'bttv',
- }
+ appname = self._HOST_TO_APPNAME[host] if host else mobj.group('appname')
+ vendor = self._APP_NAME_TO_VENDOR[appname]
data = self._download_json(
'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website'
- % (host, video_id, HOST_WEBSITES[host]),
+ % (vendor, video_id, appname),
video_id, 'Downloading media JSON')
if data.get('status') == 'inactive':
raise ExtractorError(
'Video %s is no longer available' % video_id, expected=True)
+ info = {
+ 'formats': [],
+ }
+ if len(video_id) == 5:
+ if appname == 'bttv':
+ info = self._extract_video_info('btno', video_id)
+ elif appname == 'aptv':
+ info = self._extract_video_info('ap', video_id)
+
streams = data['streamUrls']
stream_type = data.get('streamType')
@@ -111,48 +154,53 @@ class VGTVIE(InfoExtractor):
hls_url = streams.get('hls')
if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, video_id, 'mp4', m3u8_id='hls'))
+ m3u8_formats = self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
hds_url = streams.get('hds')
# wasLive hds are always 404
if hds_url and stream_type != 'wasLive':
- formats.extend(self._extract_f4m_formats(
- hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
- video_id, f4m_id='hds'))
+ f4m_formats = self._extract_f4m_formats(
+ hds_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18', video_id, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ mp4_urls = streams.get('pseudostreaming') or []
mp4_url = streams.get('mp4')
if mp4_url:
- _url = hls_url or hds_url
- MP4_URL_TEMPLATE = '%s/%%s.%s' % (mp4_url.rpartition('/')[0], mp4_url.rpartition('.')[-1])
- for mp4_format in _url.split(','):
- m = re.search('(?P<width>\d+)_(?P<height>\d+)_(?P<vbr>\d+)', mp4_format)
- if not m:
- continue
- width = int(m.group('width'))
- height = int(m.group('height'))
- vbr = int(m.group('vbr'))
- formats.append({
- 'url': MP4_URL_TEMPLATE % mp4_format,
- 'format_id': 'mp4-%s' % vbr,
- 'width': width,
- 'height': height,
- 'vbr': vbr,
- 'preference': 1,
+ mp4_urls.append(mp4_url)
+ for mp4_url in mp4_urls:
+ format_info = {
+ 'url': mp4_url,
+ }
+ mobj = re.search('(\d+)_(\d+)_(\d+)', mp4_url)
+ if mobj:
+ tbr = int(mobj.group(3))
+ format_info.update({
+ 'width': int(mobj.group(1)),
+ 'height': int(mobj.group(2)),
+ 'tbr': tbr,
+ 'format_id': 'mp4-%s' % tbr,
})
- self._sort_formats(formats)
+ formats.append(format_info)
+
+ info['formats'].extend(formats)
+
+ self._sort_formats(info['formats'])
- return {
+ info.update({
'id': video_id,
- 'title': self._live_title(data['title']),
+ 'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
'description': data['description'],
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
'timestamp': data['published'],
'duration': float_or_none(data['duration'], 1000),
'view_count': data['displays'],
- 'formats': formats,
'is_live': True if stream_type == 'live' else False,
- }
+ })
+ return info
class BTArticleIE(InfoExtractor):
@@ -161,7 +209,7 @@ class BTArticleIE(InfoExtractor):
_VALID_URL = 'http://(?:www\.)?bt\.no/(?:[^/]+/)+(?P<id>[^/]+)-\d+\.html'
_TEST = {
'url': 'http://www.bt.no/nyheter/lokalt/Kjemper-for-internatet-1788214.html',
- 'md5': 'd055e8ee918ef2844745fcfd1a4175fb',
+ 'md5': '2acbe8ad129b3469d5ae51b1158878df',
'info_dict': {
'id': '23199',
'ext': 'mp4',
@@ -178,15 +226,15 @@ class BTArticleIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, self._match_id(url))
video_id = self._search_regex(
- r'SVP\.Player\.load\(\s*(\d+)', webpage, 'video id')
- return self.url_result('vgtv:bt:%s' % video_id, 'VGTV')
+ r'<video[^>]+data-id="(\d+)"', webpage, 'video id')
+ return self.url_result('bttv:%s' % video_id, 'VGTV')
class BTVestlendingenIE(InfoExtractor):
IE_NAME = 'bt:vestlendingen'
IE_DESC = 'Bergens Tidende - Vestlendingen'
_VALID_URL = 'http://(?:www\.)?bt\.no/spesial/vestlendingen/#!/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.bt.no/spesial/vestlendingen/#!/86588',
'md5': 'd7d17e3337dc80de6d3a540aefbe441b',
'info_dict': {
@@ -197,7 +245,19 @@ class BTVestlendingenIE(InfoExtractor):
'timestamp': 1430473209,
'upload_date': '20150501',
},
- }
+ 'skip': '404 Error',
+ }, {
+ 'url': 'http://www.bt.no/spesial/vestlendingen/#!/86255',
+ 'md5': 'a2893f8632e96389f4bdf36aa9463ceb',
+ 'info_dict': {
+ 'id': '86255',
+ 'ext': 'mov',
+ 'title': 'Du må tåle å fryse og være sulten',
+ 'description': 'md5:b8046f4d022d5830ddab04865791d063',
+ 'upload_date': '20150321',
+ 'timestamp': 1426942023,
+ },
+ }]
def _real_extract(self, url):
- return self.url_result('xstream:btno:%s' % self._match_id(url), 'Xstream')
+ return self.url_result('bttv:%s' % self._match_id(url), 'VGTV')
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index a63c23617..ca3f20a3d 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -30,6 +30,12 @@ class VikiBaseIE(InfoExtractor):
_token = None
+ _ERRORS = {
+ 'geo': 'Sorry, this content is not available in your region.',
+ 'upcoming': 'Sorry, this content is not yet available.',
+ # 'paywall': 'paywall',
+ }
+
def _prepare_call(self, path, timestamp=None, post_data=None):
path += '?' if '?' not in path else '&'
if not timestamp:
@@ -67,6 +73,12 @@ class VikiBaseIE(InfoExtractor):
'%s returned error: %s' % (self.IE_NAME, error),
expected=True)
+ def _check_errors(self, data):
+ for reason, status in data.get('blocking', {}).items():
+ if status and reason in self._ERRORS:
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, self._ERRORS[reason]), expected=True)
+
def _real_initialize(self):
self._login()
@@ -193,6 +205,7 @@ class VikiIE(VikiBaseIE):
'timestamp': 1321985454,
'description': 'md5:44b1e46619df3a072294645c770cef36',
'title': 'Love In Magic',
+ 'age_limit': 13,
},
}]
@@ -202,6 +215,8 @@ class VikiIE(VikiBaseIE):
video = self._call_api(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
+ self._check_errors(video)
+
title = self.dict_selection(video.get('titles', {}), 'en')
if not title:
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
@@ -262,8 +277,11 @@ class VikiIE(VikiBaseIE):
r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items():
if format_id == 'm3u8':
- formats = self._extract_m3u8_formats(
- format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
+ m3u8_formats = self._extract_m3u8_formats(
+ format_dict['url'], video_id, 'mp4', 'm3u8_native',
+ m3u8_id='m3u8-%s' % protocol, fatal=None)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
else:
formats.append({
'url': format_dict['url'],
@@ -315,6 +333,8 @@ class VikiChannelIE(VikiBaseIE):
'containers/%s.json' % channel_id, channel_id,
'Downloading channel JSON')
+ self._check_errors(channel)
+
title = self.dict_selection(channel['titles'], 'en')
description = self.dict_selection(channel['descriptions'], 'en')
diff --git a/youtube_dl/extractor/xstream.py b/youtube_dl/extractor/xstream.py
index 71584c291..76c91bd92 100644
--- a/youtube_dl/extractor/xstream.py
+++ b/youtube_dl/extractor/xstream.py
@@ -42,11 +42,7 @@ class XstreamIE(InfoExtractor):
'only_matching': True,
}]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- partner_id = mobj.group('partner_id')
- video_id = mobj.group('id')
-
+ def _extract_video_info(self, partner_id, video_id):
data = self._download_xml(
'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s'
% (partner_id, video_id),
@@ -97,6 +93,7 @@ class XstreamIE(InfoExtractor):
formats.append({
'url': link.get('href'),
'format_id': link.get('rel'),
+ 'preference': 1,
})
thumbnails = [{
@@ -113,3 +110,10 @@ class XstreamIE(InfoExtractor):
'formats': formats,
'thumbnails': thumbnails,
}
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ partner_id = mobj.group('partner_id')
+ video_id = mobj.group('id')
+
+ return self._extract_video_info(partner_id, video_id)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 01607693e..255d64269 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.12.18'
+__version__ = '2015.12.23'