diff options
31 files changed, 845 insertions, 310 deletions
| @@ -400,7 +400,7 @@ which means you can modify it, redistribute it or use it however you like.                                       downloading, similar to find's -exec                                       syntax. Example: --exec 'adb push {}                                       /sdcard/Music/ && rm {}' -    --convert-subtitles FORMAT       Convert the subtitles to other format +    --convert-subs FORMAT            Convert the subtitles to other format                                       (currently supported: srt|ass|vtt)  # CONFIGURATION diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1646277ec..84c166805 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -125,7 +125,7 @@   - **dailymotion:user**   - **DailymotionCloud**   - **daum.net** - - **daum.net** + - **daum.net:clip**   - **DBTV**   - **DCN**   - **dcn:live** @@ -164,7 +164,7 @@   - **Eporner**   - **EroProfile**   - **Escapist** - - **ESPN** (Currently broken) + - **ESPN**   - **EsriVideo**   - **Europa**   - **EveryonesMixtape** @@ -181,6 +181,7 @@   - **Flickr**   - **Folketinget**: Folketinget (ft.dk; Danish parliament)   - **FootyRoom** + - **FOX**   - **Foxgay**   - **FoxNews**: Fox News and Fox Business Video   - **FoxSports** @@ -259,7 +260,6 @@   - **JeuxVideo**   - **Jove**   - **jpopsuki.tv** - - **Jukebox**   - **JWPlatform**   - **Kaltura**   - **KanalPlay**: Kanal 5/9/11 Play @@ -367,11 +367,13 @@   - **Newstube**   - **NextMedia**: 蘋果日報   - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **nextmovie.com**   - **nfb**: National Film Board of Canada   - **nfl.com**   - **nhl.com**   - **nhl.com:news**: NHL news   - **nhl.com:videocenter**: NHL videocenter category + - **nick.com**   - **niconico**: ニコニコ動画   - **NiconicoPlaylist**   - **njoy**: N-JOY @@ -405,17 +407,19 @@   - **OnionStudios**   - **Ooyala**   - **OoyalaExternal** + - **OraTV**   - **orf:fm4**: radio FM4   - **orf:iptv**: iptv.ORF.at   - **orf:oe1**: Radio Österreich 1   - **orf:tvthek**: ORF TVthek + - **pandora.tv**: 판도라TV   - **parliamentlive.tv**: UK parliament videos   - **Patreon**   - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET  (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)   - **pcmag**   - **Periscope**: Periscope   - **PhilharmonieDeParis**: Philharmonie de Paris - - **Phoenix** + - **phoenix.de**   - **Photobucket**   - **Pinkbike**   - **Pladform** @@ -457,6 +461,7 @@   - **RBMARadio**   - **RDS**: RDS.ca   - **RedTube** + - **RegioTV**   - **Restudy**   - **ReverbNation**   - **RingTV** @@ -535,7 +540,8 @@   - **SportBoxEmbed**   - **SportDeutschland**   - **Sportschau** - - **Srf** + - **SRGSSR** + - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites   - **SSA**   - **stanfordoc**: Stanford Open ClassRoom   - **Steam** @@ -579,7 +585,6 @@   - **THVideo**   - **THVideoPlaylist**   - **tinypic**: tinypic.com videos - - **tlc.com**   - **tlc.de**   - **TMZ**   - **TMZArticle** @@ -608,6 +613,7 @@   - **TVC**   - **TVCArticle**   - **tvigle**: Интернет-телевидение Tvigle.ru + - **tvland.com**   - **tvp.pl**   - **tvp.pl:Series**   - **TVPlay**: TV3Play and related services @@ -646,6 +652,9 @@   - **VideoDetective**   - **videofy.me**   - **VideoMega** + - **videomore** + - **videomore:season** + - **videomore:video**   - **VideoPremium**   - **VideoTt**: video.tt - Your True Tube   - **videoweed**: VideoWeed diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 50425b8d7..3b2be3159 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1791,6 +1791,10 @@ class YoutubeDL(object):          res = ''          if fdict.get('ext') in ['f4f', 'f4m']:              res += '(unsupported) ' +        if fdict.get('language'): +            if res: +                res += ' ' +            res += '[%s]' % fdict['language']          if fdict.get('format_note') is not None:              res += fdict['format_note'] + ' '          if fdict.get('tbr') is not None: diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fb7151443..4c7e5223d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -203,6 +203,7 @@ from .flickr import FlickrIE  from .folketinget import FolketingetIE  from .footyroom import FootyRoomIE  from .fourtube import FourTubeIE +from .fox import FOXIE  from .foxgay import FoxgayIE  from .foxnews import FoxNewsIE  from .foxsports import FoxSportsIE @@ -433,6 +434,7 @@ from .nextmedia import (      NextMediaActionNewsIE,      AppleDailyIE,  ) +from .nextmovie import NextMovieIE  from .nfb import NFBIE  from .nfl import NFLIE  from .nhl import ( @@ -440,6 +442,7 @@ from .nhl import (      NHLNewsIE,      NHLVideocenterIE,  ) +from .nick import NickIE  from .niconico import NiconicoIE, NiconicoPlaylistIE  from .ninegag import NineGagIE  from .noco import NocoIE @@ -489,12 +492,14 @@ from .ooyala import (      OoyalaIE,      OoyalaExternalIE,  ) +from .ora import OraTVIE  from .orf import (      ORFTVthekIE,      ORFOE1IE,      ORFFM4IE,      ORFIPTVIE,  ) +from .pandoratv import PandoraTVIE  from .parliamentliveuk import ParliamentLiveUKIE  from .patreon import PatreonIE  from .pbs import PBSIE @@ -549,8 +554,10 @@ from .rai import (  from .rbmaradio import RBMARadioIE  from .rds import RDSIE  from .redtube import RedTubeIE +from .regiotv import RegioTVIE  from .restudy import RestudyIE  from .reverbnation import ReverbNationIE +from .revision3 import Revision3IE  from .ringtv import RingTVIE  from .ro220 import Ro220IE  from .rottentomatoes import RottenTomatoesIE @@ -680,7 +687,6 @@ from .telemb import TeleMBIE  from .teletask import TeleTaskIE  from .tenplay import TenPlayIE  from .testurl import TestURLIE -from .testtube import TestTubeIE  from .tf1 import TF1IE  from .theintercept import TheInterceptIE  from .theonion import TheOnionIE @@ -692,7 +698,7 @@ from .thesixtyone import TheSixtyOneIE  from .thisamericanlife import ThisAmericanLifeIE  from .thisav import ThisAVIE  from .tinypic import TinyPicIE -from .tlc import TlcIE, TlcDeIE +from .tlc import TlcDeIE  from .tmz import (      TMZIE,      TMZArticleIE, @@ -735,6 +741,7 @@ from .tvc import (      TVCArticleIE,  )  from .tvigle import TvigleIE +from .tvland import TVLandIE  from .tvp import TvpIE, TvpSeriesIE  from .tvplay import TVPlayIE  from .tweakers import TweakersIE diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py index e37ee4440..76b21e596 100644 --- a/youtube_dl/extractor/baidu.py +++ b/youtube_dl/extractor/baidu.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..compat import compat_urlparse +from ..utils import unescapeHTML  class BaiduVideoIE(InfoExtractor): @@ -14,8 +14,8 @@ class BaiduVideoIE(InfoExtractor):          'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',          'info_dict': {              'id': '1069', -            'title': '中华小当家 TV版 (全52集)', -            'description': 'md5:395a419e41215e531c857bb037bbaf80', +            'title': '中华小当家 TV版国语', +            'description': 'md5:51be07afe461cf99fa61231421b5397c',          },          'playlist_count': 52,      }, { @@ -25,45 +25,32 @@ class BaiduVideoIE(InfoExtractor):              'title': 're:^奔跑吧兄弟',              'description': 'md5:1bf88bad6d850930f542d51547c089b8',          }, -        'playlist_mincount': 3, +        'playlist_mincount': 12,      }] +    def _call_api(self, path, category, playlist_id, note): +        return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( +            path, category, playlist_id), playlist_id, note) +      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        playlist_id = mobj.group('id') -        category = category2 = mobj.group('type') +        category, playlist_id = re.match(self._VALID_URL, url).groups()          if category == 'show': -            category2 = 'tvshow' - -        webpage = self._download_webpage(url, playlist_id) - -        playlist_title = self._html_search_regex( -            r'title\s*:\s*(["\'])(?P<title>[^\']+)\1', webpage, -            'playlist title', group='title') -        playlist_description = self._html_search_regex( -            r'<input[^>]+class="j-data-intro"[^>]+value="([^"]+)"/>', webpage, -            playlist_id, 'playlist description') +            category = 'tvshow' +        if category == 'tv': +            category = 'tvplay' -        site = self._html_search_regex( -            r'filterSite\s*:\s*["\']([^"]*)["\']', webpage, -            'primary provider site') -        api_result = self._download_json( -            'http://v.baidu.com/%s_intro/?dtype=%sPlayUrl&id=%s&site=%s' % ( -                category, category2, playlist_id, site), -            playlist_id, 'Get playlist links') +        playlist_detail = self._call_api( +            'xqinfo', category, playlist_id, 'Download playlist JSON metadata') -        entries = [] -        for episode in api_result[0]['episodes']: -            episode_id = '%s_%s' % (playlist_id, episode['episode']) +        playlist_title = playlist_detail['title'] +        playlist_description = unescapeHTML(playlist_detail.get('intro')) -            redirect_page = self._download_webpage( -                compat_urlparse.urljoin(url, episode['url']), episode_id, -                note='Download Baidu redirect page') -            real_url = self._html_search_regex( -                r'location\.replace\("([^"]+)"\)', redirect_page, 'real URL') +        episodes_detail = self._call_api( +            'xqsingle', category, playlist_id, 'Download episodes JSON metadata') -            entries.append(self.url_result( -                real_url, video_title=episode['single_title'])) +        entries = [self.url_result( +            episode['url'], video_title=episode['title'] +        ) for episode in episodes_detail['videos']]          return self.playlist_result(              entries, playlist_id, playlist_title, playlist_description) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 923273fb2..7b169881a 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -23,7 +23,17 @@ class BBCCoUkIE(InfoExtractor):      IE_NAME = 'bbc.co.uk'      IE_DESC = 'BBC iPlayer'      _ID_REGEX = r'[pb][\da-z]{7}' -    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:programmes/(?!articles/)|iplayer(?:/[^/]+)?/(?:episode/|playlist/))|music/clips[/#])(?P<id>%s)' % _ID_REGEX +    _VALID_URL = r'''(?x) +                    https?:// +                        (?:www\.)?bbc\.co\.uk/ +                        (?: +                            programmes/(?!articles/)| +                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)| +                            music/clips[/#]| +                            radio/player/ +                        ) +                        (?P<id>%s) +                    ''' % _ID_REGEX      _MEDIASELECTOR_URLS = [          # Provides HQ HLS streams with even better quality that pc mediaset but fails @@ -193,6 +203,9 @@ class BBCCoUkIE(InfoExtractor):          }, {              'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',              'only_matching': True, +        }, { +            'url': 'http://www.bbc.co.uk/radio/player/p03cchwf', +            'only_matching': True,          }      ] @@ -469,7 +482,8 @@ class BBCCoUkIE(InfoExtractor):          if programme_id:              formats, subtitles = self._download_media_selector(programme_id) -            title = self._og_search_title(webpage) +            title = self._og_search_title(webpage, default=None) or self._html_search_regex( +                r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', webpage, 'title')              description = self._search_regex(                  r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',                  webpage, 'description', default=None) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 004372f8d..25b2d4efe 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -10,13 +10,14 @@ from ..utils import (      unified_strdate,      url_basename,      qualities, +    int_or_none,  )  class CanalplusIE(InfoExtractor):      IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv'      _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' -    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s' +    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'      _SITE_ID_MAP = {          'canalplus.fr': 'cplus',          'piwiplus.fr': 'teletoon', @@ -26,10 +27,10 @@ class CanalplusIE(InfoExtractor):      _TESTS = [{          'url': 'http://www.canalplus.fr/c-emissions/pid1830-c-zapping.html?vid=1263092', -        'md5': 'b3481d7ca972f61e37420798d0a9d934', +        'md5': '12164a6f14ff6df8bd628e8ba9b10b78',          'info_dict': {              'id': '1263092', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Le Zapping - 13/05/15',              'description': 'md5:09738c0d06be4b5d06a0940edb0da73f',              'upload_date': '20150513', @@ -56,10 +57,10 @@ class CanalplusIE(InfoExtractor):          'skip': 'videos get deleted after a while',      }, {          'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', -        'md5': 'f3a46edcdf28006598ffaf5b30e6a2d4', +        'md5': '38b8f7934def74f0d6f3ba6c036a5f82',          'info_dict': {              'id': '1213714', -            'ext': 'flv', +            'ext': 'mp4',              'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45',              'description': 'md5:8216206ec53426ea6321321f3b3c16db',              'upload_date': '20150211', @@ -82,15 +83,16 @@ class CanalplusIE(InfoExtractor):                  webpage, 'video id', group='id')          info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) -        doc = self._download_xml(info_url, video_id, 'Downloading video XML') +        video_data = self._download_json(info_url, video_id, 'Downloading video JSON') -        video_info = [video for video in doc if video.find('ID').text == video_id][0] -        media = video_info.find('MEDIA') -        infos = video_info.find('INFOS') +        if isinstance(video_data, list): +            video_data = [video for video in video_data if video.get('ID') == video_id][0] +        media = video_data['MEDIA'] +        infos = video_data['INFOS'] -        preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']) +        preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD']) -        fmt_url = next(iter(media.find('VIDEOS'))).text +        fmt_url = next(iter(media.get('VIDEOS')))          if '/geo' in fmt_url.lower():              response = self._request_webpage(                  HEADRequest(fmt_url), video_id, @@ -101,35 +103,42 @@ class CanalplusIE(InfoExtractor):                      expected=True)          formats = [] -        for fmt in media.find('VIDEOS'): -            format_url = fmt.text +        for format_id, format_url in media['VIDEOS'].items():              if not format_url:                  continue -            format_id = fmt.tag              if format_id == 'HLS':                  formats.extend(self._extract_m3u8_formats( -                    format_url, video_id, 'mp4', preference=preference(format_id))) +                    format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))              elif format_id == 'HDS':                  formats.extend(self._extract_f4m_formats( -                    format_url + '?hdcore=2.11.3', video_id, preference=preference(format_id))) +                    format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))              else:                  formats.append({ -                    'url': format_url, +                    # the secret extracted ya function in http://player.canalplus.fr/common/js/canalPlayer.js +                    'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',                      'format_id': format_id,                      'preference': preference(format_id),                  })          self._sort_formats(formats) +        thumbnails = [{ +            'id': image_id, +            'url': image_url, +        } for image_id, image_url in media.get('images', {}).items()] + +        titrage = infos['TITRAGE'] +          return {              'id': video_id,              'display_id': display_id, -            'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text, -                                  infos.find('TITRAGE/SOUS_TITRE').text), -            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text), -            'thumbnail': media.find('IMAGES/GRAND').text, -            'description': infos.find('DESCRIPTION').text, -            'view_count': int(infos.find('NB_VUES').text), -            'like_count': int(infos.find('NB_LIKES').text), -            'comment_count': int(infos.find('NB_COMMENTS').text), +            'title': '%s - %s' % (titrage['TITRE'], +                                  titrage['SOUS_TITRE']), +            'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), +            'thumbnails': thumbnails, +            'description': infos.get('DESCRIPTION'), +            'duration': int_or_none(infos.get('DURATION')), +            'view_count': int_or_none(infos.get('NB_VUES')), +            'like_count': int_or_none(infos.get('NB_LIKES')), +            'comment_count': int_or_none(infos.get('NB_COMMENTS')),              'formats': formats,          } diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index f9a64a0a2..d211ec23b 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -5,6 +5,7 @@ import re  import json  from .common import InfoExtractor +from ..utils import remove_start  class CBSNewsIE(InfoExtractor): @@ -62,6 +63,7 @@ class CBSNewsIE(InfoExtractor):              uri = item.get('media' + format_id + 'URI')              if not uri:                  continue +            uri = remove_start(uri, '{manifest:none}')              fmt = {                  'url': uri,                  'format_id': format_id, @@ -70,6 +72,8 @@ class CBSNewsIE(InfoExtractor):                  play_path = re.sub(                      r'{slistFilePath}', '',                      uri.split('<break>')[-1].split('{break}')[-1]) +                play_path = re.sub( +                    r'{manifest:.+}.*$', '', play_path)                  fmt.update({                      'app': 'ondemand?auth=cbs',                      'play_path': 'mp4:' + play_path, diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 6924eac70..e94b1e35b 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -5,6 +5,7 @@ import re  from .common import InfoExtractor  from ..utils import (      int_or_none, +    parse_duration,      qualities,      unified_strdate,  ) @@ -12,21 +13,25 @@ from ..utils import (  class CCCIE(InfoExtractor):      IE_NAME = 'media.ccc.de' -    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html' +    _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P<id>[^/?#&]+)' -    _TEST = { -        'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', +    _TESTS = [{ +        'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video',          'md5': '3a1eda8f3a29515d27f5adb967d7e740',          'info_dict': { -            'id': '20131228183', +            'id': '30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor',              'ext': 'mp4',              'title': 'Introduction to Processor Design', -            'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', +            'description': 'md5:80be298773966f66d56cb11260b879af',              'thumbnail': 're:^https?://.*\.jpg$',              'view_count': int, -            'upload_date': '20131229', +            'upload_date': '20131228', +            'duration': 3660,          } -    } +    }, { +        'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', +        'only_matching': True, +    }]      def _real_extract(self, url):          video_id = self._match_id(url) @@ -40,21 +45,25 @@ class CCCIE(InfoExtractor):          title = self._html_search_regex(              r'(?s)<h1>(.*?)</h1>', webpage, 'title')          description = self._html_search_regex( -            r"(?s)<p class='description'>(.*?)</p>", +            r"(?s)<h3>About</h3>(.+?)<h3>",              webpage, 'description', fatal=False)          upload_date = unified_strdate(self._html_search_regex( -            r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", +            r"(?s)<span[^>]+class='[^']*fa-calendar-o'[^>]*>(.+?)</span>",              webpage, 'upload date', fatal=False))          view_count = int_or_none(self._html_search_regex(              r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>",              webpage, 'view count', fatal=False)) +        duration = parse_duration(self._html_search_regex( +            r'(?s)<span[^>]+class=(["\']).*?fa-clock-o.*?\1[^>]*></span>(?P<duration>.+?)</li', +            webpage, 'duration', fatal=False, group='duration'))          matches = re.finditer(r'''(?xs) -            <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* +            <(?:span|div)\s+class='label\s+filetype'>(?P<format>[^<]*)</(?:span|div)>\s* +            <(?:span|div)\s+class='label\s+filetype'>(?P<lang>[^<]*)</(?:span|div)>\s*              <a\s+download\s+href='(?P<http_url>[^']+)'>\s*              (?:                  .*? -                <a\s+href='(?P<torrent_url>[^']+\.torrent)' +                <a\s+(?:download\s+)?href='(?P<torrent_url>[^']+\.torrent)'              )?''', webpage)          formats = []          for m in matches: @@ -62,12 +71,15 @@ class CCCIE(InfoExtractor):              format_id = self._search_regex(                  r'.*/([a-z0-9_-]+)/[^/]*$',                  m.group('http_url'), 'format id', default=None) +            if format_id: +                format_id = m.group('lang') + '-' + format_id              vcodec = 'h264' if 'h264' in format_id else (                  'none' if format_id in ('mp3', 'opus') else None              )              formats.append({                  'format_id': format_id,                  'format': format, +                'language': m.group('lang'),                  'url': m.group('http_url'),                  'vcodec': vcodec,                  'preference': preference(format_id), @@ -95,5 +107,6 @@ class CCCIE(InfoExtractor):              'thumbnail': thumbnail,              'view_count': view_count,              'upload_date': upload_date, +            'duration': duration,              'formats': formats,          } diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c63157619..0719c7bcd 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,8 +108,9 @@ class InfoExtractor(object):                                   -2 or smaller for less than default.                                   < -1000 to hide the format (if there is                                      another one which is strictly better) -                    * language_preference  Is this in the correct requested -                                 language? +                    * language   Language code, e.g. "de" or "en-US". +                    * language_preference  Is this in the language mentioned in +                                 the URL?                                   10 if it's what the URL is about,                                   -1 for default (don't know),                                   -10 otherwise, other values reserved for now. diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index d6723ecf2..ce680a9f3 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -9,7 +9,17 @@ from ..compat import compat_str  class DiscoveryIE(InfoExtractor): -    _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?' +    _VALID_URL = r'''(?x)http://(?:www\.)?(?: +            discovery| +            investigationdiscovery| +            discoverylife| +            animalplanet| +            ahctv| +            destinationamerica| +            sciencechannel| +            tlc| +            velocity +        )\.com/(?:[^/]+/)*(?P<id>[^./?#]+)'''      _TESTS = [{          'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',          'info_dict': { @@ -21,8 +31,8 @@ class DiscoveryIE(InfoExtractor):                              'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'                              ' back.'),              'duration': 156, -            'timestamp': 1303099200, -            'upload_date': '20110418', +            'timestamp': 1302032462, +            'upload_date': '20110405',          },          'params': {              'skip_download': True,  # requires ffmpeg @@ -33,27 +43,38 @@ class DiscoveryIE(InfoExtractor):              'id': 'mythbusters-the-simpsons',              'title': 'MythBusters: The Simpsons',          }, -        'playlist_count': 9, +        'playlist_mincount': 10, +    }, { +        'url': 'http://www.animalplanet.com/longfin-eels-maneaters/', +        'info_dict': { +            'id': '78326', +            'ext': 'mp4', +            'title': 'Longfin Eels: Maneaters?', +            'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.', +            'upload_date': '20140725', +            'timestamp': 1406246400, +            'duration': 116, +        },      }]      def _real_extract(self, url): -        video_id = self._match_id(url) -        info = self._download_json(url + '?flat=1', video_id) +        display_id = self._match_id(url) +        info = self._download_json(url + '?flat=1', display_id)          video_title = info.get('playlist_title') or info.get('video_title')          entries = [{              'id': compat_str(video_info['id']),              'formats': self._extract_m3u8_formats( -                video_info['src'], video_id, ext='mp4', +                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',                  note='Download m3u8 information for video %d' % (idx + 1)),              'title': video_info['title'],              'description': video_info.get('description'),              'duration': parse_duration(video_info.get('video_length')), -            'webpage_url': video_info.get('href'), +            'webpage_url': video_info.get('href') or video_info.get('url'),              'thumbnail': video_info.get('thumbnailURL'),              'alt_title': video_info.get('secondary_title'),              'timestamp': parse_iso8601(video_info.get('publishedDate')),          } for idx, video_info in enumerate(info['playlist'])] -        return self.playlist_result(entries, video_id, video_title) +        return self.playlist_result(entries, display_id, video_title) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 8ac8587be..028144f20 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -2,14 +2,10 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor -from ..utils import ( -    ExtractorError, -    unified_strdate, -) +from .zdf import ZDFIE -class DreiSatIE(InfoExtractor): +class DreiSatIE(ZDFIE):      IE_NAME = '3sat'      _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'      _TESTS = [ @@ -35,53 +31,4 @@ class DreiSatIE(InfoExtractor):          mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id')          details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id -        details_doc = self._download_xml(details_url, video_id, 'Downloading video details') - -        status_code = details_doc.find('./status/statuscode') -        if status_code is not None and status_code.text != 'ok': -            code = status_code.text -            if code == 'notVisibleAnymore': -                message = 'Video %s is not available' % video_id -            else: -                message = '%s returned error: %s' % (self.IE_NAME, code) -            raise ExtractorError(message, expected=True) - -        thumbnail_els = details_doc.findall('.//teaserimage') -        thumbnails = [{ -            'width': int(te.attrib['key'].partition('x')[0]), -            'height': int(te.attrib['key'].partition('x')[2]), -            'url': te.text, -        } for te in thumbnail_els] - -        information_el = details_doc.find('.//information') -        video_title = information_el.find('./title').text -        video_description = information_el.find('./detail').text - -        details_el = details_doc.find('.//details') -        video_uploader = details_el.find('./channel').text -        upload_date = unified_strdate(details_el.find('./airtime').text) - -        format_els = details_doc.findall('.//formitaet') -        formats = [{ -            'format_id': fe.attrib['basetype'], -            'width': int(fe.find('./width').text), -            'height': int(fe.find('./height').text), -            'url': fe.find('./url').text, -            'filesize': int(fe.find('./filesize').text), -            'video_bitrate': int(fe.find('./videoBitrate').text), -        } for fe in format_els -            if not fe.find('./url').text.startswith('http://www.metafilegenerator.de/')] - -        self._sort_formats(formats) - -        return { -            '_type': 'video', -            'id': video_id, -            'title': video_title, -            'formats': formats, -            'description': video_description, -            'thumbnails': thumbnails, -            'thumbnail': thumbnails[-1]['url'], -            'uploader': video_uploader, -            'upload_date': upload_date, -        } +        return self.extract_from_xml_url(video_id, details_url) diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py index 5dfea0d39..f7339702c 100644 --- a/youtube_dl/extractor/einthusan.py +++ b/youtube_dl/extractor/einthusan.py @@ -1,9 +1,12 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( +    remove_start, +    sanitized_Request, +)  class EinthusanIE(InfoExtractor): @@ -34,27 +37,33 @@ class EinthusanIE(InfoExtractor):      ]      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        webpage = self._download_webpage(url, video_id) +        video_id = self._match_id(url) + +        request = sanitized_Request(url) +        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0') +        webpage = self._download_webpage(request, video_id) + +        title = self._html_search_regex( +            r'<h1><a[^>]+class=["\']movie-title["\'][^>]*>(.+?)</a></h1>', +            webpage, 'title') -        video_title = self._html_search_regex( -            r'<h1><a class="movie-title".*?>(.*?)</a></h1>', webpage, 'title') +        video_id = self._search_regex( +            r'data-movieid=["\'](\d+)', webpage, 'video id', default=video_id) -        video_url = self._html_search_regex( -            r'''(?s)jwplayer\("mediaplayer"\)\.setup\({.*?'file': '([^']+)'.*?}\);''', -            webpage, 'video url') +        video_url = self._download_webpage( +            'http://cdn.einthusan.com/geturl/%s/hd/London,Washington,Toronto,Dallas,San,Sydney/' +            % video_id, video_id)          description = self._html_search_meta('description', webpage)          thumbnail = self._html_search_regex(              r'''<a class="movie-cover-wrapper".*?><img src=["'](.*?)["'].*?/></a>''',              webpage, "thumbnail url", fatal=False)          if thumbnail is not None: -            thumbnail = thumbnail.replace('..', 'http://www.einthusan.com') +            thumbnail = compat_urlparse.urljoin(url, remove_start(thumbnail, '..'))          return {              'id': video_id, -            'title': video_title, +            'title': title,              'url': video_url,              'thumbnail': thumbnail,              'description': description, diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index e6f8f0337..3762d8748 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,18 +1,30 @@  from __future__ import unicode_literals  from .common import InfoExtractor +from ..utils import remove_end  class ESPNIE(InfoExtractor):      _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' -    _WORKING = False      _TESTS = [{          'url': 'http://espn.go.com/video/clip?id=10365079',          'info_dict': {              'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',              'ext': 'mp4', -            'title': 'dm_140128_30for30Shorts___JudgingJewellv2', -            'description': '', +            'title': '30 for 30 Shorts: Judging Jewell', +            'description': None, +        }, +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    }, { +        # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season +        'url': 'http://espn.go.com/video/clip?id=2743663', +        'info_dict': { +            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', +            'ext': 'mp4', +            'title': 'Must-See Moments: Best of the MLS season',          },          'params': {              # m3u8 download @@ -44,12 +56,23 @@ class ESPNIE(InfoExtractor):              r'class="video-play-button"[^>]+data-id="(\d+)',              webpage, 'video id') +        cms = 'espn' +        if 'data-source="intl"' in webpage: +            cms = 'intl' +        player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms)          player = self._download_webpage( -            'https://espn.go.com/video/iframe/twitter/?id=%s' % video_id, video_id) +            player_url, video_id)          pcode = self._search_regex(              r'["\']pcode=([^"\']+)["\']', player, 'pcode') -        return self.url_result( -            'ooyalaexternal:espn:%s:%s' % (video_id, pcode), -            'OoyalaExternal') +        title = remove_end( +            self._og_search_title(webpage), +            '- ESPN Video').strip() + +        return { +            '_type': 'url_transparent', +            'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), +            'ie_key': 'OoyalaExternal', +            'title': title, +        } diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py new file mode 100644 index 000000000..ab97b3196 --- /dev/null +++ b/youtube_dl/extractor/fox.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class FOXIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' +    _TEST = { +        'url': 'http://www.fox.com/watch/255180355939/7684182528', +        'info_dict': { +            'id': '255180355939', +            'ext': 'mp4', +            'title': 'Official Trailer: Gotham', +            'description': 'Tracing the rise of the great DC Comics Super-Villains and vigilantes, Gotham reveals an entirely new chapter that has never been told.', +            'duration': 129, +        }, +        'add_ie': ['ThePlatform'], +        'params': { +            # m3u8 download +            'skip_download': True, +        }, +    } + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) + +        release_url = self._parse_json(self._search_regex( +            r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), +            video_id)['release_url'] + '&manifest=m3u' + +        return { +            '_type': 'url_transparent', +            'ie_key': 'ThePlatform', +            'url': smuggle_url(release_url, {'force_smil_url': True}), +            'id': video_id, +        } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index d887583e6..e8bb527b8 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -167,14 +167,16 @@ class MTVServicesInfoExtractor(InfoExtractor):              'description': description,          } +    def _get_feed_query(self, uri): +        data = {'uri': uri} +        if self._LANG: +            data['lang'] = self._LANG +        return compat_urllib_parse.urlencode(data) +      def _get_videos_info(self, uri):          video_id = self._id_from_uri(uri)          feed_url = self._get_feed_url(uri) -        data = compat_urllib_parse.urlencode({'uri': uri}) -        info_url = feed_url + '?' -        if self._LANG: -            info_url += 'lang=%s&' % self._LANG -        info_url += data +        info_url = feed_url + '?' + self._get_feed_query(uri)          return self._get_videos_info_from_url(info_url, video_id)      def _get_videos_info_from_url(self, url, video_id): @@ -184,9 +186,7 @@ class MTVServicesInfoExtractor(InfoExtractor):          return self.playlist_result(              [self._get_video_info(item) for item in idoc.findall('.//item')]) -    def _real_extract(self, url): -        title = url_basename(url) -        webpage = self._download_webpage(url, title) +    def _extract_mgid(self, webpage):          try:              # the url can be http://media.mtvnservices.com/fb/{mgid}.swf              # or http://media.mtvnservices.com/{mgid} @@ -207,7 +207,12 @@ class MTVServicesInfoExtractor(InfoExtractor):                  'sm4:video:embed', webpage, 'sm4 embed', default='')              mgid = self._search_regex(                  r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') +        return mgid +    def _real_extract(self, url): +        title = url_basename(url) +        webpage = self._download_webpage(url, title) +        mgid = self._extract_mgid(webpage)          videos_info = self._get_videos_info(mgid)          return videos_info diff --git a/youtube_dl/extractor/nextmovie.py b/youtube_dl/extractor/nextmovie.py new file mode 100644 index 000000000..657ae77a0 --- /dev/null +++ b/youtube_dl/extractor/nextmovie.py @@ -0,0 +1,30 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor +from ..compat import compat_urllib_parse + + +class NextMovieIE(MTVServicesInfoExtractor): +    IE_NAME = 'nextmovie.com' +    _VALID_URL = r'https?://(?:www\.)?nextmovie\.com/shows/[^/]+/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' +    _FEED_URL = 'http://lite.dextr.mtvi.com/service1/dispatch.htm' +    _TESTS = [{ +        'url': 'http://www.nextmovie.com/shows/exclusives/2013-03-10/mgid:uma:videolist:nextmovie.com:1715019/', +        'md5': '09a9199f2f11f10107d04fcb153218aa', +        'info_dict': { +            'id': '961726', +            'ext': 'mp4', +            'title': 'The Muppets\' Gravity', +        }, +    }] + +    def _get_feed_query(self, uri): +        return compat_urllib_parse.urlencode({ +            'feed': '1505', +            'mgid': uri, +        }) + +    def _real_extract(self, url): +        mgid = self._match_id(url) +        return self._get_videos_info(mgid) diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py new file mode 100644 index 000000000..b62819ae5 --- /dev/null +++ b/youtube_dl/extractor/nick.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor +from ..compat import compat_urllib_parse + + +class NickIE(MTVServicesInfoExtractor): +    IE_NAME = 'nick.com' +    _VALID_URL = r'https?://(?:www\.)?nick\.com/videos/clip/(?P<id>[^/?#.]+)' +    _FEED_URL = 'http://udat.mtvnservices.com/service1/dispatch.htm' +    _TESTS = [{ +        'url': 'http://www.nick.com/videos/clip/alvinnn-and-the-chipmunks-112-full-episode.html', +        'playlist': [ +            { +                'md5': '6e5adc1e28253bbb1b28ab05403dd4d4', +                'info_dict': { +                    'id': 'be6a17b0-412d-11e5-8ff7-0026b9414f30', +                    'ext': 'mp4', +                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S1', +                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', + +                } +            }, +            { +                'md5': 'd7be441fc53a1d4882fa9508a1e5b3ce', +                'info_dict': { +                    'id': 'be6b8f96-412d-11e5-8ff7-0026b9414f30', +                    'ext': 'mp4', +                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S2', +                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', + +                } +            }, +            { +                'md5': 'efffe1728a234b2b0d2f2b343dd1946f', +                'info_dict': { +                    'id': 'be6cf7e6-412d-11e5-8ff7-0026b9414f30', +                    'ext': 'mp4', +                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S3', +                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', +                } +            }, +            { +                'md5': '1ec6690733ab9f41709e274a1d5c7556', +                'info_dict': { +                    'id': 'be6e3354-412d-11e5-8ff7-0026b9414f30', +                    'ext': 'mp4', +                    'title': 'ALVINNN!!! and The Chipmunks: "Mojo Missing/Who\'s The Animal" S4', +                    'description': 'Alvin is convinced his mojo was in a cap he gave to a fan, and must find a way to get his hat back before the Chipmunks’ big concert.\nDuring a costume visit to the zoo, Alvin finds himself mistaken for the real Tasmanian devil.', +                } +            }, +        ], +    }] + +    def _get_feed_query(self, uri): +        return compat_urllib_parse.urlencode({ +            'feed': 'nick_arc_player_prime', +            'mgid': uri, +        }) + +    def _extract_mgid(self, webpage): +        return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') diff --git a/youtube_dl/extractor/ora.py b/youtube_dl/extractor/ora.py new file mode 100644 index 000000000..9c4255a2d --- /dev/null +++ b/youtube_dl/extractor/ora.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( +    get_element_by_attribute, +    qualities, +    unescapeHTML, +) + + +class OraTVIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?ora\.tv/([^/]+/)*(?P<id>[^/\?#]+)' +    _TEST = { +        'url': 'https://www.ora.tv/larrykingnow/2015/12/16/vine-youtube-stars-zach-king-king-bach-on-their-viral-videos-0_36jupg6090pq', +        'md5': 'fa33717591c631ec93b04b0e330df786', +        'info_dict': { +            'id': '50178', +            'ext': 'mp4', +            'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', +            'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', +            'duration': 1477, +        } +    } + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        webpage = self._download_webpage(url, display_id) + +        video_data = self._search_regex( +            r'"current"\s*:\s*({[^}]+?})', webpage, 'current video') +        m3u8_url = self._search_regex( +            r'"hls_stream"\s*:\s*"([^"]+)', video_data, 'm3u8 url', None) +        if m3u8_url: +            formats = self._extract_m3u8_formats( +                m3u8_url, display_id, 'mp4', 'm3u8_native', +                m3u8_id='hls', fatal=False) +            # simular to GameSpotIE +            m3u8_path = compat_urlparse.urlparse(m3u8_url).path +            QUALITIES_RE = r'((,[a-z]+\d+)+,?)' +            available_qualities = self._search_regex( +                QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',') +            http_path = m3u8_path[1:].split('/', 1)[1] +            http_template = re.sub(QUALITIES_RE, r'%s', http_path) +            http_template = http_template.replace('.csmil/master.m3u8', '') +            http_template = compat_urlparse.urljoin( +                'http://videocdn-pmd.ora.tv/', http_template) +            preference = qualities( +                ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080']) +            for q in available_qualities: +                formats.append({ +                    'url': http_template % q, +                    'format_id': q, +                    'preference': preference(q), +                }) +            self._sort_formats(formats) +        else: +            return self.url_result(self._search_regex( +                r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube') + +        return { +            'id': self._search_regex( +                r'"video_id"\s*:\s*(\d+)', video_data, 'video id'), +            'display_id': display_id, +            'title': unescapeHTML(self._og_search_title(webpage)), +            'description': get_element_by_attribute( +                'class', 'video_txt_decription', webpage), +            'thumbnail': self._proto_relative_url(self._search_regex( +                r'"thumb"\s*:\s*"([^"]+)', video_data, 'thumbnail', None)), +            'duration': int(self._search_regex( +                r'"duration"\s*:\s*(\d+)', video_data, 'duration')), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dl/extractor/pandoratv.py new file mode 100644 index 000000000..8d49f5c4a --- /dev/null +++ b/youtube_dl/extractor/pandoratv.py @@ -0,0 +1,78 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( +    compat_str, +    compat_urlparse, +) +from ..utils import ( +    ExtractorError, +    float_or_none, +    parse_duration, +    str_to_int, +) + + +class PandoraTVIE(InfoExtractor): +    IE_NAME = 'pandora.tv' +    IE_DESC = '판도라TV' +    _VALID_URL = r'https?://(?:.+?\.)?channel\.pandora\.tv/channel/video\.ptv\?' +    _TEST = { +        'url': 'http://jp.channel.pandora.tv/channel/video.ptv?c1=&prgid=53294230&ch_userid=mikakim&ref=main&lot=cate_01_2', +        'info_dict': { +            'id': '53294230', +            'ext': 'flv', +            'title': '頭を撫でてくれる?', +            'description': '頭を撫でてくれる?', +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 39, +            'upload_date': '20151218', +            'uploader': 'カワイイ動物まとめ', +            'uploader_id': 'mikakim', +            'view_count': int, +            'like_count': int, +        } +    } + +    def _real_extract(self, url): +        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +        video_id = qs.get('prgid', [None])[0] +        user_id = qs.get('ch_userid', [None])[0] +        if any(not f for f in (video_id, user_id,)): +            raise ExtractorError('Invalid URL', expected=True) + +        data = self._download_json( +            'http://m.pandora.tv/?c=view&m=viewJsonApi&ch_userid=%s&prgid=%s' +            % (user_id, video_id), video_id) + +        info = data['data']['rows']['vod_play_info']['result'] + +        formats = [] +        for format_id, format_url in info.items(): +            if not format_url: +                continue +            height = self._search_regex( +                r'^v(\d+)[Uu]rl$', format_id, 'height', default=None) +            if not height: +                continue +            formats.append({ +                'format_id': '%sp' % height, +                'url': format_url, +                'height': int(height), +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': info['subject'], +            'description': info.get('body'), +            'thumbnail': info.get('thumbnail') or info.get('poster'), +            'duration': float_or_none(info.get('runtime'), 1000) or parse_duration(info.get('time')), +            'upload_date': info['fid'][:8] if isinstance(info.get('fid'), compat_str) else None, +            'uploader': info.get('nickname'), +            'uploader_id': info.get('upload_userid'), +            'view_count': str_to_int(info.get('hit')), +            'like_count': str_to_int(info.get('likecnt')), +            'formats': formats, +        } diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 1ba3bbddf..45a3c41c5 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -11,6 +11,7 @@ from ..utils import (      strip_jsonp,      unescapeHTML,      clean_html, +    ExtractorError,  ) @@ -177,7 +178,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE):          'info_dict': {              'id': '001BLpXF2DyJe2',              'title': '林俊杰', -            'description': 'md5:2a222d89ba4455a3af19940c0481bb78', +            'description': 'md5:870ec08f7d8547c29c93010899103751',          },          'playlist_count': 12,      } @@ -272,7 +273,7 @@ class QQMusicToplistIE(QQPlaylistBaseIE):          'url': 'http://y.qq.com/#type=toplist&p=top_3',          'info_dict': {              'id': 'top_3', -            'title': 'QQ音乐巅峰榜·欧美', +            'title': '巅峰榜·欧美',              'description': 'QQ音乐巅峰榜·欧美根据用户收听行为自动生成,集结当下最流行的欧美新歌!:更新时间:每周四22点|统'                             '计周期:一周(上周四至本周三)|统计对象:三个月内发行的欧美歌曲|统计数量:100首|统计算法:根据'                             '歌曲在一周内的有效播放次数,由高到低取前100名(同一歌手最多允许5首歌曲同时上榜)|有效播放次数:' @@ -315,7 +316,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):      IE_DESC = 'QQ音乐 - 歌单'      _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' -    _TEST = { +    _TESTS = [{          'url': 'http://y.qq.com/#type=taoge&id=3462654915',          'info_dict': {              'id': '3462654915', @@ -323,7 +324,16 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):              'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',          },          'playlist_count': 40, -    } +        'skip': 'playlist gone', +    }, { +        'url': 'http://y.qq.com/#type=taoge&id=1374105607', +        'info_dict': { +            'id': '1374105607', +            'title': '易入人心的华语民谣', +            'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', +        }, +        'playlist_count': 20, +    }]      def _real_extract(self, url):          list_id = self._match_id(url) @@ -331,14 +341,21 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):          list_json = self._download_json(              'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s'              % list_id, list_id, 'Download list page', -            transform_source=strip_jsonp)['cdlist'][0] - +            transform_source=strip_jsonp) +        if not len(list_json.get('cdlist', [])): +            if list_json.get('code'): +                raise ExtractorError( +                    'QQ Music said: error %d in fetching playlist info' % list_json['code'], +                    expected=True) +            raise ExtractorError('Unable to get playlist info') + +        cdlist = list_json['cdlist'][0]          entries = [              self.url_result(                  'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] -            ) for song in list_json['songlist'] +            ) for song in cdlist['songlist']          ] -        list_name = list_json.get('dissname') -        list_description = clean_html(unescapeHTML(list_json.get('desc'))) +        list_name = cdlist.get('dissname') +        list_description = clean_html(unescapeHTML(cdlist.get('desc')))          return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dl/extractor/regiotv.py new file mode 100644 index 000000000..e250a52f0 --- /dev/null +++ b/youtube_dl/extractor/regiotv.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( +    sanitized_Request, +    xpath_text, +    xpath_with_ns, +) + + +class RegioTVIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)' +    _TESTS = [{ +        'url': 'http://www.regio-tv.de/video/395808.html', +        'info_dict': { +            'id': '395808', +            'ext': 'mp4', +            'title': 'Wir in Ludwigsburg', +            'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!', +        } +    }, { +        'url': 'http://www.regio-tv.de/video/395808', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage(url, video_id) + +        key = self._search_regex( +            r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key') +        title = self._og_search_title(webpage) + +        SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>' + +        request = sanitized_Request( +            'http://v.telvi.de/', +            SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8')) +        video_data = self._download_xml(request, video_id, 'Downloading video XML') + +        NS_MAP = { +            'xsi': 'http://www.w3.org/2001/XMLSchema-instance', +            'soap': 'http://schemas.xmlsoap.org/soap/envelope/', +        } + +        video_url = xpath_text( +            video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True) +        thumbnail = xpath_text( +            video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail') +        description = self._og_search_description( +            webpage) or self._html_search_meta('description', webpage) + +        return { +            'id': video_id, +            'url': video_url, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py new file mode 100644 index 000000000..b1b8800b9 --- /dev/null +++ b/youtube_dl/extractor/revision3.py @@ -0,0 +1,127 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( +    int_or_none, +    parse_iso8601, +    unescapeHTML, +    qualities, +) + + +class Revision3IE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|testtube|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' +    _TESTS = [{ +        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', +        'md5': 'd94a72d85d0a829766de4deb8daaf7df', +        'info_dict': { +            'id': '73034', +            'display_id': 'technobuffalo/5-google-predictions-for-2016', +            'ext': 'webm', +            'title': '5 Google Predictions for 2016', +            'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.', +            'upload_date': '20151228', +            'timestamp': 1451325600, +            'duration': 187, +            'uploader': 'TechnoBuffalo', +            'uploader_id': 'technobuffalo', +        } +    }, { +        'url': 'http://testtube.com/brainstuff', +        'info_dict': { +            'id': '251', +            'title': 'BrainStuff', +            'description': 'Whether the topic is popcorn or particle physics, you can count on the HowStuffWorks team to explore-and explain-the everyday science in the world around us on BrainStuff.', +        }, +        'playlist_mincount': 93, +    }, { +        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial', +        'info_dict': { +            'id': '60163', +            'display_id': 'dnews/5-weird-ways-plants-can-eat-animals', +            'duration': 275, +            'ext': 'webm', +            'title': '5 Weird Ways Plants Can Eat Animals', +            'description': 'Why have some plants evolved to eat meat?', +            'upload_date': '20150120', +            'timestamp': 1421763300, +            'uploader': 'DNews', +            'uploader_id': 'dnews', +        }, +    }] +    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' +    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' + +    def _real_extract(self, url): +        domain, display_id = re.match(self._VALID_URL, url).groups() +        page_info = self._download_json( +            self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id) + +        if page_info['data']['type'] == 'episode': +            episode_data = page_info['data'] +            video_id = compat_str(episode_data['video']['data']['id']) +            video_data = self._download_json( +                'http://revision3.com/api/getPlaylist.json?api_key=%s&codecs=h264,vp8,theora&video_id=%s' % (self._API_KEY, video_id), +                video_id)['items'][0] + +            formats = [] +            for vcodec, media in video_data['media'].items(): +                for quality_id, quality in media.items(): +                    if quality_id == 'hls': +                        formats.extend(self._extract_m3u8_formats( +                            quality['url'], video_id, 'mp4', +                            'm3u8_native', m3u8_id='hls', fatal=False)) +                    else: +                        formats.append({ +                            'url': quality['url'], +                            'format_id': '%s-%s' % (vcodec, quality_id), +                            'tbr': int_or_none(quality.get('bitrate')), +                            'vcodec': vcodec, +                        }) +            self._sort_formats(formats) + +            preference = qualities(['mini', 'small', 'medium', 'large']) +            thumbnails = [{ +                'url': image_url, +                'id': image_id, +                'preference': preference(image_id) +            } for image_id, image_url in video_data.get('images', {}).items()] + +            return { +                'id': video_id, +                'display_id': display_id, +                'title': unescapeHTML(video_data['title']), +                'description': unescapeHTML(video_data.get('summary')), +                'timestamp': parse_iso8601(episode_data.get('publishTime'), ' '), +                'author': episode_data.get('author'), +                'uploader': video_data.get('show', {}).get('name'), +                'uploader_id': video_data.get('show', {}).get('slug'), +                'duration': int_or_none(video_data.get('duration')), +                'thumbnails': thumbnails, +                'formats': formats, +            } +        else: +            show_data = page_info['show']['data'] +            episodes_data = page_info['episodes']['data'] +            num_episodes = page_info['meta']['totalEpisodes'] +            processed_episodes = 0 +            entries = [] +            page_num = 1 +            while True: +                entries.extend([self.url_result( +                    'http://%s/%s/%s' % (domain, display_id, episode['slug'])) for episode in episodes_data]) +                processed_episodes += len(episodes_data) +                if processed_episodes == num_episodes: +                    break +                page_num += 1 +                episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % ( +                    domain, display_id + '/' + compat_str(page_num), domain), +                    display_id)['episodes']['data'] + +            return self.playlist_result( +                entries, compat_str(show_data['id']), +                show_data.get('name'), show_data.get('summary')) diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 41fddc375..ffea438cc 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -75,9 +75,12 @@ class RuutuIE(InfoExtractor):                          preference = -1 if proto == 'rtmp' else 1                          label = child.get('label')                          tbr = int_or_none(child.get('bitrate')) +                        format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto +                        if not self._is_valid_url(video_url, video_id, format_id): +                            continue                          width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]                          formats.append({ -                            'format_id': '%s-%s' % (proto, label if label else tbr), +                            'format_id': format_id,                              'url': video_url,                              'width': width,                              'height': height, diff --git a/youtube_dl/extractor/testtube.py b/youtube_dl/extractor/testtube.py deleted file mode 100644 index 26655d690..000000000 --- a/youtube_dl/extractor/testtube.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( -    int_or_none, -    qualities, -) - - -class TestTubeIE(InfoExtractor): -    _VALID_URL = r'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)' -    _TESTS = [{ -        'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial', -        'info_dict': { -            'id': '60163', -            'display_id': '5-weird-ways-plants-can-eat-animals', -            'duration': 275, -            'ext': 'webm', -            'title': '5 Weird Ways Plants Can Eat Animals', -            'description': 'Why have some plants evolved to eat meat?', -            'thumbnail': 're:^https?://.*\.jpg$', -            'uploader': 'DNews', -            'uploader_id': 'dnews', -        }, -    }, { -        'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping', -        'info_dict': { -            'id': 'fAGfJ4YjVus', -            'ext': 'mp4', -            'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science', -            'uploader': 'Science Channel', -            'uploader_id': 'ScienceChannel', -            'upload_date': '20150203', -            'description': 'md5:e61374030015bae1d2e22f096d4769d6', -        } -    }] - -    def _real_extract(self, url): -        display_id = self._match_id(url) - -        webpage = self._download_webpage(url, display_id) - -        youtube_url = self._html_search_regex( -            r'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', -            webpage, 'youtube iframe', default=None) -        if youtube_url: -            return self.url_result(youtube_url, 'Youtube', video_id=display_id) - -        video_id = self._search_regex( -            r"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);", -            webpage, 'video ID') - -        all_info = self._download_json( -            'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id, -            video_id) -        info = all_info['items'][0] - -        formats = [] -        for vcodec, fdatas in info['media'].items(): -            for name, fdata in fdatas.items(): -                formats.append({ -                    'format_id': '%s-%s' % (vcodec, name), -                    'url': fdata['url'], -                    'vcodec': vcodec, -                    'tbr': fdata.get('bitrate'), -                }) -        self._sort_formats(formats) - -        duration = int_or_none(info.get('duration')) -        images = info.get('images') -        thumbnails = None -        preference = qualities(['mini', 'small', 'medium', 'large']) -        if images: -            thumbnails = [{ -                'id': thumbnail_id, -                'url': img_url, -                'preference': preference(thumbnail_id) -            } for thumbnail_id, img_url in images.items()] - -        return { -            'id': video_id, -            'display_id': display_id, -            'title': info['title'], -            'description': info.get('summary'), -            'thumbnails': thumbnails, -            'uploader': info.get('show', {}).get('name'), -            'uploader_id': info.get('show', {}).get('slug'), -            'duration': duration, -            'formats': formats, -        } diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index d6d038a8d..adc05ed5f 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -4,32 +4,9 @@ import re  from .common import InfoExtractor  from .brightcove import BrightcoveLegacyIE -from .discovery import DiscoveryIE  from ..compat import compat_urlparse -class TlcIE(DiscoveryIE): -    IE_NAME = 'tlc.com' -    _VALID_URL = r'http://www\.tlc\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' - -    # DiscoveryIE has _TESTS -    _TESTS = [{ -        'url': 'http://www.tlc.com/tv-shows/cake-boss/videos/too-big-to-fly.htm', -        'info_dict': { -            'id': '104493', -            'ext': 'mp4', -            'title': 'Too Big to Fly', -            'description': 'Buddy has taken on a high flying task.', -            'duration': 119, -            'timestamp': 1393365060, -            'upload_date': '20140225', -        }, -        'params': { -            'skip_download': True,  # requires ffmpef -        }, -    }] - -  class TlcDeIE(InfoExtractor):      IE_NAME = 'tlc.de'      _VALID_URL = r'http://www\.tlc\.de/sendungen/[^/]+/videos/(?P<title>[^/?]+)' diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py new file mode 100644 index 000000000..b73279dec --- /dev/null +++ b/youtube_dl/extractor/tvland.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor + + +class TVLandIE(MTVServicesInfoExtractor): +    IE_NAME = 'tvland.com' +    _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' +    _FEED_URL = 'http://www.tvland.com/feeds/mrss/' +    _TESTS = [{ +        'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048', +        'playlist': [ +            { +                'md5': '227e9723b9669c05bf51098b10287aa7', +                'info_dict': { +                    'id': 'bcbd3a83-3aca-4dca-809b-f78a87dcccdd', +                    'ext': 'mp4', +                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 1 of 5', +                } +            }, +            { +                'md5': '9fa2b764ec0e8194fb3ebb01a83df88b', +                'info_dict': { +                    'id': 'f4279548-6e13-40dd-92e8-860d27289197', +                    'ext': 'mp4', +                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 2 of 5', +                } +            }, +            { +                'md5': 'fde4c3bccd7cc7e3576b338734153cec', +                'info_dict': { +                    'id': '664e4a38-53ef-4115-9bc9-d0f789ec6334', +                    'ext': 'mp4', +                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 3 of 5', +                } +            }, +            { +                'md5': '247f6780cda6891f2e49b8ae2b10e017', +                'info_dict': { +                    'id': '9146ecf5-b15a-4d78-879c-6679b77f4960', +                    'ext': 'mp4', +                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 4 of 5', +                } +            }, +            { +                'md5': 'fd269f33256e47bad5eb6c40de089ff6', +                'info_dict': { +                    'id': '04334a2e-9a47-4214-a8c2-ae5792e2fab7', +                    'ext': 'mp4', +                    'title': 'Everybody Loves Raymond|Everybody Loves Raymond 048 HD, Part 5 of 5', +                } +            } +        ], +    }, { +        'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies', +        'md5': 'e2c6389401cf485df26c79c247b08713', +        'info_dict': { +            'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88', +            'ext': 'mp4', +            'title': 'Younger|Younger: Hilary Duff - Little Lies', +            'description': 'md5:7d192f56ca8d958645c83f0de8ef0269' +        }, +    }] diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index fca5ddc69..4a492f784 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -155,7 +155,16 @@ class YahooIE(InfoExtractor):                  'description': 'md5:8fc39608213295748e1e289807838c97',                  'duration': 1646,              }, -        } +        }, { +            # it uses an alias to get the video_id +            'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html', +            'info_dict': { +                'id': '40eda9c8-8e5f-3552-8745-830f67d0c737', +                'ext': 'mp4', +                'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking', +                'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.', +            }, +        },      ]      def _real_extract(self, url): @@ -199,13 +208,22 @@ class YahooIE(InfoExtractor):              r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,              default=None)          if items_json is None: -            CONTENT_ID_REGEXES = [ -                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', -                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', -                r'"first_videoid"\s*:\s*"([^"]+)"', -                r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), -            ] -            video_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID') +            alias = self._search_regex( +                r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None) +            if alias is not None: +                alias_info = self._download_json( +                    'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias, +                    display_id, 'Downloading alias info') +                video_id = alias_info[0]['id'] +            else: +                CONTENT_ID_REGEXES = [ +                    r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', +                    r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"', +                    r'"first_videoid"\s*:\s*"([^"]+)"', +                    r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id), +                ] +                video_id = self._search_regex( +                    CONTENT_ID_REGEXES, webpage, 'content ID')          else:              items = json.loads(items_json)              info = items['mediaItems']['query']['results']['mediaObj'][0] diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index 3a3432be8..f767fa15f 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -2,6 +2,9 @@  from __future__ import unicode_literals  import base64 +import random +import string +import time  from .common import InfoExtractor  from ..compat import ( @@ -141,6 +144,11 @@ class YoukuIE(InfoExtractor):          return video_urls_dict +    @staticmethod +    def get_ysuid(): +        return '%d%s' % (int(time.time()), ''.join([ +            random.choice(string.ascii_letters) for i in range(3)])) +      def get_hd(self, fm):          hd_id_dict = {              '3gp': '0', @@ -189,6 +197,8 @@ class YoukuIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) +        self._set_cookie('youku.com', '__ysuid', self.get_ysuid()) +          def retrieve_data(req_url, note):              headers = {                  'Referer': req_url, diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 2a1f2f6d1..c619a75e2 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -13,6 +13,7 @@ from ..utils import (      determine_ext,      qualities,      float_or_none, +    ExtractorError,  ) @@ -59,7 +60,6 @@ class ZDFIE(InfoExtractor):                      'ext': 'flv',                      'format_id': '%s-%d' % (proto, bitrate),                      'tbr': bitrate, -                    'protocol': proto,                  })          self._sort_formats(formats)          return formats @@ -70,6 +70,15 @@ class ZDFIE(InfoExtractor):              note='Downloading video info',              errnote='Failed to download video info') +        status_code = doc.find('./status/statuscode') +        if status_code is not None and status_code.text != 'ok': +            code = status_code.text +            if code == 'notVisibleAnymore': +                message = 'Video %s is not available' % video_id +            else: +                message = '%s returned error: %s' % (self.IE_NAME, code) +            raise ExtractorError(message, expected=True) +          title = doc.find('.//information/title').text          description = xpath_text(doc, './/information/detail', 'description')          duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration')) @@ -129,10 +138,10 @@ class ZDFIE(InfoExtractor):                      video_url, video_id, fatal=False))              elif ext == 'm3u8':                  formats.extend(self._extract_m3u8_formats( -                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) +                    video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))              elif ext == 'f4m':                  formats.extend(self._extract_f4m_formats( -                    video_url, video_id, f4m_id='hds', fatal=False)) +                    video_url, video_id, f4m_id=format_id, fatal=False))              else:                  proto = format_m.group('proto').lower() diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a62baa305..790bd5b3b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.12.29' +__version__ = '2016.01.01' | 
