Move playlist tests to extractors.

From now on, test_download will run these tests. That means we benefit not only from the networking setup in there, but also from the other tests (for example test_all_urls to find problems with _VALID_URLs).
author: Philipp Hagemeister <phihag@phihag.de> 2014-08-28 00:58:24 +0200
committer: Philipp Hagemeister <phihag@phihag.de> 2014-08-28 00:58:24 +0200
commit: 22a6f15061127045f4d6ae1ff4efc922fa372cc2 (patch)
tree: f0df2ab32f301b3c66b6d9d147e49fe177e27a55 /youtube_dl/extractor
parent: 259454525f9fe41947e6e05882336b7196fc8fce (diff)
23 files changed, 340 insertions, 82 deletions
diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py
index 59d3bbba4..c983ef0f5 100644
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dl/extractor/academicearth.py
@@ -7,6 +7,15 @@ from .common import InfoExtractor
 class AcademicEarthCourseIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
     IE_NAME = 'AcademicEarth:Course'
+    _TEST = {
+        'url': 'http://academicearth.org/playlists/laws-of-nature/',
+        'info_dict': {
+            'id': 'laws-of-nature',
+            'title': 'Laws of Nature',
+            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
+        },
+        'playlist_count': 4,
+    }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
index a7bfe5a5c..47f8e4157 100644
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -21,7 +21,7 @@ class AolIE(InfoExtractor):
         (?:$|\?)
     '''
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
         'md5': '18ef68f48740e86ae94b98da815eec42',
         'info_dict': {
@@ -30,7 +30,14 @@ class AolIE(InfoExtractor):
             'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
         },
         'add_ie': ['FiveMin'],
-    }
+    }, {
+        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
+        'info_dict': {
+            'id': '152147',
+            'title': 'Brace Yourself - Today\'s Weirdest News',
+        },
+        'playlist_mincount': 10,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
index ccd31c4c7..de5d4faf3 100644
--- a/youtube_dl/extractor/bambuser.py
+++ b/youtube_dl/extractor/bambuser.py
@@ -59,6 +59,13 @@ class BambuserChannelIE(InfoExtractor):
     _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
     # The maximum number we can get with each request
     _STEP = 50
+    _TEST = {
+        'url': 'http://bambuser.com/channel/pixelversity',
+        'info_dict': {
+            'title': 'pixelversity',
+        },
+        'playlist_mincount': 60,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -73,10 +80,10 @@ class BambuserChannelIE(InfoExtractor):
             req = compat_urllib_request.Request(req_url)
             # Without setting this header, we wouldn't get any result
             req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
-            info_json = self._download_webpage(req, user,
-                'Downloading page %d' % i)
-            results = json.loads(info_json)['result']
-            if len(results) == 0:
+            data = self._download_json(
+                req, user, 'Downloading page %d' % i)
+            results = data['result']
+            if not results:
                 break
             last_id = results[-1]['vid']
             urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index dcbbdef43..c569aa4d2 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -96,7 +96,7 @@ class BandcampAlbumIE(InfoExtractor):
     IE_NAME = 'Bandcamp:album'
     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+))'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
         'playlist': [
             {
@@ -118,7 +118,13 @@ class BandcampAlbumIE(InfoExtractor):
             'playlistend': 2
         },
         'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test'
-    }
+    }, {
+        'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+        'info_dict': {
+            'title': 'Hierophany of the Open Grave',
+        },
+        'playlist_mincount': 9,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py
index b6552c542..541106684 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dl/extractor/cspan.py
@@ -34,6 +34,13 @@ class CSpanIE(InfoExtractor):
             'title': 'International Health Care Models',
             'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
         }
+    }, {
+        'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
+        'info_dict': {
+            'id': '342759',
+            'title': 'General Motors Ignition Switch Recall',
+        },
+        'playlist_duration_sum': 14855,
     }]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 26d077eff..66a8f16d9 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,3 +1,6 @@
+#coding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json
 import itertools
@@ -99,8 +102,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             webpage)
         if m_vevo is not None:
             vevo_id = m_vevo.group('id')
-            self.to_screen(u'Vevo video detected: %s' % vevo_id)
-            return self.url_result(u'vevo:%s' % vevo_id, ie='Vevo')
+            self.to_screen('Vevo video detected: %s' % vevo_id)
+            return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
 
         age_limit = self._rta_search(webpage)
 
@@ -111,7 +114,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 
         embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
         embed_page = self._download_webpage(embed_url, video_id,
-                                            u'Downloading embed page')
+                                            'Downloading embed page')
         info = self._search_regex(r'var info = ({.*?}),$', embed_page,
             'video info', flags=re.MULTILINE)
         info = json.loads(info)
@@ -136,7 +139,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                     'height': height,
                 })
         if not formats:
-            raise ExtractorError(u'Unable to extract video URL')
+            raise ExtractorError('Unable to extract video URL')
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, webpage)
@@ -145,7 +148,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             return
 
         view_count = self._search_regex(
-            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, u'view count', fatal=False)
+            r'video_views_count[^>]+>\s+([\d\.,]+)', webpage, 'view count', fatal=False)
         if view_count is not None:
             view_count = str_to_int(view_count)
 
@@ -167,28 +170,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
                 video_id, note=False)
         except ExtractorError as err:
-            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
             return {}
         info = json.loads(sub_list)
         if (info['total'] > 0):
             sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
             return sub_lang_list
-        self._downloader.report_warning(u'video doesn\'t have subtitles')
+        self._downloader.report_warning('video doesn\'t have subtitles')
         return {}
 
 
 class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
-    IE_NAME = u'dailymotion:playlist'
+    IE_NAME = 'dailymotion:playlist'
     _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
     _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
     _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
+    _TESTS = [{
+        'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+        'info_dict': {
+            'title': 'SPORT',
+        },
+        'playlist_mincount': 20,
+    }]
 
     def _extract_entries(self, id):
         video_ids = []
         for pagenum in itertools.count(1):
             request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
             webpage = self._download_webpage(request,
-                                             id, u'Downloading page %s' % pagenum)
+                                             id, 'Downloading page %s' % pagenum)
 
             video_ids.extend(re.findall(r'data-xid="(.+?)"', webpage))
 
@@ -211,9 +221,17 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
 
 
 class DailymotionUserIE(DailymotionPlaylistIE):
-    IE_NAME = u'dailymotion:user'
+    IE_NAME = 'dailymotion:user'
     _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
     _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+    _TESTS = [{
+        'url': 'https://www.dailymotion.com/user/nqtv',
+        'info_dict': {
+            'id': 'nqtv',
+            'title': 'Rémi Gaillard',
+        },
+        'playlist_mincount': 100,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -221,7 +239,7 @@ class DailymotionUserIE(DailymotionPlaylistIE):
         webpage = self._download_webpage(url, user)
         full_user = unescapeHTML(self._html_search_regex(
             r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
-            webpage, u'user', flags=re.DOTALL))
+            webpage, 'user'))
 
         return {
             '_type': 'playlist',
diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dl/extractor/everyonesmixtape.py
index 12829cbcc..d237a8281 100644
--- a/youtube_dl/extractor/everyonesmixtape.py
+++ b/youtube_dl/extractor/everyonesmixtape.py
@@ -12,10 +12,11 @@ from ..utils import (
 class EveryonesMixtapeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
-        'file': '5bfseWNmlds.mp4',
         "info_dict": {
+            'id': '5bfseWNmlds',
+            'ext': 'mp4',
             "title": "Passion Pit - \"Sleepyhead\" (Official Music Video)",
             "uploader": "FKR.TV",
             "uploader_id": "frenchkissrecords",
@@ -25,7 +26,14 @@ class EveryonesMixtapeIE(InfoExtractor):
         'params': {
             'skip_download': True,  # This is simply YouTube
         }
-    }
+    }, {
+        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
+        'info_dict': {
+            'id': 'm7m0jJAbMQi',
+            'title': 'Driving',
+        },
+        'playlist_count': 24
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6961dfec7..acdf725cc 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -351,6 +351,20 @@ class GenericIE(InfoExtractor):
                 'description': 're:'
             },
             'playlist_mincount': 11,
+        },
+        # Multiple brightcove videos
+        # https://github.com/rg3/youtube-dl/issues/2283
+        {
+            'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+            'info_dict': {
+                'id': 'always-never',
+                'title': 'Always / Never - The New Yorker',
+            },
+            'playlist_count': 3,
+            'params': {
+                'extract_flat': False,
+                'skip_download': True,
+            }
         }
     ]
 
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 7cee505c0..4536db3bf 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -63,6 +63,14 @@ class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
     _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _TEST = {
+        'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
+        'info_dict': {
+            'id': 'JFs9NWw6XI0',
+            'title': 'March 23, 2012 Releases',
+        },
+        'playlist_count': 7,
+    }
     
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index b5372bf7a..5109f26ce 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -46,6 +46,30 @@ class InstagramUserIE(InfoExtractor):
     _VALID_URL = r'http://instagram\.com/(?P<username>[^/]{2,})/?(?:$|[?#])'
     IE_DESC = 'Instagram user profile'
     IE_NAME = 'instagram:user'
+    _TEST = {
+        'url': 'http://instagram.com/porsche',
+        'info_dict': {
+            'id': 'porsche',
+            'title': 'porsche',
+        },
+        'playlist_mincount': 2,
+        'playlist': [{
+            'info_dict': {
+                'id': '614605558512799803_462752227',
+                'ext': 'mp4',
+                'title': '#Porsche Intelligent Performance.',
+                'thumbnail': 're:^https?://.*\.jpg',
+                'uploader': 'Porsche',
+                'uploader_id': 'porsche',
+                'timestamp': 1387486713,
+                'upload_date': '20131219',
+            },
+        }],
+        'params': {
+            'extract_flat': True,
+            'skip_download': True,
+        }
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index 4027deb70..75b543b7c 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -127,6 +127,21 @@ class IviCompilationIE(InfoExtractor):
     IE_DESC = 'ivi.ru compilations'
     IE_NAME = 'ivi:compilation'
     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+    _TESTS = [{
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa',
+            'title': 'Двое из ларца (2006 - 2008)',
+        },
+        'playlist_mincount': 24,
+    }, {
+        'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+        'info_dict': {
+            'id': 'dvoe_iz_lartsa/season1',
+            'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+        },
+        'playlist_mincount': 12,
+    }]
 
     def _extract_entries(self, html, compilation_id):
         return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dl/extractor/khanacademy.py
index 772bb5671..04bac7517 100644
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dl/extractor/khanacademy.py
@@ -12,18 +12,27 @@ class KhanAcademyIE(InfoExtractor):
     _VALID_URL = r'^https?://(?:www\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
     IE_NAME = 'KhanAcademy'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.khanacademy.org/video/one-time-pad',
-        'file': 'one-time-pad.mp4',
         'md5': '7021db7f2d47d4fff89b13177cb1e8f4',
         'info_dict': {
+            'id': 'one-time-pad',
+            'ext': 'mp4',
             'title': 'The one-time pad',
             'description': 'The perfect cipher',
             'duration': 176,
             'uploader': 'Brit Cruise',
             'upload_date': '20120411',
         }
-    }
+    }, {
+        'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+        'info_dict': {
+            'id': 'cryptography',
+            'title': 'Journey into cryptography',
+            'description': 'How have humans protected their secret messages through history? What has changed today?',
+        },
+        'playlist_mincount': 3,
+    }]
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 281a0ce40..516147417 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -19,7 +19,7 @@ from ..utils import (
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
     _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$'
-    _TEST = {
+    _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
         'info_dict': {
@@ -31,7 +31,13 @@ class LivestreamIE(InfoExtractor):
             'view_count': int,
             'thumbnail': 're:^http://.*\.jpg$'
         }
-    }
+    }, {
+        'url': 'http://new.livestream.com/tedx/cityenglish',
+        'info_dict': {
+            'title': 'TEDCity2.0 (English)',
+        },
+        'playlist_mincount': 4,
+    }]
 
     def _parse_smil(self, video_id, smil_url):
         formats = []
@@ -111,34 +117,37 @@ class LivestreamIE(InfoExtractor):
         event_name = mobj.group('event_name')
         webpage = self._download_webpage(url, video_id or event_name)
 
-        og_video = self._og_search_video_url(webpage, 'player url', fatal=False, default=None)
-        if og_video is None:
-            config_json = self._search_regex(
-                r'window.config = ({.*?});', webpage, 'window config')
-            info = json.loads(config_json)['event']
-
-            def is_relevant(vdata, vid):
-                result = vdata['type'] == 'video'
-                if video_id is not None:
-                    result = result and compat_str(vdata['data']['id']) == vid
-                return result
-
-            videos = [self._extract_video_info(video_data['data'])
-                      for video_data in info['feed']['data']
-                      if is_relevant(video_data, video_id)]
-            if video_id is None:
-                # This is an event page:
-                return self.playlist_result(videos, info['id'], info['full_name'])
-            else:
-                if videos:
-                    return videos[0]
-        else:
+        og_video = self._og_search_video_url(
+            webpage, 'player url', fatal=False, default=None)
+        if og_video is not None:
             query_str = compat_urllib_parse_urlparse(og_video).query
             query = compat_urlparse.parse_qs(query_str)
-            api_url = query['play_url'][0].replace('.smil', '')
-            info = json.loads(self._download_webpage(
-                api_url, video_id, 'Downloading video info'))
-            return self._extract_video_info(info)
+            if 'play_url' in query:
+                api_url = query['play_url'][0].replace('.smil', '')
+                info = json.loads(self._download_webpage(
+                    api_url, video_id, 'Downloading video info'))
+                return self._extract_video_info(info)
+
+        config_json = self._search_regex(
+            r'window.config = ({.*?});', webpage, 'window config')
+        info = json.loads(config_json)['event']
+
+        def is_relevant(vdata, vid):
+            result = vdata['type'] == 'video'
+            if video_id is not None:
+                result = result and compat_str(vdata['data']['id']) == vid
+            return result
+
+        videos = [self._extract_video_info(video_data['data'])
+                  for video_data in info['feed']['data']
+                  if is_relevant(video_data, video_id)]
+        if video_id is None:
+            # This is an event page:
+            return self.playlist_result(videos, info['id'], info['full_name'])
+        else:
+            if not videos:
+                raise ExtractorError('Cannot find video %s' % video_id)
+            return videos[0]
 
 
 # The original version of Livestream uses a different system
@@ -148,7 +157,7 @@ class LivestreamOriginalIE(InfoExtractor):
         (?P<user>[^/]+)/(?P<type>video|folder)
         (?:\?.*?Id=|/)(?P<id>.*?)(&|$)
         '''
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
         'info_dict': {
             'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
@@ -159,7 +168,13 @@ class LivestreamOriginalIE(InfoExtractor):
             # rtmp
             'skip_download': True,
         },
-    }
+    }, {
+        'url': 'https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        'info_dict': {
+            'id': 'a07bf706-d0e4-4e75-a747-b021d84f2fd3',
+        },
+        'playlist_mincount': 4,
+    }]
 
     def _extract_video(self, user, video_id):
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
@@ -182,15 +197,19 @@ class LivestreamOriginalIE(InfoExtractor):
 
     def _extract_folder(self, url, folder_id):
         webpage = self._download_webpage(url, folder_id)
-        urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
+        paths = orderedSet(re.findall(
+            r'''(?x)(?:
+                <li\s+class="folder">\s*<a\s+href="|
+                <a\s+href="(?=https?://livestre\.am/)
+            )([^"]+)"''', webpage))
 
         return {
             '_type': 'playlist',
             'id': folder_id,
             'entries': [{
                 '_type': 'url',
-                'url': video_url,
-            } for video_url in urls],
+                'url': compat_urlparse.urljoin(url, p),
+            } for p in paths],
         }
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
index 8a8acc955..ceda1dcc0 100644
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -46,7 +46,7 @@ class NHLBaseInfoExtractor(InfoExtractor):
 
 class NHLIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com'
-    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?<=[?&])id=(?P<id>\d+)'
+    _VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/console\?.*?(?:[?&])id=(?P<id>[0-9]+)'
 
     _TEST = {
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
@@ -72,7 +72,7 @@ class NHLIE(NHLBaseInfoExtractor):
 class NHLVideocenterIE(NHLBaseInfoExtractor):
     IE_NAME = 'nhl.com:videocenter'
     IE_DESC = 'NHL videocenter category'
-    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[^&]+))?'
+    _VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?.*?catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
     _TEST = {
         'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
         'info_dict': {
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 357edbbda..0c8790da2 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -74,6 +74,13 @@ class RutubeChannelIE(InfoExtractor):
     IE_NAME = 'rutube:channel'
     IE_DESC = 'Rutube channels'
     _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/tags/video/1800/',
+        'info_dict': {
+            'id': '1800',
+        },
+        'playlist_mincount': 68,
+    }]
 
     _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
 
@@ -101,6 +108,7 @@ class RutubeMovieIE(RutubeChannelIE):
     IE_NAME = 'rutube:movie'
     IE_DESC = 'Rutube movies'
     _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+    _TESTS = []
 
     _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
     _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
@@ -119,5 +127,12 @@ class RutubePersonIE(RutubeChannelIE):
     IE_NAME = 'rutube:person'
     IE_DESC = 'Rutube person videos'
     _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://rutube.ru/video/person/313878/',
+        'info_dict': {
+            'id': '313878',
+        },
+        'playlist_mincount': 37,
+    }]
 
     _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py
index 13e7e71cb..9bd5defa7 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@@ -267,6 +267,14 @@ class SmotriCommunityIE(InfoExtractor):
     IE_DESC = 'Smotri.com community videos'
     IE_NAME = 'smotri:community'
     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/community/video/(?P<communityid>[0-9A-Za-z_\'-]+)'
+    _TEST = {
+        'url': 'http://smotri.com/community/video/kommuna',
+        'info_dict': {
+            'id': 'kommuna',
+            'title': 'КПРФ',
+        },
+        'playlist_mincount': 4,
+    }
     
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -289,6 +297,14 @@ class SmotriUserIE(InfoExtractor):
     IE_DESC = 'Smotri.com user videos'
     IE_NAME = 'smotri:user'
     _VALID_URL = r'^https?://(?:www\.)?smotri\.com/user/(?P<userid>[0-9A-Za-z_\'-]+)'
+    _TESTS = [{
+        'url': 'http://smotri.com/user/inspector',
+        'info_dict': {
+            'id': 'inspector',
+            'title': 'Inspector',
+        },
+        'playlist_mincount': 9,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 097d0e418..b78aed7f0 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -28,7 +28,8 @@ class SoundcloudIE(InfoExtractor):
     _VALID_URL = r'''(?x)^(?:https?://)?
                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
                             (?P<uploader>[\w\d-]+)/
-                            (?!sets/)(?P<title>[\w\d-]+)/?
+                            (?!sets/|likes/?(?:$|[?#]))
+                            (?P<title>[\w\d-]+)/?
                             (?P<token>[^?]+?)?(?:[?].*)?$)
                        |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+))
                        |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
@@ -221,13 +222,16 @@ class SoundcloudIE(InfoExtractor):
 class SoundcloudSetIE(SoundcloudIE):
     _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
     IE_NAME = 'soundcloud:set'
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+        'info_dict': {
+            'title': 'The Royal Concept EP',
+        },
+        'playlist_mincount': 6,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
 
         # extract uploader (which is in the url)
         uploader = mobj.group(1)
@@ -246,20 +250,32 @@ class SoundcloudSetIE(SoundcloudIE):
                 self._downloader.report_error('unable to download video webpage: %s' % compat_str(err['error_message']))
             return
 
-        self.report_extraction(full_title)
-        return {'_type': 'playlist',
-                'entries': [self._extract_info_dict(track) for track in info['tracks']],
-                'id': info['id'],
-                'title': info['title'],
-                }
+        return {
+            '_type': 'playlist',
+            'entries': [self._extract_info_dict(track) for track in info['tracks']],
+            'id': info['id'],
+            'title': info['title'],
+        }
 
 
 class SoundcloudUserIE(SoundcloudIE):
     _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
     IE_NAME = 'soundcloud:user'
-
-    # it's in tests/test_playlists.py
-    _TESTS = []
+    _TESTS = [{
+        'url': 'https://soundcloud.com/the-concept-band',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 12
+    }, {
+        'url': 'https://soundcloud.com/the-concept-band/likes',
+        'info_dict': {
+            'id': '9615865',
+            'title': 'The Royal Concept',
+        },
+        'playlist_mincount': 1,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -301,9 +317,18 @@ class SoundcloudUserIE(SoundcloudIE):
 class SoundcloudPlaylistIE(SoundcloudIE):
     _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)'
     IE_NAME = 'soundcloud:playlist'
+    _TESTS = [
 
-     # it's in tests/test_playlists.py
-    _TESTS = []
+        {
+            'url': 'http://api.soundcloud.com/playlists/4110309',
+            'info_dict': {
+                'id': '4110309',
+                'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+                'description': 're:.*?TILT Brass - Bowery Poetry Club',
+            },
+            'playlist_count': 6,
+        }
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py
index 46d727d1d..8a95fd656 100644
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dl/extractor/teachertube.py
@@ -106,6 +106,13 @@ class TeacherTubeUserIE(InfoExtractor):
         \s*
         <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
     '''
+    _TEST = {
+        'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
+        'info_dict': {
+            'id': 'rbhagwati2'
+        },
+        'playlist_mincount': 179,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index e772968d7..1cca47771 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -58,6 +58,13 @@ class TEDIE(SubtitlesInfoExtractor):
             'uploader': 'Gabby Giffords and Mark Kelly',
             'description': 'md5:5174aed4d0f16021b704120360f72b92',
         },
+    }, {
+        'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+        'info_dict': {
+            'id': '10',
+            'title': 'Who are the hackers?',
+        },
+        'playlist_mincount': 6,
     }]
 
     _NATIVE_FORMATS = {
diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py
index 0f389bd93..19415b2a6 100644
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dl/extractor/toypics.py
@@ -42,6 +42,13 @@ class ToypicsIE(InfoExtractor):
 class ToypicsUserIE(InfoExtractor):
     IE_DESC = 'Toypics user profile'
     _VALID_URL = r'http://videos\.toypics\.net/(?P<username>[^/?]+)(?:$|[?#])'
+    _TEST = {
+        'url': 'http://videos.toypics.net/Mikey',
+        'info_dict': {
+            'id': 'Mikey',
+        },
+        'playlist_mincount': 9917,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 488b10df9..816b101eb 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -68,21 +68,36 @@ class UstreamIE(InfoExtractor):
 class UstreamChannelIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)'
     IE_NAME = 'ustream:channel'
+    _TEST = {
+        'url': 'http://www.ustream.tv/channel/channeljapan',
+        'info_dict': {
+            'id': '10874166',
+        },
+        'playlist_mincount': 54,
+    }
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url)
-        slug = m.group('slug')
-        webpage = self._download_webpage(url, slug)
+        display_id = m.group('slug')
+        webpage = self._download_webpage(url, display_id)
         channel_id = get_meta_content('ustream:channel_id', webpage)
 
         BASE = 'http://www.ustream.tv'
         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
         video_ids = []
         while next_url:
-            reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
+            reply = self._download_json(
+                compat_urlparse.urljoin(BASE, next_url), display_id,
+                note='Downloading video information (next: %d)' % (len(video_ids) + 1))
             video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
             next_url = reply['nextUrl']
 
-        urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
-        url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
-        return self.playlist_result(url_entries, channel_id)
+        entries = [
+            self.url_result('http://www.ustream.tv/recorded/' + vid, 'Ustream')
+            for vid in video_ids]
+        return {
+            '_type': 'playlist',
+            'id': channel_id,
+            'display_id': display_id,
+            'entries': entries,
+        }
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 076c87119..e7754158d 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -65,6 +65,13 @@ class VineUserIE(InfoExtractor):
     IE_NAME = 'vine:user'
     _VALID_URL = r'(?:https?://)?vine\.co/(?P<user>[^/]+)/?(\?.*)?$'
     _VINE_BASE_URL = "https://vine.co/"
+    _TEST = {
+        'url': 'https://vine.co/Visa',
+        'info_dict': {
+            'id': 'Visa',
+        },
+        'playlist_mincount': 47,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index b293e2665..273d93d9e 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -77,9 +77,17 @@ class XTubeIE(InfoExtractor):
             'age_limit': 18,
         }
 
+
 class XTubeUserIE(InfoExtractor):
     IE_DESC = 'XTube user profile'
     _VALID_URL = r'https?://(?:www\.)?xtube\.com/community/profile\.php\?(.*?)user=(?P<username>[^&#]+)(?:$|[&#])'
+    _TEST = {
+        'url': 'http://www.xtube.com/community/profile.php?user=greenshowers',
+        'info_dict': {
+            'id': 'greenshowers',
+        },
+        'playlist_mincount': 155,
+    }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
author	Philipp Hagemeister <phihag@phihag.de>	2014-08-28 00:58:24 +0200
committer	Philipp Hagemeister <phihag@phihag.de>	2014-08-28 00:58:24 +0200
commit	22a6f15061127045f4d6ae1ff4efc922fa372cc2 (patch)
tree	f0df2ab32f301b3c66b6d9d147e49fe177e27a55 /youtube_dl/extractor
parent	259454525f9fe41947e6e05882336b7196fc8fce (diff)