11 files changed, 218 insertions, 34 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f75939a05..14133c315 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE
 from .facebook import FacebookIE
 from .faz import FazIE
 from .fc2 import FC2IE
+from .firedrive import FiredriveIE
 from .firstpost import FirstpostIE
 from .firsttv import FirstTVIE
 from .fivemin import FiveMinIE
@@ -232,6 +233,7 @@ from .radiofrance import RadioFranceIE
 from .rai import RaiIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
+from .reverbnation import ReverbNationIE
 from .ringtv import RingTVIE
 from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
@@ -240,6 +242,7 @@ from .rtbf import RTBFIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
+from .ruhd import RUHDIE
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
@@ -268,8 +271,8 @@ from .soundcloud import (
     SoundcloudPlaylistIE
 )
 from .soundgasm import SoundgasmIE
-from .southparkstudios import (
-    SouthParkStudiosIE,
+from .southpark import (
+    SouthParkIE,
     SouthparkDeIE,
 )
 from .space import SpaceIE
diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py
new file mode 100644
index 000000000..d26145db1
--- /dev/null
+++ b/youtube_dl/extractor/firedrive.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    compat_urllib_parse,
+    compat_urllib_request,
+    determine_ext,
+)
+
+
+class FiredriveIE(InfoExtractor):
+    """Extractor for videos on firedrive.com (file and embed URLs)."""
+
+    _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \
+                 '(?:file|embed)/(?P<id>[0-9a-zA-Z]+)'
+    # Markup looked for to detect a file that is no longer available.
+    _FILE_DELETED_REGEX = r'<div class="removed_file_image">'
+
+    _TESTS = [{
+        'url': 'https://www.firedrive.com/file/FEB892FA160EBD01',
+        'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970',
+        'info_dict': {
+            'id': 'FEB892FA160EBD01',
+            'ext': 'flv',
+            'title': 'bbb_theora_486kbit.flv',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^http://.*\.jpg$',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the Firedrive video at ``url``.
+
+        Downloads the file page, posts its hidden form fields back to the
+        same address and scrapes title, thumbnail, extension and media
+        URL from the response.  Raises ExtractorError (expected) when the
+        first page contains the removed-file marker.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # Embed URLs are rewritten to the plain file page as well.
+        url = 'http://firedrive.com/file/%s' % video_id
+
+        webpage = self._download_webpage(url, video_id)
+
+        if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
+            raise ExtractorError('Video %s does not exist' % video_id,
+                                 expected=True)
+
+        # Hidden <input> name/value pairs that get posted back below.
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            (?:id="[^"]+"\s+)?
+            value="([^"]*)"
+            ''', webpage))
+
+        # On Python 3 urllib.request rejects str POST data, so encode the
+        # urlencoded (ASCII) form body before building the request.
+        post = compat_urllib_parse.urlencode(fields).encode('utf-8')
+        req = compat_urllib_request.Request(url, post)
+        req.add_header('Content-type', 'application/x-www-form-urlencoded')
+
+        # Apparently, this header is required for confirmation to work.
+        req.add_header('Host', 'www.firedrive.com')
+
+        webpage = self._download_webpage(req, video_id,
+                                         'Downloading video page')
+
+        # Non-greedy so the title ends at the first closing </div>.
+        title = self._search_regex(
+            r'class="external_title_left">(.+?)</div>', webpage, 'title')
+        thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage,
+                                       'thumbnail', fatal=False)
+        if thumbnail is not None:
+            # The matched value starts with '//'; prepend a scheme.
+            thumbnail = 'http:' + thumbnail
+
+        ext = self._search_regex(r'type:\s?\'([^\']+)\',',
+                                 webpage, 'extension', fatal=False)
+        video_url = self._search_regex(
+            r'file:\s?\'(http[^\']+)\',', webpage, 'file url')
+        if ext is None:
+            # Do not hand out a format with ext=None; guess from the URL.
+            # NOTE(review): assumes determine_ext(url) returns a guessed
+            # extension string -- confirm against ..utils.
+            ext = determine_ext(video_url)
+
+        formats = [{
+            'format_id': 'sd',
+            'url': video_url,
+            'ext': ext,
+        }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index af9490ccc..228b42d2b 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
             if mgid.endswith('.swf'):
                 mgid = mgid[:-4]
         except RegexNotFoundError:
+            mgid = None
+
+        if mgid is None or ':' not in mgid:
             mgid = self._search_regex(
                 [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'],
                 webpage, u'mgid')
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index 3d6096e46..94d5ba982 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -18,15 +18,15 @@ class NDRIE(InfoExtractor):
 
     _TESTS = [
         {
-            'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html',
-            'md5': 'e7a6079ca39d3568f4996cb858dd6708',
+            'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html',
+            'md5': '4a4eeafd17c3058b65f0c8f091355855',
             'note': 'Video file',
             'info_dict': {
-                'id': '7959',
+                'id': '325',
                 'ext': 'mp4',
-                'title': 'Markt - die ganze Sendung',
-                'description': 'md5:af9179cf07f67c5c12dc6d9997e05725',
-                'duration': 2655,
+                'title': 'Blaue Bohnen aus Blocken',
+                'description': 'md5:190d71ba2ccddc805ed01547718963bc',
+                'duration': 1715,
             },
         },
         {
diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py
index 0bc0859b4..6d5732d45 100644
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dl/extractor/pyvideo.py
@@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor):
             return self.url_result(m_youtube.group(1), 'Youtube')
 
         title = self._html_search_regex(
-            r'<div class="section">.*?<h3(?:\s+class="[^"]*")?>([^>]+?)</h3>',
+            r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
             webpage, 'title', flags=re.DOTALL)
         video_url = self._search_regex(
             [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py
new file mode 100644
index 000000000..49cf427a1
--- /dev/null
+++ b/youtube_dl/extractor/reverbnation.py
@@ -0,0 +1,64 @@
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import strip_jsonp
+
+
+class ReverbNationIE(InfoExtractor):
+    """Extractor for single songs on reverbnation.com."""
+
+    _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
+    _TESTS = [{
+        'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
+        'md5': '3da12ebca28c67c111a7f8b262d3f7a7',
+        'info_dict': {
+            'id': '16965047',
+            'ext': 'mp3',
+            'title': 'MONA LISA',
+            'uploader': 'ALKILADOS',
+            'uploader_id': 216429,
+            'thumbnail': 'http://gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the ReverbNation song at ``url``.
+
+        The song id is taken from the URL and looked up through the JSONP
+        endpoint on api.reverbnation.com; the result is a single audio-only
+        MP3 entry.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        song_id = mobj.group('id')
+
+        # The "_" parameter carries the current time in milliseconds
+        # (presumably for cache busting -- TODO confirm).
+        api_res = self._download_json(
+            'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d'
+            % (song_id, int(time.time() * 1000)),
+            song_id,
+            transform_source=strip_jsonp,
+            note='Downloading information of song %s' % song_id
+        )
+
+        # Guard against a missing or null "artist" value.
+        artist = api_res.get('artist') or {}
+
+        thumbnail = api_res.get('image', api_res.get('thumbnail'))
+        if thumbnail and thumbnail.startswith('//'):
+            # Protocol-relative URLs need a scheme to be usable here.
+            thumbnail = 'http:' + thumbnail
+
+        return {
+            'id': song_id,
+            'title': api_res.get('name'),
+            'url': api_res.get('url'),
+            'uploader': artist.get('name'),
+            'uploader_id': artist.get('id'),
+            'thumbnail': thumbnail,
+            'ext': 'mp3',
+            'vcodec': 'none',
+        }
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py
new file mode 100644
index 000000000..55b58e5e6
--- /dev/null
+++ b/youtube_dl/extractor/ruhd.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RUHDIE(InfoExtractor):
+    """Extractor for videos on ruhd.ru (play.php?vid=... pages)."""
+
+    _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://www.ruhd.ru/play.php?vid=207',
+        'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83',
+        'info_dict': {
+            'id': '207',
+            'ext': 'divx',
+            'title': 'КОТ бааааам',
+            'description': 'классный кот)',
+            # Raw string: '\.' is not a valid escape in a plain literal.
+            'thumbnail': r're:^http://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the RUHD video at ``url``.
+
+        Everything is scraped from the player page: the media URL and the
+        preview image come from <param> tags, the title from <title> and
+        the description from the "longdesc" block.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r'<param name="src" value="([^"]+)"', webpage, 'video url')
+        # The dot in "RUHD.ru" is escaped so it only matches a literal dot.
+        title = self._html_search_regex(
+            r'<title>([^<]+)&nbsp;&nbsp; RUHD\.ru - Видео Высокого качества №1 в России!</title>',
+            webpage, 'title')
+        description = self._html_search_regex(
+            r'(?s)<div id="longdesc">(.+?)<span id="showlink">',
+            webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'<param name="previewImage" value="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            # Prefix the host (the value is presumably site-relative).
+            thumbnail = 'http://www.ruhd.ru' + thumbnail
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 14ec9452d..8a77c1370 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -81,16 +81,16 @@ class SoundcloudIE(InfoExtractor):
         },
         # downloadable song
         {
-            'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1',
-            'md5': '56a8b69568acaa967b4c49f9d1d52d19',
+            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
+            'md5': 'fee7b8747b09bb755cefd4b853e7249a',
             'info_dict': {
-                'id': '105614606',
+                'id': '128590877',
                 'ext': 'wav',
-                'title': 'Just Your Problem Baby (Acapella)',
-                'description': 'Vocals',
-                'uploader': 'Sim Gretina',
-                'upload_date': '20130815',
-                #'duration': 42,
+                'title': 'Bus Brakes',
+                'description': 'md5:0170be75dd395c96025d210d261c784e',
+                'uploader': 'oddsamples',
+                'upload_date': '20140109',
+                'duration': 17,
             },
         },
     ]
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southpark.py
index aea8e6439..c20397b3d 100644
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southpark.py
@@ -3,24 +3,24 @@ from __future__ import unicode_literals
 from .mtv import MTVServicesInfoExtractor
 
 
-class SouthParkStudiosIE(MTVServicesInfoExtractor):
-    IE_NAME = 'southparkstudios.com'
-    _VALID_URL = r'https?://(www\.)?(?P<url>southparkstudios\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
+class SouthParkIE(MTVServicesInfoExtractor):
+    IE_NAME = 'southpark.cc.com'
+    _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.cc\.com/(clips|full-episodes)/(?P<id>.+?)(\?|#|$))'
 
     _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
 
     _TESTS = [{
-        'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+        'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
         'info_dict': {
             'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
             'ext': 'mp4',
-            'title': 'Bat Daded',
+            'title': 'South Park|Bat Daded',
             'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
         },
     }]
 
 
-class SouthparkDeIE(SouthParkStudiosIE):
+class SouthparkDeIE(SouthParkIE):
     IE_NAME = 'southpark.de'
     _VALID_URL = r'https?://(www\.)?(?P<url>southpark\.de/(clips|alle-episoden)/(?P<id>.+?)(\?|#|$))'
     _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/'
diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py
index ad175b83e..d848ee186 100644
--- a/youtube_dl/extractor/tlc.py
+++ b/youtube_dl/extractor/tlc.py
@@ -5,6 +5,7 @@ import re
 from .common import InfoExtractor
 from .brightcove import BrightcoveIE
 from .discovery import DiscoveryIE
+from ..utils import compat_urlparse
 
 
 class TlcIE(DiscoveryIE):
@@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor):
         # Otherwise we don't get the correct 'BrightcoveExperience' element,
         # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/
         iframe_url = iframe_url.replace('.htm?', '.php?')
+        url_fragment = compat_urlparse.urlparse(url).fragment
+        if url_fragment:
+            # Since the fragment is not sent to the server, we always get the same iframe
+            iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url)
         iframe = self._download_webpage(iframe_url, title)
 
         return {
diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py
index c980153ec..d516b6427 100644
--- a/youtube_dl/extractor/tutv.py
+++ b/youtube_dl/extractor/tutv.py
@@ -1,21 +1,21 @@
 from __future__ import unicode_literals
+
 import base64
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    compat_parse_qs,
-)
+from ..utils import compat_parse_qs
 
 
 class TutvIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
     _TEST = {
-        'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc',
-        'file': '2742556.flv',
-        'md5': '5eb766671f69b82e528dc1e7769c5cb2',
+        'url': 'http://tu.tv/videos/robots-futbolistas',
+        'md5': '627c7c124ac2a9b5ab6addb94e0e65f7',
         'info_dict': {
-            'title': 'Noah en pabellon cuahutemoc',
+            'id': '2973058',
+            'ext': 'flv',
+            'title': 'Robots futbolistas',
         },
     }
 
@@ -26,10 +26,9 @@ class TutvIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
 
-        data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id)
-        data_content = self._download_webpage(data_url, video_id, note='Downloading video info')
-        data = compat_parse_qs(data_content)
-        video_url = base64.b64decode(data['kpt'][0]).decode('utf-8')
+        data_content = self._download_webpage(
+            'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
+        video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
 
         return {
             'id': internal_id,