4 files changed, 141 insertions, 147 deletions
diff --git a/test/test_download.py b/test/test_download.py
index 5877c42b3..14ac511d2 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -107,11 +107,7 @@ def generator(test_case):
                 with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                 for (info_field, value) in tc.get('info_dict', {}).items():
-                    if value.startswith('md5:'):
-                        md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
-                        self.assertEqual(value[3:], md5_info_value)
-                    else:
-                        self.assertEqual(value, info_dict.get(info_field))
+                    self.assertEqual(value, info_dict.get(info_field))
         finally:
             for tc in test_cases:
                 _try_rm(tc['file'])
diff --git a/test/tests.json b/test/tests.json
index 2c2137ce4..a46ff491b 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -225,5 +225,77 @@
         "uploader_id": "ford-lopatin",
         "location": "Spain"
     }
+  },
+  {
+    "name": "Facebook",
+    "url": "https://www.facebook.com/photo.php?v=120708114770723",
+    "file": "120708114770723.mp4",
+    "md5": "48975a41ccc4b7a581abd68651c1a5a8",
+    "info_dict": {
+      "title": "PEOPLE ARE AWESOME 2013",
+      "duration": 279
+    }
+  },
+  {
+    "name": "EightTracks",
+    "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
+    "playlist": [
+      {
+        "file": "11885610.m4a",
+        "md5": "96ce57f24389fc8734ce47f4c1abcc55",
+        "info_dict": {
+          "title": "youtube-dl test track 1 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885608.m4a",
+        "md5": "4ab26f05c1f7291ea460a3920be8021f",
+        "info_dict": {
+          "title": "youtube-dl test track 2 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885679.m4a",
+        "md5": "d30b5b5f74217410f4689605c35d1fd7",
+        "info_dict": {
+          "title": "youtube-dl test track 3 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885680.m4a",
+        "md5": "4eb0a669317cd725f6bbd336a29f923a",
+        "info_dict": {
+          "title": "youtube-dl test track 4 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885682.m4a",
+        "md5": "1893e872e263a2705558d1d319ad19e8",
+        "info_dict": {
+          "title": "youtube-dl test track 5 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885683.m4a",
+        "md5": "b673c46f47a216ab1741ae8836af5899",
+        "info_dict": {
+          "title": "youtube-dl test track 6 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885684.m4a",
+        "md5": "1d74534e95df54986da7f5abf7d842b7",
+        "info_dict": {
+          "title": "youtube-dl test track 7 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885685.m4a",
+        "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
+        "info_dict": {
+          "title": "youtube-dl test track 8 \"'/\\\u00e4\u21ad"
+        }
+      }
+    ]
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index dcd7ca647..50a5a5cfb 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import
 
 import base64
 import datetime
+import itertools
 import netrc
 import os
 import re
@@ -1980,62 +1981,14 @@ class DepositFilesIE(InfoExtractor):
 class FacebookIE(InfoExtractor):
     """Information Extractor for Facebook"""
 
-    _WORKING = False
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
     _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
     _NETRC_MACHINE = 'facebook'
-    _available_formats = ['video', 'highqual', 'lowqual']
-    _video_extensions = {
-        'video': 'mp4',
-        'highqual': 'mp4',
-        'lowqual': 'mp4',
-    }
     IE_NAME = u'facebook'
 
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def _reporter(self, message):
-        """Add header and report message."""
-        self._downloader.to_screen(u'[facebook] %s' % message)
-
     def report_login(self):
         """Report attempt to log in."""
-        self._reporter(u'Logging in')
-
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self._reporter(u'%s: Downloading video webpage' % video_id)
-
-    def report_information_extraction(self, video_id):
-        """Report attempt to extract video information."""
-        self._reporter(u'%s: Extracting video information' % video_id)
-
-    def _parse_page(self, video_webpage):
-        """Extract video information from page"""
-        # General data
-        data = {'title': r'\("video_title", "(.*?)"\)',
-            'description': r'<div class="datawrap">(.*?)</div>',
-            'owner': r'\("video_owner_name", "(.*?)"\)',
-            'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
-            }
-        video_info = {}
-        for piece in data.keys():
-            mobj = re.search(data[piece], video_webpage)
-            if mobj is not None:
-                video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-
-        # Video urls
-        video_urls = {}
-        for fmt in self._available_formats:
-            mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
-            if mobj is not None:
-                # URL is in a Javascript segment inside an escaped Unicode format within
-                # the generally utf-8 page
-                video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-        video_info['video_urls'] = video_urls
-
-        return video_info
+        self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
 
     def _real_initialize(self):
         if self._downloader is None:
@@ -2088,100 +2041,33 @@ class FacebookIE(InfoExtractor):
             return
         video_id = mobj.group('ID')
 
-        # Get video webpage
-        self.report_video_webpage_download(video_id)
-        request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
-        try:
-            page = compat_urllib_request.urlopen(request)
-            video_webpage = page.read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
-            return
-
-        # Start extracting information
-        self.report_information_extraction(video_id)
-
-        # Extract information
-        video_info = self._parse_page(video_webpage)
-
-        # uploader
-        if 'owner' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-            return
-        video_uploader = video_info['owner']
-
-        # title
-        if 'title' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract video title')
-            return
-        video_title = video_info['title']
-        video_title = video_title.decode('utf-8')
-
-        # thumbnail image
-        if 'thumbnail' not in video_info:
-            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
-            video_thumbnail = ''
-        else:
-            video_thumbnail = video_info['thumbnail']
-
-        # upload date
-        upload_date = None
-        if 'upload_date' in video_info:
-            upload_time = video_info['upload_date']
-            timetuple = email.utils.parsedate_tz(upload_time)
-            if timetuple is not None:
-                try:
-                    upload_date = time.strftime('%Y%m%d', timetuple[0:9])
-                except:
-                    pass
-
-        # description
-        video_description = video_info.get('description', 'No description available.')
+        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
+        webpage = self._download_webpage(url, video_id)
 
-        url_map = video_info['video_urls']
-        if url_map:
-            # Decide which formats to download
-            req_format = self._downloader.params.get('format', None)
-            format_limit = self._downloader.params.get('format_limit', None)
+        BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
+        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
+        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        if not m:
+            raise ExtractorError(u'Cannot parse data')
+        data = dict(json.loads(m.group(1)))
+        video_url = compat_urllib_parse.unquote(data['hd_src'])
+        video_duration = int(data['video_duration'])
 
-            if format_limit is not None and format_limit in self._available_formats:
-                format_list = self._available_formats[self._available_formats.index(format_limit):]
-            else:
-                format_list = self._available_formats
-            existing_formats = [x for x in format_list if x in url_map]
-            if len(existing_formats) == 0:
-                self._downloader.trouble(u'ERROR: no known formats available for video')
-                return
-            if req_format is None:
-                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-            elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
-            elif req_format == '-1':
-                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-            else:
-                # Specific format
-                if req_format not in url_map:
-                    self._downloader.trouble(u'ERROR: requested format not available')
-                    return
-                video_url_list = [(req_format, url_map[req_format])] # Specific format
+        m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
+        if not m:
+            raise ExtractorError(u'Cannot find title in webpage')
+        video_title = unescapeHTML(m.group(1))
 
-        results = []
-        for format_param, video_real_url in video_url_list:
-            # Extension
-            video_extension = self._video_extensions.get(format_param, 'mp4')
+        info = {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': 'mp4',
+            'duration': video_duration,
+            'thumbnail': data['thumbnail_src'],
+        }
+        return [info]
 
-            results.append({
-                'id':       video_id.decode('utf-8'),
-                'url':      video_real_url.decode('utf-8'),
-                'uploader': video_uploader.decode('utf-8'),
-                'upload_date':  upload_date,
-                'title':    video_title,
-                'ext':      video_extension.decode('utf-8'),
-                'format':   (format_param is None and u'NA' or format_param.decode('utf-8')),
-                'thumbnail':    video_thumbnail.decode('utf-8'),
-                'description':  video_description.decode('utf-8'),
-            })
-        return results
 
 class BlipTVIE(InfoExtractor):
     """Information extractor for blip.tv"""
@@ -3927,8 +3813,6 @@ class PornotubeIE(InfoExtractor):
 
         return [info]
 
-
-
 class YouJizzIE(InfoExtractor):
     """Information extractor for youjizz.com."""
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
@@ -3975,6 +3859,47 @@ class YouJizzIE(InfoExtractor):
 
         return [info]
 
+class EightTracksIE(InfoExtractor):
+    """Information extractor for 8tracks.com mixes (one result per track)."""
+    IE_NAME = '8tracks'
+    _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/]+)'
+
+    def _real_extract(self, url):
+        """Return a list with one info dict (id, url, title, ext) per track."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        playlist_id = mobj.group('id')
+        webpage = self._download_webpage(url, playlist_id)
+        # The mix metadata is the JSON argument of the page's TRAX.Mix(...) call.
+        m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL)
+        if not m:
+            raise ExtractorError(u'Cannot find trax information')
+        data = json.loads(m.group(1))
+
+        # Random token placed in the /sets/<token>/ path of every API request below.
+        session = str(random.randint(0, 1000000000))
+        mix_id = data['id']
+        track_count = data['tracks_count']
+        next_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+        res = []
+        # Each response carries one track; keep requesting until at_last_track is set.
+        for i in itertools.count():
+            api_json = self._download_webpage(next_url, playlist_id,
+                note=u'Downloading song information %s/%s' % (i + 1, track_count),
+                errnote=u'Failed to download song information')
+            api_data = json.loads(api_json)
+            track_data = api_data[u'set']['track']
+            res.append({
+                'id': track_data['id'],
+                'url': track_data['track_file_stream_url'],
+                'title': track_data['name'],
+                'ext': 'm4a',
+            })
+            if api_data['set']['at_last_track']:
+                break
+            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
+        return res
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4021,6 +3946,7 @@ def gen_extractors():
         SteamIE(),
         UstreamIE(),
         RBMARadioIE(),
+        EightTracksIE(),
         GenericIE()
     ]
 
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 9322a3bfe..dfd7d6cec 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2013.01.13'
+__version__ = '2013.01.27'