[archiveorg] Add test, simplify and modernize

author: Sergey M․ <dstftw@gmail.com> 2014-12-29 02:08:46 +0600
committer: Sergey M․ <dstftw@gmail.com> 2014-12-29 02:08:46 +0600
commit: e8e28989eb8ab0dbb755a273cd1d79847494744b (patch)
tree: 013b12215eea6b7765e0e36b3513e4db10922bca /youtube_dl/extractor/archiveorg.py
parent: 0fa629d05bb521133f808b373fd92c9835275e48 (diff)
1 file changed, 29 insertions, 31 deletions
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index ba94b5454..9fc35a42b 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -1,50 +1,48 @@
 from __future__ import unicode_literals
 
-import json
-import re
-
 from .common import InfoExtractor
-from ..utils import (
-    unified_strdate,
-)
+from ..utils import unified_strdate
 
 
 class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
-    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
-    _TEST = {
-        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
-        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
+    _TESTS = [{
+        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
         'info_dict': {
-            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
-            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
-            "upload_date": "19681210",
-            "uploader": "SRI International"
+            'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
+            'ext': 'ogv',
+            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
+            'description': 'md5:1780b464abaca9991d8968c877bb53ed',
+            'upload_date': '19681210',
+            'uploader': 'SRI International'
         }
-    }
-
-    def get_optional_metadata(self, data, field):
-        try:
-            return data['metadata'][field][0]
-        except KeyError:
-            return None
+    }, {
+        'url': 'https://archive.org/details/Cops1922',
+        'md5': '18f2a19e6d89af8425671da1cf3d4e04',
+        'info_dict': {
+            'id': 'Cops1922',
+            'ext': 'ogv',
+            'title': 'Buster Keaton\'s "Cops" (1922)',
+            'description': 'md5:70f72ee70882f713d4578725461ffcc3',
+        }
+    }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         json_url = url + ('?' if '?' in url else '&') + 'output=json'
-        json_data = self._download_webpage(json_url, video_id)
-        data = json.loads(json_data)
+        data = self._download_json(json_url, video_id)
+
+        def get_optional(data_dict, field):
+            return data_dict['metadata'].get(field, [None])[0]
 
-        title = self.get_optional_metadata(data, 'title')
-        description = self.get_optional_metadata(data, 'description')
-        uploader = self.get_optional_metadata(data, 'creator')
-        upload_date = self.get_optional_metadata(data, 'date')
-        if upload_date:
-            upload_date = unified_strdate(upload_date)
+        title = get_optional(data, 'title')
+        description = get_optional(data, 'description')
+        uploader = get_optional(data, 'creator')
+        upload_date = unified_strdate(get_optional(data, 'date'))
 
         formats = [
             {
author	Sergey M․ <dstftw@gmail.com>	2014-12-29 02:08:46 +0600
committer	Sergey M․ <dstftw@gmail.com>	2014-12-29 02:08:46 +0600
commit	e8e28989eb8ab0dbb755a273cd1d79847494744b (patch)
tree	013b12215eea6b7765e0e36b3513e4db10922bca /youtube_dl/extractor/archiveorg.py
parent	0fa629d05bb521133f808b373fd92c9835275e48 (diff)