From 984e4d487520bd2a860b31b3165416c879b28096 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Wed, 24 Jun 2015 01:13:23 +0100
Subject: [googledrive] Add new extractor

---
 youtube_dl/extractor/__init__.py    |   1 +
 youtube_dl/extractor/googledrive.py | 106 ++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+)
 create mode 100644 youtube_dl/extractor/googledrive.py

(limited to 'youtube_dl/extractor')
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3cfa804ec..6655d7eb5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -209,6 +209,7 @@ from .globo import GloboIE
 from .godtube import GodTubeIE
 from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
+from .googledrive import GoogleDriveIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
new file mode 100644
index 000000000..8c611fa47
--- /dev/null
+++ b/youtube_dl/extractor/googledrive.py
@@ -0,0 +1,106 @@
+from .common import InfoExtractor
+from ..utils import RegexNotFoundError
+
+class GoogleDriveIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
+    _TEST = {
+        'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1',
+        'info_dict': {
+            'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U',
+            'ext': 'mp4',
+            'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4',
+        }
+    }
+    _formats = {
+        '5': {'ext': 'flv'},
+        '6': {'ext': 'flv'},
+        '13': {'ext': '3gp'},
+        '17': {'ext': '3gp'},
+        '18': {'ext': 'mp4'},
+        '22': {'ext': 'mp4'},
+        '34': {'ext': 'flv'},
+        '35': {'ext': 'flv'},
+        '36': {'ext': '3gp'},
+        '37': {'ext': 'mp4'},
+        '38': {'ext': 'mp4'},
+        '43': {'ext': 'webm'},
+        '44': {'ext': 'webm'},
+        '45': {'ext': 'webm'},
+        '46': {'ext': 'webm'},
+        '59': {'ext': 'mp4'}
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(
+            'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
+        )
+        try:
+            title = self._html_search_regex(
+                r'"title","(?P<title>.*?)"',
+                webpage,
+                'title',
+                group='title'
+            )
+            fmt_stream_map = self._html_search_regex(
+                r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
+                webpage,
+                'fmt_stream_map',
+                group='fmt_stream_map'
+            )
+            fmt_list = self._html_search_regex(
+                r'"fmt_list","(?P<fmt_list>.*?)"',
+                webpage,
+                'fmt_list',
+                group='fmt_list'
+            )
+#			timestamp = self._html_search_regex(
+#				r'"timestamp","(?P<timestamp>.*?)"',
+#				webpage,
+#				'timestamp',
+#				group='timestamp'
+#			)
+            length_seconds = self._html_search_regex(
+                r'"length_seconds","(?P<length_seconds>.*?)"',
+                webpage,
+                'length_seconds',
+                group='length_seconds'
+            )
+        except RegexNotFoundError:
+            try:
+                reason = self._html_search_regex(
+                    r'"reason","(?P<reason>.*?)"',
+                    webpage,
+                    'reason',
+                    group='reason'
+                )
+                self.report_warning(reason)
+                return
+            except RegexNotFoundError:
+                self.report_warning('not a video')
+                return
+
+        fmt_stream_map = fmt_stream_map.split(',')
+        fmt_list = fmt_list.split(',')
+        formats = []
+        for i in range(len(fmt_stream_map)):
+            fmt_id, fmt_url = fmt_stream_map[i].split('|')
+            resolution = fmt_list[i].split('/')[1]
+            width, height = resolution.split('x')
+            formats.append({
+                'url': fmt_url,
+                'format_id': fmt_id,
+                'resolution': resolution,
+                'width': int(width),
+                'height': int(height),
+                'ext': self._formats[fmt_id]['ext']
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+#           'timestamp': int(timestamp),
+            'duration': int(length_seconds),
+            'formats': formats
+        }
-- 
cgit v1.2.3


From f120a7ab5e9c560a8114f9662e2f213243a945b0 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Wed, 24 Jun 2015 14:56:19 +0100
Subject: [googledrive] point the _TEST at a different video (Big Buck Bunny.mp4)

---
 youtube_dl/extractor/googledrive.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 8c611fa47..e3d5c3418 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -4,11 +4,11 @@ from ..utils import RegexNotFoundError
 class GoogleDriveIE(InfoExtractor):
     _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
     _TEST = {
-        'url': 'https://drive.google.com/file/d/0BzpExh0WzJF0NlR5WUlxdEVsY0U/edit?pli=1',
+        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
         'info_dict': {
-            'id': '0BzpExh0WzJF0NlR5WUlxdEVsY0U',
+            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
-            'title': '[AHSH] Fairy Tail S2 - 01 [720p].mp4',
+            'title': 'Big Buck Bunny.mp4',
         }
     }
     _formats = {
-- 
cgit v1.2.3


From 3e5f3df1729846a33631dd38a887cd1d81a727c1 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Mon, 29 Jun 2015 07:53:21 +0100
Subject: move the embed to a separate class

---
 youtube_dl/extractor/googledrive.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index e3d5c3418..ac891b275 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -1,8 +1,37 @@
+import re
+
 from .common import InfoExtractor
 from ..utils import RegexNotFoundError
 
+class GoogleDriveEmbedIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9-]{28})(?:/preview)'
+    _TEST = {
+        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
+        'info_dict': {
+            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
+            'ext': 'mp4',
+            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
+        }
+    }
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9-]{28})(?:/preview)',
+            webpage)
+        if mobj:
+            return 'https://drive.google.com/file/d/%s' % mobj.group('id')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return {
+            '_type': 'url',
+            'ie-key': 'GoogleDrive',
+            'url': 'https://drive.google.com/file/d/%s' % video_id
+        }
+
 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
+    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9-]{28})'
     _TEST = {
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
         'info_dict': {
-- 
cgit v1.2.3


From 2d651a2d02885cddf1752b45497e9113d3a3d403 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Mon, 29 Jun 2015 07:55:44 +0100
Subject: import google drive embed class

---
 youtube_dl/extractor/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 6655d7eb5..02e18a0da 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -209,7 +209,10 @@ from .globo import GloboIE
 from .godtube import GodTubeIE
 from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
-from .googledrive import GoogleDriveIE
+from .googledrive import (
+    GoogleDriveEmbedIE,
+    GoogleDriveIE,
+)
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
-- 
cgit v1.2.3


From 653789afc72d1a225b971541fb633dd768d58942 Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Mon, 29 Jun 2015 08:01:30 +0100
Subject: add google drive embeds

---
 youtube_dl/extractor/generic.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 6d2efb22e..3f7b094db 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -48,6 +48,7 @@ from .vimeo import VimeoIE
 from .dailymotion import DailymotionCloudIE
 from .onionstudios import OnionStudiosIE
 from .snagfilms import SnagFilmsEmbedIE
+from .googledrive import GoogleDriveEmbedIE
 
 
 class GenericIE(InfoExtractor):
@@ -1599,6 +1600,11 @@ class GenericIE(InfoExtractor):
         if nbc_sports_url:
             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 
+        # Look for Google Drive embeds
+        google_drive_url = GoogleDriveEmbedIE._extract_url(webpage)
+        if google_drive_url:
+            return self.url_result(google_drive_url, 'GoogleDrive')
+
         # Look for UDN embeds
         mobj = re.search(
             r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
-- 
cgit v1.2.3


From 3b3d531965f0f36c20f5fa8557481c144170653f Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Fri, 17 Jul 2015 14:17:19 +0100
Subject: fix id regexes: allow '_' in ids and stop requiring /preview for embeds

---
 youtube_dl/extractor/googledrive.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index ac891b275..c82c9037f 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -4,7 +4,7 @@ from .common import InfoExtractor
 from ..utils import RegexNotFoundError
 
 class GoogleDriveEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9-]{28})(?:/preview)'
+    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
     _TEST = {
         'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
         'info_dict': {
@@ -17,7 +17,7 @@ class GoogleDriveEmbedIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9-]{28})(?:/preview)',
+            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
             webpage)
         if mobj:
             return 'https://drive.google.com/file/d/%s' % mobj.group('id')
@@ -31,7 +31,7 @@ class GoogleDriveEmbedIE(InfoExtractor):
         }
 
 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9-]{28})'
+    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
     _TEST = {
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
         'info_dict': {
-- 
cgit v1.2.3


From d1cc05e17eccccb7ee6473574c6a4f887104baeb Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Fri, 17 Jul 2015 14:37:21 +0100
Subject: remove unnecessary regex group names

---
 youtube_dl/extractor/googledrive.py | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index c82c9037f..6d9bcfefd 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -62,46 +62,40 @@ class GoogleDriveIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
-            'http://docs.google.com/file/d/'+video_id, video_id, encoding='unicode_escape'
+            'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
         )
         try:
             title = self._html_search_regex(
-                r'"title","(?P<title>.*?)"',
+                r'"title"\s+,\s+"[^"]+',
                 webpage,
-                'title',
-                group='title'
+                'title'
             )
             fmt_stream_map = self._html_search_regex(
-                r'"fmt_stream_map","(?P<fmt_stream_map>.*?)"',
+                r'"fmt_stream_map"\s+,\s+"[^"]+',
                 webpage,
-                'fmt_stream_map',
-                group='fmt_stream_map'
+                'fmt_stream_map'
             )
             fmt_list = self._html_search_regex(
-                r'"fmt_list","(?P<fmt_list>.*?)"',
+                r'"fmt_list"\s+,\s+"[^"]+',
                 webpage,
-                'fmt_list',
-                group='fmt_list'
+                'fmt_list'
             )
 #			timestamp = self._html_search_regex(
-#				r'"timestamp","(?P<timestamp>.*?)"',
+#				r'"timestamp"\s+,\s+"[^"]+',
 #				webpage,
-#				'timestamp',
-#				group='timestamp'
+#				'timestamp'
 #			)
             length_seconds = self._html_search_regex(
-                r'"length_seconds","(?P<length_seconds>.*?)"',
+                r'"length_seconds"\s+,\s+"[^"]+',
                 webpage,
-                'length_seconds',
-                group='length_seconds'
+                'length_seconds'
             )
         except RegexNotFoundError:
             try:
                 reason = self._html_search_regex(
-                    r'"reason","(?P<reason>.*?)"',
+                    r'"reason","[^"]+',
                     webpage,
-                    'reason',
-                    group='reason'
+                    'reason'
                 )
                 self.report_warning(reason)
                 return
-- 
cgit v1.2.3


From 36dbca87848fc5698d3e0b89380c7bcec741ceaf Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Fri, 17 Jul 2015 14:52:01 +0100
Subject: fix recursive error: correct 'ie-key' to 'ie_key' and restore regex capture groups

---
 youtube_dl/extractor/googledrive.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 6d9bcfefd..a3d9b4450 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -26,7 +26,7 @@ class GoogleDriveEmbedIE(InfoExtractor):
         video_id = self._match_id(url)
         return {
             '_type': 'url',
-            'ie-key': 'GoogleDrive',
+            'ie_key': 'GoogleDrive',
             'url': 'https://drive.google.com/file/d/%s' % video_id
         }
 
@@ -66,34 +66,34 @@ class GoogleDriveIE(InfoExtractor):
         )
         try:
             title = self._html_search_regex(
-                r'"title"\s+,\s+"[^"]+',
+                r'"title"\s*,\s*"([^"]+)',
                 webpage,
                 'title'
             )
             fmt_stream_map = self._html_search_regex(
-                r'"fmt_stream_map"\s+,\s+"[^"]+',
+                r'"fmt_stream_map"\s*,\s*"([^"]+)',
                 webpage,
                 'fmt_stream_map'
             )
             fmt_list = self._html_search_regex(
-                r'"fmt_list"\s+,\s+"[^"]+',
+                r'"fmt_list"\s*,\s*"([^"]+)',
                 webpage,
                 'fmt_list'
             )
 #			timestamp = self._html_search_regex(
-#				r'"timestamp"\s+,\s+"[^"]+',
+#				r'"timestamp"\s*,\s*"([^"]+)',
 #				webpage,
 #				'timestamp'
 #			)
             length_seconds = self._html_search_regex(
-                r'"length_seconds"\s+,\s+"[^"]+',
+                r'"length_seconds"\s*,\s*"([^"]+)',
                 webpage,
                 'length_seconds'
             )
         except RegexNotFoundError:
             try:
                 reason = self._html_search_regex(
-                    r'"reason","[^"]+',
+                    r'"reason","([^"]+)',
                     webpage,
                     'reason'
                 )
-- 
cgit v1.2.3


From 8e92d21ebf6f17e14c9e916f22e49f27529556af Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Sat, 18 Jul 2015 23:31:14 +0100
Subject: [googledrive] raise ExtractorError instead of warning

---
 youtube_dl/extractor/googledrive.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index a3d9b4450..7bc7b7a0d 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -1,7 +1,10 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import RegexNotFoundError
+from ..utils import (
+    RegexNotFoundError,
+    ExtractorError,
+)
 
 class GoogleDriveEmbedIE(InfoExtractor):
     _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
@@ -97,10 +100,10 @@ class GoogleDriveIE(InfoExtractor):
                     webpage,
                     'reason'
                 )
-                self.report_warning(reason)
+                raise ExtractorError(reason)
                 return
             except RegexNotFoundError:
-                self.report_warning('not a video')
+                raise ExtractorError('not a video')
                 return
 
         fmt_stream_map = fmt_stream_map.split(',')
-- 
cgit v1.2.3


From 5b251628e9f45c89c1becb3f62c4212874eb74ea Mon Sep 17 00:00:00 2001
From: remitamine <remitamine@gmail.com>
Date: Mon, 21 Dec 2015 03:05:34 +0100
Subject: [googledrive] Modernize

---
 youtube_dl/extractor/__init__.py    |   5 +-
 youtube_dl/extractor/generic.py     |   4 +-
 youtube_dl/extractor/googledrive.py | 144 +++++++++++++-----------------------
 3 files changed, 53 insertions(+), 100 deletions(-)

(limited to 'youtube_dl/extractor')

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 02e18a0da..6655d7eb5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -209,10 +209,7 @@ from .globo import GloboIE
 from .godtube import GodTubeIE
 from .goldenmoustache import GoldenMoustacheIE
 from .golem import GolemIE
-from .googledrive import (
-    GoogleDriveEmbedIE,
-    GoogleDriveIE,
-)
+from .googledrive import GoogleDriveIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .gorillavid import GorillaVidIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 3f7b094db..abd98e500 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -48,7 +48,7 @@ from .vimeo import VimeoIE
 from .dailymotion import DailymotionCloudIE
 from .onionstudios import OnionStudiosIE
 from .snagfilms import SnagFilmsEmbedIE
-from .googledrive import GoogleDriveEmbedIE
+from .googledrive import GoogleDriveIE
 
 
 class GenericIE(InfoExtractor):
@@ -1601,7 +1601,7 @@ class GenericIE(InfoExtractor):
             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 
         # Look for Google Drive embeds
-        google_drive_url = GoogleDriveEmbedIE._extract_url(webpage)
+        google_drive_url = GoogleDriveIE._extract_url(webpage)
         if google_drive_url:
             return self.url_result(google_drive_url, 'GoogleDrive')
 
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 7bc7b7a0d..f354c9c7a 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -1,132 +1,88 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
 from ..utils import (
-    RegexNotFoundError,
     ExtractorError,
+    int_or_none,
 )
 
-class GoogleDriveEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
+
+class GoogleDriveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
     _TEST = {
-        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
+        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
+        'md5': '881f7700aec4f538571fa1e0eed4a7b6',
         'info_dict': {
-            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
+            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
-            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
+            'title': 'Big Buck Bunny.mp4',
+            'duration': 46,
         }
     }
+    _FORMATS_EXT = {
+        '5': 'flv',
+        '6': 'flv',
+        '13': '3gp',
+        '17': '3gp',
+        '18': 'mp4',
+        '22': 'mp4',
+        '34': 'flv',
+        '35': 'flv',
+        '36': '3gp',
+        '37': 'mp4',
+        '38': 'mp4',
+        '43': 'webm',
+        '44': 'webm',
+        '45': 'webm',
+        '46': 'webm',
+        '59': 'mp4',
+    }
 
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
             webpage)
         if mobj:
             return 'https://drive.google.com/file/d/%s' % mobj.group('id')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        return {
-            '_type': 'url',
-            'ie_key': 'GoogleDrive',
-            'url': 'https://drive.google.com/file/d/%s' % video_id
-        }
+        webpage = self._download_webpage(
+            'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
 
-class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
-    _TEST = {
-        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'info_dict': {
-            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
-            'ext': 'mp4',
-            'title': 'Big Buck Bunny.mp4',
-        }
-    }
-    _formats = {
-        '5': {'ext': 'flv'},
-        '6': {'ext': 'flv'},
-        '13': {'ext': '3gp'},
-        '17': {'ext': '3gp'},
-        '18': {'ext': 'mp4'},
-        '22': {'ext': 'mp4'},
-        '34': {'ext': 'flv'},
-        '35': {'ext': 'flv'},
-        '36': {'ext': '3gp'},
-        '37': {'ext': 'mp4'},
-        '38': {'ext': 'mp4'},
-        '43': {'ext': 'webm'},
-        '44': {'ext': 'webm'},
-        '45': {'ext': 'webm'},
-        '46': {'ext': 'webm'},
-        '59': {'ext': 'mp4'}
-    }
+        reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
+        if reason:
+            raise ExtractorError(reason)
 
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(
-            'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
-        )
-        try:
-            title = self._html_search_regex(
-                r'"title"\s*,\s*"([^"]+)',
-                webpage,
-                'title'
-            )
-            fmt_stream_map = self._html_search_regex(
-                r'"fmt_stream_map"\s*,\s*"([^"]+)',
-                webpage,
-                'fmt_stream_map'
-            )
-            fmt_list = self._html_search_regex(
-                r'"fmt_list"\s*,\s*"([^"]+)',
-                webpage,
-                'fmt_list'
-            )
-#			timestamp = self._html_search_regex(
-#				r'"timestamp"\s*,\s*"([^"]+)',
-#				webpage,
-#				'timestamp'
-#			)
-            length_seconds = self._html_search_regex(
-                r'"length_seconds"\s*,\s*"([^"]+)',
-                webpage,
-                'length_seconds'
-            )
-        except RegexNotFoundError:
-            try:
-                reason = self._html_search_regex(
-                    r'"reason","([^"]+)',
-                    webpage,
-                    'reason'
-                )
-                raise ExtractorError(reason)
-                return
-            except RegexNotFoundError:
-                raise ExtractorError('not a video')
-                return
+        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
+        duration = int_or_none(self._search_regex(
+            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
+        fmt_stream_map = self._search_regex(
+            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
+        fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')
 
-        fmt_stream_map = fmt_stream_map.split(',')
-        fmt_list = fmt_list.split(',')
         formats = []
-        for i in range(len(fmt_stream_map)):
-            fmt_id, fmt_url = fmt_stream_map[i].split('|')
-            resolution = fmt_list[i].split('/')[1]
+        for fmt, fmt_stream in zip(fmt_list, fmt_stream_map):
+            fmt_id, fmt_url = fmt_stream.split('|')
+            resolution = fmt.split('/')[1]
             width, height = resolution.split('x')
             formats.append({
                 'url': fmt_url,
                 'format_id': fmt_id,
                 'resolution': resolution,
-                'width': int(width),
-                'height': int(height),
-                'ext': self._formats[fmt_id]['ext']
+                'width': int_or_none(width),
+                'height': int_or_none(height),
+                'ext': self._FORMATS_EXT[fmt_id],
             })
         self._sort_formats(formats)
 
         return {
             'id': video_id,
             'title': title,
-#           'timestamp': int(timestamp),
-            'duration': int(length_seconds),
-            'formats': formats
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'duration': duration,
+            'formats': formats,
         }
-- 
cgit v1.2.3