| | | |
|---|---|---|
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-09-06 10:41:46 +0200 |
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-09-06 10:43:02 +0200 |
| commit | fd9cf738363e180bb3abf0829e1f322e12c68b21 (patch) | |
| tree | 27692062686ea8a56fae7f5da19320aac928669f | |
| parent | 0638ad9999e7c374b253d0e13f4e3a20ef0b1171 (diff) | |
[youtube] Users: download from the api in json to simplify extraction (fixes #1358)
The old regex-based scraping could pick up duplicate or unrelated video ids when a video description contained links.
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 15 |

1 file changed, 8 insertions(+), 7 deletions(-)
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index cd5165c94..98a44f333 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1018,8 +1018,7 @@ class YoutubeUserIE(InfoExtractor):
     _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
-    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
-    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
+    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = u'youtube:user'
 
     def suitable(cls, url):
@@ -1048,13 +1047,15 @@ class YoutubeUserIE(InfoExtractor):
             page = self._download_webpage(gdata_url, username,
                                           u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
 
+            try:
+                response = json.loads(page)
+            except ValueError as err:
+                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+
             # Extract video identifiers
             ids_in_page = []
-
-            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
-                if mobj.group(1) not in ids_in_page:
-                    ids_in_page.append(mobj.group(1))
-
+            for entry in response['feed']['entry']:
+                ids_in_page.append(entry['id']['$t'].split('/')[-1])
             video_ids.extend(ids_in_page)
 
             # A little optimization - if current page is not
```
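For context, here is a minimal, self-contained sketch of the new code path in isolation. The `_GDATA_URL`, `_GDATA_PAGE_SIZE`, and the `feed.entry[].id.$t` layout come from the diff above; the sample payload, the `gdata_page_url` paging helper, and its start-index formula are illustrative assumptions (and the GData v2 API has since been shut down, so no live request is made):

```python
import json

# Constants mirroring the patch; the GData v2 API is retired, so this only
# illustrates the URL/paging scheme and will not return live data.
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
_GDATA_PAGE_SIZE = 50


def gdata_page_url(username, pagenum):
    """Build the JSON feed URL for one page of a user's uploads.

    The 1-based start-index formula is an assumption for illustration,
    not something shown in the diff.
    """
    start_index = pagenum * _GDATA_PAGE_SIZE + 1
    return _GDATA_URL % (username, _GDATA_PAGE_SIZE, start_index)


# Hand-written sample mimicking the feed layout used in the diff
# (feed -> entry -> id -> $t); real responses carried many more fields.
SAMPLE_FEED = '''{"feed": {"entry": [
    {"id": {"$t": "http://gdata.youtube.com/feeds/api/videos/abc123DEF45"}},
    {"id": {"$t": "http://gdata.youtube.com/feeds/api/videos/xyz789GHI01"}}
]}}'''


def extract_video_ids(page):
    """Parse one JSON feed page and return the video ids it lists."""
    try:
        response = json.loads(page)
    except ValueError as err:
        raise ValueError('Invalid JSON in API response: %s' % err)
    # Each entry id is a URL whose last path component is the video id,
    # hence the split('/')[-1] in the patch.
    return [entry['id']['$t'].split('/')[-1]
            for entry in response['feed']['entry']]


print(gdata_page_url('someuser', 0))   # first page, start-index=1
print(extract_video_ids(SAMPLE_FEED))  # ['abc123DEF45', 'xyz789GHI01']
```

Requesting `alt=json` and reading the structured feed avoids both the fragile `_VIDEO_INDICATOR` regex and the deduplication check, since the feed lists only the user's own uploads rather than every `/watch?v=` link on the page.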
