about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-07-18 13:12:27 +0200
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-07-18 13:12:27 +0200
commit: 3f4021770488cd5f11d5d640769407475895efaf (patch)
tree: 13b3f645b758adf126ff7177f321c347543cb87c
parent: f631c3311a869ea15452d47fa3c76591343ef94e (diff)
download: youtube-dl-3f4021770488cd5f11d5d640769407475895efaf.tar.xz
InstagramIE: fix the extraction of the uploader_id and the title
The page title is now just 'Instagram', so the video title is built from the uploader id instead. Also extract the description.
-rw-r--r-- youtube_dl/extractor/instagram.py 22
1 file changed, 10 insertions, 12 deletions
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index f9ac8d5b4..ddc42882a 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -10,7 +10,8 @@ class InstagramIE(InfoExtractor):
u'md5': u'0d2da106a9d2631273e192b372806516',
u'info_dict': {
u"uploader_id": u"naomipq",
- u"title": u"Video by naomipq"
+ u"title": u"Video by naomipq",
+ u'description': u'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
}
}
@@ -18,20 +19,17 @@ class InstagramIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
webpage = self._download_webpage(url, video_id)
- html_title = self._html_search_regex(
- r'<title>(.+?)</title>',
- webpage, u'title', flags=re.DOTALL)
- title = re.sub(u'(?: *\(Videos?\))? \u2022 Instagram$', '', html_title).strip()
- uploader_id = self._html_search_regex(
- r'<div class="media-user" id="media_user">.*?<h2><a href="[^"]*">([^<]*)</a></h2>',
- webpage, u'uploader id', fatal=False, flags=re.DOTALL)
- ext = 'mp4'
+ uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
+ webpage, u'uploader id', fatal=False)
+ desc = self._search_regex(r'"caption":"(.*?)"', webpage, u'description',
+ fatal=False)
return [{
'id': video_id,
'url': self._og_search_video_url(webpage),
- 'ext': ext,
- 'title': title,
+ 'ext': 'mp4',
+ 'title': u'Video by %s' % uploader_id,
'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader_id' : uploader_id
+ 'uploader_id' : uploader_id,
+ 'description': desc,
}]