diff options
| -rw-r--r-- | youtube_dl/extractor/vk.py | 66 | 
1 files changed, 38 insertions, 28 deletions
| diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 634d17d91..cd22df25a 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -1,6 +1,7 @@  # encoding: utf-8  from __future__ import unicode_literals +import collections  import re  import json  import sys @@ -16,7 +17,6 @@ from ..utils import (      get_element_by_class,      int_or_none,      orderedSet, -    parse_duration,      remove_start,      str_to_int,      unescapeHTML, @@ -447,6 +447,9 @@ class VKWallPostIE(VKBaseIE):                  'skip_download': True,              },          }], +        'params': { +            'usenetrc': True, +        },          'skip': 'Requires vk account credentials',      }, {          # single YouTube embed, no leading - @@ -456,6 +459,9 @@ class VKWallPostIE(VKBaseIE):              'title': 'Sergey Gorbunov - Wall post 85155021_6319',          },          'playlist_count': 1, +        'params': { +            'usenetrc': True, +        },          'skip': 'Requires vk account credentials',      }, {          # wall page URL @@ -483,37 +489,41 @@ class VKWallPostIE(VKBaseIE):              raise ExtractorError('VK said: %s' % error, expected=True)          description = clean_html(get_element_by_class('wall_post_text', webpage)) -        uploader = clean_html(get_element_by_class( -            'fw_post_author', webpage)) or self._og_search_description(webpage) +        uploader = clean_html(get_element_by_class('author', webpage))          thumbnail = self._og_search_thumbnail(webpage)          entries = [] -        for audio in re.finditer(r'''(?sx) -                            <input[^>]+ -                                id=(?P<q1>["\'])audio_info(?P<id>\d+_\d+).*?(?P=q1)[^>]+ -                                value=(?P<q2>["\'])(?P<url>http.+?)(?P=q2) -                                .+? -                            </table>''', webpage): -            audio_html = audio.group(0) -            audio_id = audio.group('id') -            duration = parse_duration(get_element_by_class('duration', audio_html)) -            track = self._html_search_regex( -                r'<span[^>]+id=["\']title%s[^>]*>([^<]+)' % audio_id, -                audio_html, 'title', default=None) -            artist = self._html_search_regex( -                r'>([^<]+)</a></b>\s*&ndash', audio_html, -                'artist', default=None) -            entries.append({ -                'id': audio_id, -                'url': audio.group('url'), -                'title': '%s - %s' % (artist, track) if artist and track else audio_id, -                'thumbnail': thumbnail, -                'duration': duration, -                'uploader': uploader, -                'artist': artist, -                'track': track, -            }) +        audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage) +        if audio_ids: +            al_audio = self._download_webpage( +                'https://vk.com/al_audio.php', post_id, +                note='Downloading audio info', fatal=False, +                data=urlencode_postdata({ +                    'act': 'reload_audio', +                    'al': '1', +                    'ids': ','.join(audio_ids) +                })) +            if al_audio: +                Audio = collections.namedtuple( +                    'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration']) +                audios = self._parse_json( +                    self._search_regex( +                        r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'), +                    post_id, fatal=False, transform_source=unescapeHTML) +                if isinstance(audios, list): +                    for audio in audios: +                        a = Audio._make(audio[:6]) +                        entries.append({ +                            'id': '%s_%s' % (a.user_id, a.id), +                            'url': a.url, +                            'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id, +                            'thumbnail': thumbnail, +                            'duration': a.duration, +                            'uploader': uploader, +                            'artist': a.artist, +                            'track': a.track, +                        })          for video in re.finditer(                  r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage): | 
