diff options
| -rw-r--r-- | test/test_write_annotations.py | 82 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 17 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 10 | 
4 files changed, 113 insertions, 0 deletions
| diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py new file mode 100644 index 000000000..ba7a9f50a --- /dev/null +++ b/test/test_write_annotations.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# coding: utf-8 + +import xml.etree.ElementTree +import os +import sys +import unittest + +# Allow direct execution +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import youtube_dl.YoutubeDL +import youtube_dl.extractor +from youtube_dl.utils import * +from .helper import try_rm + +PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") + +# General configuration (from __init__, not very elegant...) +jar = compat_cookiejar.CookieJar() +cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) +proxy_handler = compat_urllib_request.ProxyHandler() +opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) +compat_urllib_request.install_opener(opener) + +class YoutubeDL(youtube_dl.YoutubeDL): +    def __init__(self, *args, **kwargs): +        super(YoutubeDL, self).__init__(*args, **kwargs) +        self.to_stderr = self.to_screen + +with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: +    params = json.load(pf) +params['writeannotations'] = True +params['skip_download'] = True +params['writeinfojson'] = False +params['format'] = 'flv' + +TEST_ID = 'gr51aVj-mLg' +ANNOTATIONS_FILE = TEST_ID + '.flv.annotations.xml' +EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] + +class TestAnnotations(unittest.TestCase): +    def setUp(self): +        # Clear old files +        self.tearDown() + + +    def test_info_json(self): +        expected = list(EXPECTED_ANNOTATIONS) #Two annotations could have the same text. +        ie = youtube_dl.extractor.YoutubeIE() +        ydl = YoutubeDL(params) +        ydl.add_info_extractor(ie) +        ydl.download([TEST_ID]) +        self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) +        annoxml = None +        with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof: +                annoxml = xml.etree.ElementTree.parse(annof) +        self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') +        root = annoxml.getroot() +        self.assertEqual(root.tag, 'document') +        annotationsTag = root.find('annotations') +        self.assertEqual(annotationsTag.tag, 'annotations') +        annotations = annotationsTag.findall('annotation') + +        #Not all the annotations have TEXT children and the annotations are returned unsorted. +        for a in annotations: +                self.assertEqual(a.tag, 'annotation') +                if a.get('type') == 'text': +                        textTag = a.find('TEXT') +                        text = textTag.text +                        self.assertTrue(text in expected) #assertIn only added in python 2.7 +                        #remove the first occurance, there could be more than one annotation with the same text +                        expected.remove(text) +        #We should have seen (and removed) all the expected annotation texts. +        self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') +         + +    def tearDown(self): +        try_rm(ANNOTATIONS_FILE) + +if __name__ == '__main__': +    unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e85e03fa4..c8054544a 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -71,6 +71,7 @@ class YoutubeDL(object):      logtostderr:       Log messages to stderr instead of stdout.      writedescription:  Write the video description to a .description file      writeinfojson:     Write the video description to a .info.json file +    writeannotations:  Write the video annotations to a .annotations.xml file      writethumbnail:    Write the thumbnail image to a file      writesubtitles:    Write the video subtitles to a file      writeautomaticsub: Write the automatic subtitles to a file @@ -258,6 +259,10 @@ class YoutubeDL(object):          """ Report that the metadata file has been written """          self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) +    def report_writeannotations(self, annofn): +        """ Report that the annotations file has been written. """ +        self.to_screen(u'[info] Writing video annotations to: ' + annofn) +      def report_file_already_downloaded(self, file_name):          """Report file has already been fully downloaded."""          try: @@ -522,6 +527,18 @@ class YoutubeDL(object):                  self.report_error(u'Cannot write description file ' + descfn)                  return +        if self.params.get('writeannotations', False): +            try: +               annofn = filename + u'.annotations.xml' +               self.report_writeannotations(annofn) +               with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: +                   annofile.write(info_dict['annotations']) +            except (KeyError, TypeError): +                self.report_warning(u'There are no annotations to write.') +            except (OSError, IOError): +                 self.report_error(u'Cannot write annotations file: ' + annofn) +                 return +          subtitles_are_requested = any([self.params.get('writesubtitles', False),                                         self.params.get('writeautomaticsub')]) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 3513d719f..fb1270ea2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -339,6 +339,9 @@ def parseOpts(overrideArguments=None):      filesystem.add_option('--write-info-json',              action='store_true', dest='writeinfojson',              help='write video metadata to a .info.json file', default=False) +    filesystem.add_option('--write-annotations', +            action='store_true', dest='writeannotations', +            help='write video annotations to a .annotation file', default=False)      filesystem.add_option('--write-thumbnail',              action='store_true', dest='writethumbnail',              help='write thumbnail image to disk', default=False) @@ -601,6 +604,7 @@ def _real_main(argv=None):          'nopart': opts.nopart,          'updatetime': opts.updatetime,          'writedescription': opts.writedescription, +        'writeannotations': opts.writeannotations,          'writeinfojson': opts.writeinfojson,          'writethumbnail': opts.writethumbnail,          'writesubtitles': opts.writesubtitles, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8222a880f..4347651d7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1250,6 +1250,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              url_map[itag] = format_url          return url_map +    def _extract_annotations(self, video_id): +        url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id +        return self._download_webpage(url, video_id, note=u'Searching for annotations.', errnote=u'Unable to download video annotations.') +      def _real_extract(self, url):          # Extract original video URL from URL with redirection, like age verification, using next_url parameter          mobj = re.search(self._NEXT_URL_RE, url) @@ -1382,6 +1386,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          else:              video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) +        # annotations +        video_annotations = None +        if self._downloader.params.get('writeannotations', False): +                video_annotations = self._extract_annotations(video_id) +          # Decide which formats to download          try: @@ -1495,6 +1504,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'subtitles':    video_subtitles,                  'duration':     video_duration,                  'age_limit':    18 if age_gate else 0, +                'annotations':  video_annotations              })          return results | 
