diff options
| author | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-06-09 14:21:42 +0200 | 
|---|---|---|
| committer | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-06-09 14:21:42 +0200 | 
| commit | 78d3442b1209d3858cfea1f7ca958f661784b5ab (patch) | |
| tree | 7b9a5ff20a2f9a6448a6795698b2117b336a8549 | |
| parent | 979a9dd4c4d46e0f2b11bc4bcac51ad8d446d186 (diff) | |
test: extend the reach of info_dict checking
* print the info_dict in a format suitable for easy addition to tests.json when untested fields are detected during tests
* make it possible to put the crc32 of a field's value (prefixed with "crc32:") in tests.json if the value is too long
* complete the "info_dict" fields in existing tests
* fixed the bugs caught while doing this
| -rw-r--r-- | test/test_download.py | 21 | ||||
| -rw-r--r-- | test/tests.json | 185 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 17 | 
3 files changed, 177 insertions, 46 deletions
| diff --git a/test/test_download.py b/test/test_download.py index 565b1ebc5..862152033 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -7,8 +7,8 @@ import os  import json  import unittest  import sys -import hashlib  import socket +import binascii  # Allow direct execution  sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -38,6 +38,9 @@ def _try_rm(filename):          if ose.errno != errno.ENOENT:              raise +def crc32(value): +    return '%08x' % (binascii.crc32(value.encode('utf8')) & 0xffffffff) +  class FileDownloader(youtube_dl.FileDownloader):      def __init__(self, *args, **kwargs):          self.to_stderr = self.to_screen @@ -124,7 +127,21 @@ def generator(test_case):                  with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:                      info_dict = json.load(infof)                  for (info_field, value) in tc.get('info_dict', {}).items(): -                    self.assertEqual(value, info_dict.get(info_field)) +                    if isinstance(value, compat_str) and value.startswith('crc32:'): +                        self.assertEqual(value, 'crc32:' + crc32(info_dict.get(info_field))) +                    else: +                        self.assertEqual(value, info_dict.get(info_field)) + +                # If checkable fields are missing from the test case, print the info_dict +                test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'crc32:' + crc32(value)) +                    for key, value in info_dict.items() +                    if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) +                if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): +                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n') + +                # Check for the presence of 
mandatory fields +                for key in ('id', 'url', 'title', 'ext'): +                    self.assertTrue(key in info_dict.keys() and info_dict[key])          finally:              for tc in test_cases:                  _try_rm(tc['file']) diff --git a/test/tests.json b/test/tests.json index 82da27d5b..e9abb0950 100644 --- a/test/tests.json +++ b/test/tests.json @@ -15,43 +15,76 @@      "name": "Dailymotion",      "md5":  "392c4b85a60a90dc4792da41ce3144eb",      "url":  "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech", -    "file":  "x33vw9.mp4" +    "file":  "x33vw9.mp4", +    "info_dict": { +      "uploader": "Alex and Van .", +      "title": "Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" +    }    },    {      "name": "Metacafe",      "add_ie": ["Youtube"],      "url":  "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", -    "file":  "_aUehQsCQtM.flv" +    "file":  "_aUehQsCQtM.flv", +    "info_dict": { +      "upload_date": "20090102", +      "title": "The Electric Company | \"Short I\" | PBS KIDS GO!", +      "description": "crc32:5ef3bc57", +      "uploader": "PBS", +      "uploader_id": "PBS" +    }    },    {      "name": "BlipTV",      "md5":  "b2d849efcf7ee18917e4b4d9ff37cafe",      "url":  "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352", -    "file":  "5779306.m4v" +    "file":  "5779306.m4v", +    "info_dict": { +      "upload_date": "20111205", +      "description": "crc32:fa658d49", +      "uploader": "Comic Book Resources - CBR TV", +      "title": "CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" +    }    },    {      "name": "XVideos",      "md5":  "1d0c835822f0a71a7bf011855db929d0",      "url":  "http://www.xvideos.com/video939581/funny_porns_by_s_-1", -    "file":  "939581.flv" +    "file":  "939581.flv", +    "info_dict": { +      "title": "Funny Porns By >>>>S<<<<<< -1" +    }    },    {      "name": "YouPorn", 
     "md5": "c37ddbaaa39058c76a7e86c6813423c1",      "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/", -    "file": "505835.mp4" +    "file": "505835.mp4", +    "info_dict": { +      "upload_date": "20101221", +      "description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", +      "uploader": "Ask Dan And Jennifer", +      "title": "Sex Ed: Is It Safe To Masturbate Daily?" +    }    },    {      "name": "Pornotube",      "md5": "374dd6dcedd24234453b295209aa69b6",      "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing", -    "file": "1689755.flv" +    "file": "1689755.flv", +    "info_dict": { +      "upload_date": "20090708", +      "title": "Marilyn-Monroe-Bathing" +    }    },    {      "name": "YouJizz",      "md5": "07e15fa469ba384c7693fd246905547c",      "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html", -    "file": "2189178.flv" +    "file": "2189178.flv", +    "info_dict": { +      "title": "Zeichentrick 1" +    }    },    {      "name": "Vimeo", @@ -70,61 +103,103 @@      "name": "Soundcloud",      "md5":  "ebef0a451b909710ed1d7787dddbf0d7",      "url":  "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy", -    "file":  "62986583.mp3" +    "file":  "62986583.mp3", +    "info_dict": { +      "upload_date": "20121011", +      "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", +      "uploader": "E.T. 
ExTerrestrial Music", +      "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" +    }    },    {      "name": "StanfordOpenClassroom",      "md5":  "544a9468546059d4e80d76265b0443b8",      "url":  "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", -    "file":  "PracticalUnix_intro-environment.mp4" +    "file":  "PracticalUnix_intro-environment.mp4", +    "info_dict": { +      "title": "Intro Environment" +    }    },    {      "name": "XNXX",      "md5":  "0831677e2b4761795f68d417e0b7b445",      "url":  "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_", -    "file":  "1135332.flv" +    "file":  "1135332.flv", +    "info_dict": { +      "title": "lida » Naked Funny Actress  (5)" +    }    },    {      "name": "Youku",      "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html",      "file": "XNDgyMDQ2NTQw_part00.flv",      "md5": "ffe3f2e435663dc2d1eea34faeff5b5b", -    "params": { "test": false } +    "params": { "test": false }, +    "info_dict": { +      "title": "youtube-dl test video \"'/\\ä↭𝕐" +    }    },    {      "name": "NBA",      "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html",      "file": "0021200253-okc-bkn-recap.nba.mp4", -    "md5": "c0edcfc37607344e2ff8f13c378c88a4" +    "md5": "c0edcfc37607344e2ff8f13c378c88a4", +    "info_dict": { +      "description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", +      "title": "Thunder vs. 
Nets" +    }    },    {      "name": "JustinTV",      "url": "http://www.twitch.tv/thegamedevhub/b/296128360",      "file": "296128360.flv", -    "md5": "ecaa8a790c22a40770901460af191c9a" +    "md5": "ecaa8a790c22a40770901460af191c9a", +    "info_dict": { +      "upload_date": "20110927", +      "uploader_id": 25114803, +      "uploader": "thegamedevhub", +      "title": "Beginner Series - Scripting With Python Pt.1" +    }    },    {      "name": "MyVideo",      "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win",      "file": "8229274.flv", -    "md5": "2d2753e8130479ba2cb7e0a37002053e" +    "md5": "2d2753e8130479ba2cb7e0a37002053e", +    "info_dict": { +      "title": "bowling-fail-or-win" +    }    },    {      "name": "Escapist",      "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate",      "file": "6618-Breaking-Down-Baldurs-Gate.mp4", -    "md5": "c6793dbda81388f4264c1ba18684a74d" +    "md5": "c6793dbda81388f4264c1ba18684a74d", +    "info_dict": { +      "description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", +      "uploader": "the-escapist-presents", +      "title": "Breaking Down Baldur's Gate" +    }    },    {      "name": "GooglePlus",      "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", -    "file": "ZButuJc6CtH.flv" +    "file": "ZButuJc6CtH.flv", +    "info_dict": { +      "upload_date": "20120613", +      "uploader": "井上ヨシマサ", +      "title": "嘆きの天使 降臨" +    }    },    {      "name": "FunnyOrDie",      "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version",      "file": "0732f586d7.mp4", -    "md5": "f647e9e90064b53b6e046e75d0241fbd" +    "md5": "f647e9e90064b53b6e046e75d0241fbd", +    "info_dict": { +      "description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). 
Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. Scott.", +      "title": "Heart-Shaped Box: Literal Video Version" +    }    },    {      "name": "Steam", @@ -161,6 +236,7 @@      "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things",      "file": "12-jan-pythonthings.mp4",      "info_dict": { +      "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.",        "title": "A Few of My Favorite [Python] Things"      },      "params": { @@ -173,7 +249,10 @@      "file": "422212.mp4",      "md5": "4e2f5cb088a83cd8cdb7756132f9739d",      "info_dict": { -        "title": "thedailyshow-kristen-stewart part 1" +      "upload_date": "20121214", +      "description": "Kristen Stewart", +      "uploader": "thedailyshow", +      "title": "thedailyshow-kristen-stewart part 1"      }    },    { @@ -224,42 +303,48 @@          "file": "11885679.m4a",          "md5": "d30b5b5f74217410f4689605c35d1fd7",          "info_dict": { -          "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad" +          "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", +          "uploader_id": "ytdl"          }        },        {          "file": "11885680.m4a",          "md5": "4eb0a669317cd725f6bbd336a29f923a",          "info_dict": { -          "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad" +          "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", +          "uploader_id": "ytdl"          }        },        {          "file": "11885682.m4a",          "md5": "1893e872e263a2705558d1d319ad19e8",          "info_dict": { -          "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad" +          "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", +          "uploader_id": "ytdl"       
   }        },        {          "file": "11885683.m4a",          "md5": "b673c46f47a216ab1741ae8836af5899",          "info_dict": { -          "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad" +          "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", +          "uploader_id": "ytdl"          }        },        {          "file": "11885684.m4a",          "md5": "1d74534e95df54986da7f5abf7d842b7",          "info_dict": { -          "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad" +          "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", +          "uploader_id": "ytdl"          }        },        {          "file": "11885685.m4a",          "md5": "f081f47af8f6ae782ed131d38b9cd1c0",          "info_dict": { -          "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad" +          "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", +          "uploader_id": "ytdl"          }        }      ] @@ -270,9 +355,9 @@      "file": "NODfbab.mp4",      "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83",      "info_dict": { +      "uploader": "ytdl",        "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."      } -    },    {      "name": "TED", @@ -290,14 +375,19 @@      "file": "11741.mp4",      "md5": "0b49f4844a068f8b33f4b7c88405862b",      "info_dict": { -        "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" +      "description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? 
Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", +      "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"      }    },    {      "name": "Generic",      "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html",      "file": "13601338388002.mp4", -    "md5": "85b90ccc9d73b4acd9138d3af4c27f89" +    "md5": "85b90ccc9d73b4acd9138d3af4c27f89", +    "info_dict": { +      "uploader": "www.hodiho.fr", +      "title": "Régis plante sa Jeep" +    }    },    {      "name": "Spiegel", @@ -355,42 +445,59 @@          "file":"30510138.mp3",          "md5":"f9136bf103901728f29e419d2c70f55d",          "info_dict": { -          "title":"D-D-Dance" +          "upload_date": "20111213", +          "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", +          "uploader": "The Royal Concept", +          "title": "D-D-Dance"          }        },        {          "file":"47127625.mp3",          "md5":"09b6758a018470570f8fd423c9453dd8",          "info_dict": { -          "title":"The Royal Concept - Gimme Twice" +          "upload_date": "20120521", +          "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", +          "uploader": "The Royal Concept", +          "title": "The Royal Concept - Gimme Twice"          }        },        {          "file":"47127627.mp3",          "md5":"154abd4e418cea19c3b901f1e1306d9c",          "info_dict": { -          "title":"Goldrushed" +          "upload_date": "20120521", +          "uploader": "The Royal Concept", +          "title": "Goldrushed"          }        },        {          "file":"47127629.mp3",          "md5":"2f5471edc79ad3f33a683153e96a79c1",          "info_dict": { -          "title":"In the End" +          "upload_date": "20120521", +          "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / 
Magnus\r\nwww.royalconceptband.com", +          "uploader": "The Royal Concept", +          "title": "In the End"          }        },        {          "file":"47127631.mp3",          "md5":"f9ba87aa940af7213f98949254f1c6e2",          "info_dict": { -          "title":"Knocked Up" +          "upload_date": "20120521", +          "description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", +          "uploader": "The Royal Concept", +          "title": "Knocked Up"          }        },        {          "file":"75206121.mp3",          "md5":"f9d1fe9406717e302980c30de4af9353",          "info_dict": { -          "title":"World On Fire" +          "upload_date": "20130116", +          "description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central).  \r\nAs a gift to our fans we would like to offer you a free download of the track!  ", +          "uploader": "The Royal Concept", +          "title": "World On Fire"          }        }      ] @@ -419,8 +526,10 @@      "url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0",      "file": "zpsc0c3b9fa.mp4",      "md5": "7dabfb92b0a31f6c16cebc0f8e60ff99", -    "info_dict":{ -      "title":"Tired of Link Building? Try BacklinkMyDomain.com!" +    "info_dict": { +      "upload_date": "20130504", +      "uploader": "rachaneronas", +      "title": "Tired of Link Building? Try BacklinkMyDomain.com!"      
}    },    { @@ -488,8 +597,10 @@      "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html",      "file": "1509445.flv",      "md5": "9f48e0e8d58e3076bb236ff412ab62fa", -    "info_dict":{ -      "title":"FemaleAgent Shy beauty takes the bait" +    "info_dict": { +      "upload_date": "20121014", +      "uploader_id": "Ruseful2011", +      "title": "FemaleAgent Shy beauty takes the bait"      }    },    { diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 6060a5988..24e9c4cc7 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2377,8 +2377,8 @@ class EscapistIE(InfoExtractor):          showName = mobj.group('showname')          videoId = mobj.group('episode') -        self.report_extraction(showName) -        webpage = self._download_webpage(url, showName) +        self.report_extraction(videoId) +        webpage = self._download_webpage(url, videoId)          videoDesc = self._html_search_regex('<meta name="description" content="([^"]*)"',              webpage, u'description', fatal=False) @@ -2389,10 +2389,13 @@ class EscapistIE(InfoExtractor):          playerUrl = self._html_search_regex('<meta property="og:video" content="([^"]*)"',              webpage, u'player url') +        title = self._html_search_regex('<meta name="title" content="([^"]*)"', +            webpage, u'player url').split(' : ')[-1] +          configUrl = self._search_regex('config=(.*)$', playerUrl, u'config url')          configUrl = compat_urllib_parse.unquote(configUrl) -        configJSON = self._download_webpage(configUrl, showName, +        configJSON = self._download_webpage(configUrl, videoId,                                              u'Downloading configuration',                                              u'unable to download configuration') @@ -2412,7 +2415,7 @@ class EscapistIE(InfoExtractor):              'url': videoUrl,              'uploader': showName,              'upload_date': 
None, -            'title': showName, +            'title': title,              'ext': 'mp4',              'thumbnail': imgUrl,              'description': videoDesc, @@ -3581,14 +3584,14 @@ class YouPornIE(InfoExtractor):              size = format[0]              bitrate = format[1]              format = "-".join( format ) -            title = u'%s-%s-%s' % (video_title, size, bitrate) +            # title = u'%s-%s-%s' % (video_title, size, bitrate)              formats.append({                  'id': video_id,                  'url': video_url,                  'uploader': video_uploader,                  'upload_date': upload_date, -                'title': title, +                'title': video_title,                  'ext': extension,                  'format': format,                  'thumbnail': thumbnail, @@ -4328,7 +4331,7 @@ class XHamsterIE(InfoExtractor):              video_upload_date = None              self._downloader.report_warning(u'Unable to extract upload date') -        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^>]+)', +        video_uploader_id = self._html_search_regex(r'<a href=\'/user/[^>]+>(?P<uploader_id>[^<]+)',              webpage, u'uploader id', default=u'anonymous')          video_thumbnail = self._search_regex(r'\'image\':\'(?P<thumbnail>[^\']+)\'', | 
