diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2015-02-10 01:39:43 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2015-02-10 01:40:55 +0100 | 
| commit | c73fae1e2e4421df664aefd1d14a72596caf9e2f (patch) | |
| tree | 6708f99a4ef673afd50819d2985ebf2f8341e097 | |
| parent | 834bf069d20745263586284c98196b5b0605b916 (diff) | |
[commonmistakes] Detect BOMs at the beginning of URLs
Reported at https://bugzilla.redhat.com/show_bug.cgi?id=1093517 .
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 2 |
| -rw-r--r-- | youtube_dl/extractor/commonmistakes.py | 17 |
2 files changed, 18 insertions, 1 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 55ca0d6e4..fb1e7f325 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -74,7 +74,7 @@ from .collegehumor import CollegeHumorIE
 from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .criterion import CriterionIE
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
index dbbf27a74..2f86e2381 100644
--- a/youtube_dl/extractor/commonmistakes.py
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -27,3 +27,20 @@ class CommonMistakesIE(InfoExtractor):
         if not self._downloader.params.get('verbose'):
             msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
         raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+        IE_DESC = False
+        _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+        _TESTS = [{
+            'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+            'only_matching': True,
+        }]
+
+        def _real_extract(self, url):
+            real_url = self._match_id(url)
+            self.report_warning(
+                'Your URL starts with a Byte Order Mark (BOM). '
+                'Removing the BOM and looking for "%s" ...' % real_url)
+            return self.url_result(real_url)
```
