diff options
| -rw-r--r-- | test/test_utils.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/airmozilla.py | 47 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 5 | 
3 files changed, 30 insertions, 23 deletions
| diff --git a/test/test_utils.py b/test/test_utils.py index 2f8996d7b..3fba8ae11 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -246,6 +246,7 @@ class TestUtil(unittest.TestCase):          self.assertEqual(parse_duration('2.5 hours'), 9000)          self.assertEqual(parse_duration('02:03:04'), 7384)          self.assertEqual(parse_duration('01:02:03:04'), 93784) +        self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)      def test_fix_xml_ampersands(self):          self.assertEqual( diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py index 44c20f886..611ad1e9d 100644 --- a/youtube_dl/extractor/airmozilla.py +++ b/youtube_dl/extractor/airmozilla.py @@ -4,7 +4,11 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( +    int_or_none, +    parse_duration, +    parse_iso8601, +)  class AirMozillaIE(InfoExtractor): @@ -27,13 +31,6 @@ class AirMozillaIE(InfoExtractor):          }      } -    _QUALITY_MAP = { -        '360p': 0, -        '576p': 1, -        '640p': 2, -        '720p': 3, -    } -      def _real_extract(self, url):          display_id = self._match_id(url)          webpage = self._download_webpage(url, display_id) @@ -43,19 +40,23 @@ class AirMozillaIE(InfoExtractor):          jwconfig = self._search_regex(r'\svar jwconfig = (\{.*?\});\s', embed_script, 'metadata')          metadata = self._parse_json(jwconfig, video_id) -        formats = [] -        for source in metadata['playlist'][0]['sources']: -            fmt = { -                'url': source['file'], -                'ext': source['type'], -                'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'), -                'resolution': source['label'], -                'quality': self._QUALITY_MAP.get(source['label'], -1), -            } -            formats.append(fmt) +        formats = [{ +            'url': source['file'], +            'ext': source['type'], +            'format_id': self._search_regex(r'&format=(.*)$', source['file'], 'video format'), +            'format': source['label'], +            'height': int(source['label'].rstrip('p')), +        } for source in metadata['playlist'][0]['sources']]          self._sort_formats(formats) -        duration_match = re.search(r'Duration:(?: (?P<H>\d+) hours?)?(?: (?P<M>\d+) minutes?)?', webpage) +        view_count = int_or_none(self._html_search_regex( +            r'Views since archived: ([0-9]+)', +            webpage, 'view count', fatal=False)) +        timestamp = parse_iso8601(self._html_search_regex( +            r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)) +        duration = parse_duration(self._search_regex( +            r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', +            webpage, 'duration', fatal=False))          return {              'id': video_id, @@ -63,11 +64,11 @@ class AirMozillaIE(InfoExtractor):              'formats': formats,              'url': self._og_search_url(webpage),              'display_id': display_id, -            'thumbnail': metadata['playlist'][0]['image'], +            'thumbnail': metadata['playlist'][0].get('image'),              'description': self._og_search_description(webpage), -            'timestamp': parse_iso8601(self._html_search_regex(r'<time datetime="(.*?)"', webpage, 'timestamp')), +            'timestamp': timestamp,              'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None), -            'duration': int(duration_match.groupdict()['H'] or 0) * 3600 + int(duration_match.groupdict()['M'] or 0) * 60, -            'view_count': int(self._html_search_regex(r'Views since archived: ([0-9]+)', webpage, 'view count')), +            'duration': duration, +            'view_count': view_count,              'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),          } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 506c896de..1f3bfef7d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1290,6 +1290,7 @@ def parse_duration(s):              (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|              (?P<only_hours>[0-9.]+)\s*(?:hours?)| +            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*|              (?:                  (?:                      (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)? @@ -1308,10 +1309,14 @@ def parse_duration(s):          return float_or_none(m.group('only_hours'), invscale=60 * 60)      if m.group('secs'):          res += int(m.group('secs')) +    if m.group('mins_reversed'): +        res += int(m.group('mins_reversed')) * 60      if m.group('mins'):          res += int(m.group('mins')) * 60      if m.group('hours'):          res += int(m.group('hours')) * 60 * 60 +    if m.group('hours_reversed'): +        res += int(m.group('hours_reversed')) * 60 * 60      if m.group('days'):          res += int(m.group('days')) * 24 * 60 * 60      if m.group('ms'): | 
