aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/ISSUE_TEMPLATE.md6
-rw-r--r--CONTRIBUTING.md2
-rw-r--r--ChangeLog23
-rw-r--r--README.md10
-rw-r--r--devscripts/prepare_manpage.py26
-rw-r--r--docs/supportedsites.md3
-rw-r--r--youtube_dl/extractor/ctsnews.py49
-rw-r--r--youtube_dl/extractor/extractors.py1
-rw-r--r--youtube_dl/extractor/uol.py128
-rw-r--r--youtube_dl/utils.py1
-rw-r--r--youtube_dl/version.py2
11 files changed, 205 insertions, 46 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 2319e45df..1c06ba36e 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.07**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.10**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.08.07
+[debug] youtube-dl version 2016.08.10
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fbf0ab7e8..95392030e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
### Why are existing options not enough?
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
diff --git a/ChangeLog b/ChangeLog
index 657ff3e48..adbdc4f9b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,29 @@
version <unreleased>
+Core
++ Recognize more formats in unified_timestamp
+
Extractors
-* [kuwo:singer] Fix extraction
+* [ctsnews] Fix extraction
+
+
+version 2016.08.10
+
+Core
+* Make --metadata-from-title non fatal when title does not match the pattern
+* Introduce options for randomized sleep before each download
+ --min-sleep-interval and --max-sleep-interval (#9930)
+* Respect default in _search_json_ld
+
+Extractors
++ [uol] Add extractor for uol.com.br (#4263)
+* [rbmaradio] Fix extraction and extract all formats (#10242)
++ [sonyliv] Add extractor for sonyliv.com (#10258)
* [aparat] Fix extraction
+* [cwtv] Extract HTTP formats
++ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
+* [kuwo:singer] Fix extraction
+
version 2016.08.07
diff --git a/README.md b/README.md
index b42d5c730..cabbbef76 100644
--- a/README.md
+++ b/README.md
@@ -330,7 +330,15 @@ which means you can modify it, redistribute it or use it however you like.
bidirectional text support. Requires bidiv
or fribidi executable in PATH
--sleep-interval SECONDS Number of seconds to sleep before each
- download.
+ download when used alone or a lower bound
+ of a range for randomized sleep before each
+ download (minimum possible number of
+ seconds to sleep) when used along with
+ --max-sleep-interval.
+ --max-sleep-interval SECONDS Upper bound of a range for randomized sleep
+ before each download (maximum possible
+ number of seconds to sleep). Must only be
+ used along with --min-sleep-interval.
## Video Format Options:
-f, --format FORMAT Video format code, see the "FORMAT
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index e3f6339b5..ce548739f 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -54,17 +54,21 @@ def filter_options(readme):
if in_options:
if line.lstrip().startswith('-'):
- option, description = re.split(r'\s{2,}', line.lstrip())
- split_option = option.split(' ')
-
- if not split_option[-1].startswith('-'): # metavar
- option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
-
- # Pandoc's definition_lists. See http://pandoc.org/README.html
- # for more information.
- ret += '\n%s\n: %s\n' % (option, description)
- else:
- ret += line.lstrip() + '\n'
+ split = re.split(r'\s{2,}', line.lstrip())
+ # Description string may start with `-` as well. If there is
+ # only one piece then it's a description bit not an option.
+ if len(split) > 1:
+ option, description = split
+ split_option = option.split(' ')
+
+ if not split_option[-1].startswith('-'): # metavar
+ option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
+
+ # Pandoc's definition_lists. See http://pandoc.org/README.html
+ # for more information.
+ ret += '\n%s\n: %s\n' % (option, description)
+ continue
+ ret += line.lstrip() + '\n'
else:
ret += line + '\n'
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 3608e1807..a44167a94 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -564,6 +564,7 @@
- **RoosterTeeth**
- **RottenTomatoes**
- **Roxwel**
+ - **Rozhlas**
- **RTBF**
- **rte**: Raidió Teilifís Éireann TV
- **rte:radio**: Raidió Teilifís Éireann radio
@@ -621,6 +622,7 @@
- **smotri:user**: Smotri.com user videos
- **Snotr**
- **Sohu**
+ - **SonyLIV**
- **soundcloud**
- **soundcloud:playlist**
- **soundcloud:search**: Soundcloud search
@@ -747,6 +749,7 @@
- **udemy:course**
- **UDNEmbed**: 聯合影音
- **Unistra**
+ - **uol.com.br**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USAToday**
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py
index 1622fc844..83ca90c3b 100644
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dl/extractor/ctsnews.py
@@ -1,13 +1,12 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import parse_iso8601, ExtractorError
+from ..utils import unified_timestamp
class CtsNewsIE(InfoExtractor):
IE_DESC = '華視新聞'
- # https connection failed (Connection reset)
_VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
_TESTS = [{
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
@@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor):
'id': '201501291578109',
'ext': 'mp4',
'title': '以色列.真主黨交火 3人死亡',
- 'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
+ 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
'timestamp': 1422528540,
'upload_date': '20150129',
}
@@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
'id': '201309031304098',
'ext': 'mp4',
'title': '韓國31歲童顏男 貌如十多歲小孩',
- 'description': 'md5:f183feeba3752b683827aab71adad584',
+ 'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
'thumbnail': 're:^https?://.*\.jpg$',
'timestamp': 1378205880,
'upload_date': '20130903',
@@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor):
}, {
# With Youtube embedded video
'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
- 'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
- 'add_ie': ['Youtube'],
+ 'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
'info_dict': {
'id': 'OVbfO7d0_hQ',
'ext': 'mp4',
@@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor):
'upload_date': '20150128',
'uploader_id': 'TBSCTS',
'uploader': '中華電視公司',
- }
+ },
+ 'add_ie': ['Youtube'],
}]
def _real_extract(self, url):
news_id = self._match_id(url)
page = self._download_webpage(url, news_id)
- if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
- feed_url = self._html_search_regex(
- r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
- page, 'feed url')
- video_url = self._download_webpage(
- feed_url, news_id, note='Fetching feed')
+ news_id = self._hidden_inputs(page).get('get_id')
+
+ if news_id:
+ mp4_feed = self._download_json(
+ 'http://news.cts.com.tw/action/test_mp4feed.php',
+ news_id, note='Fetching feed', query={'news_id': news_id})
+ video_url = mp4_feed['source_url']
else:
self.to_screen('Not CTSPlayer video, trying Youtube...')
youtube_url = self._search_regex(
- r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
- default=None)
- if not youtube_url:
- raise ExtractorError('The news includes no videos!', expected=True)
+ r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
- return {
- '_type': 'url',
- 'url': youtube_url,
- 'ie_key': 'Youtube',
- }
+ return self.url_result(youtube_url, ie='Youtube')
description = self._html_search_meta('description', page)
- title = self._html_search_meta('title', page)
+ title = self._html_search_meta('title', page, fatal=True)
thumbnail = self._html_search_meta('image', page)
datetime_str = self._html_search_regex(
- r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
- # Transform into ISO 8601 format with timezone info
- datetime_str = datetime_str.replace('/', '-') + ':00+0800'
- timestamp = parse_iso8601(datetime_str, delimiter=' ')
+ r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
+ timestamp = None
+ if datetime_str:
+ timestamp = unified_timestamp(datetime_str) - 8 * 3600
return {
'id': news_id,
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index f1043dae6..387230be0 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -929,6 +929,7 @@ from .udemy import (
from .udn import UDNEmbedIE
from .digiteka import DigitekaIE
from .unistra import UnistraIE
+from .uol import UOLIE
from .urort import UrortIE
from .urplay import URPlayIE
from .usatoday import USATodayIE
diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py
new file mode 100644
index 000000000..c27c64387
--- /dev/null
+++ b/youtube_dl/extractor/uol.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ parse_duration,
+ update_url_query,
+ str_or_none,
+)
+
+
+class UOLIE(InfoExtractor):
+ IE_NAME = 'uol.com.br'
+ _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
+ 'md5': '25291da27dc45e0afb5718a8603d3816',
+ 'info_dict': {
+ 'id': '15951931',
+ 'ext': 'mp4',
+ 'title': 'Miss simpatia é encontrada morta',
+ 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+ }
+ }, {
+ 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9',
+ 'info_dict': {
+ 'id': '15954259',
+ 'ext': 'mp4',
+ 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
+ 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+ }
+ }, {
+ 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/15954259',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470',
+ 'only_matching': True,
+ }]
+
+ _FORMATS = {
+ '2': {
+ 'width': 640,
+ 'height': 360,
+ },
+ '5': {
+ 'width': 1080,
+ 'height': 720,
+ },
+ '6': {
+ 'width': 426,
+ 'height': 240,
+ },
+ '7': {
+ 'width': 1920,
+ 'height': 1080,
+ },
+ '8': {
+ 'width': 192,
+ 'height': 144,
+ },
+ '9': {
+ 'width': 568,
+ 'height': 320,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if not video_id.isdigit():
+ embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id)
+ video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id')
+ video_data = self._download_json(
+ 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id,
+ video_id)['item']
+ title = video_data['title']
+
+ query = {
+ 'ver': video_data.get('numRevision', 2),
+ 'r': 'http://mais.uol.com.br',
+ }
+ formats = []
+ for f in video_data.get('formats', []):
+ f_url = f.get('url') or f.get('secureUrl')
+ if not f_url:
+ continue
+ format_id = str_or_none(f.get('id'))
+ fmt = {
+ 'format_id': format_id,
+ 'url': update_url_query(f_url, query),
+ }
+ fmt.update(self._FORMATS.get(format_id, {}))
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ tags = []
+ for tag in video_data.get('tags', []):
+ tag_description = tag.get('description')
+ if not tag_description:
+ continue
+ tags.append(tag_description)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(video_data.get('desMedia')),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')),
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c50238ba1..a03f7184d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -122,6 +122,7 @@ DATE_FORMATS = (
'%Y %m %d',
'%Y-%m-%d',
'%Y/%m/%d',
+ '%Y/%m/%d %H:%M',
'%Y/%m/%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index b48552031..f7ad846d9 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.08.07'
+__version__ = '2016.08.10'