diff options
28 files changed, 612 insertions, 427 deletions
@@ -47,211 +47,109 @@ which means you can modify it, redistribute it or use it however you like. # OPTIONS -h, --help print this help text and exit --version print program version and exit - -U, --update update this program to latest version. Make - sure that you have sufficient permissions - (run with sudo if needed) - -i, --ignore-errors continue on download errors, for example to - skip unavailable videos in a playlist - --abort-on-error Abort downloading of further videos (in the - playlist or the command line) if an error - occurs + -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) + -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs --dump-user-agent display the current browser identification - --list-extractors List all supported extractors and the URLs - they would handle - --extractor-descriptions Output descriptions of all supported - extractors - --default-search PREFIX Use this prefix for unqualified URLs. For - example "gvsearch2:" downloads two videos - from google videos for youtube-dl "large - apple". Use the value "auto" to let - youtube-dl guess ("auto_warning" to emit a - warning when guessing). "error" just throws - an error. The default value "fixup_error" - repairs broken URLs, but emits an error if - this is not possible instead of searching. - --ignore-config Do not read configuration files. When given - in the global configuration file /etc - /youtube-dl.conf: Do not read the user - configuration in ~/.config/youtube- - dl/config (%APPDATA%/youtube-dl/config.txt - on Windows) - --flat-playlist Do not extract the videos of a playlist, - only list them. 
+ --list-extractors List all supported extractors and the URLs they would handle + --extractor-descriptions Output descriptions of all supported extractors + --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". + Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The + default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching. + --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration + in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows) + --flat-playlist Do not extract the videos of a playlist, only list them. --no-color Do not emit color codes in output. ## Network Options: - --proxy URL Use the specified HTTP/HTTPS proxy. Pass in - an empty string (--proxy "") for direct - connection + --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds - --source-address IP Client-side IP address to bind to - (experimental) - -4, --force-ipv4 Make all connections via IPv4 - (experimental) - -6, --force-ipv6 Make all connections via IPv6 - (experimental) + --source-address IP Client-side IP address to bind to (experimental) + -4, --force-ipv4 Make all connections via IPv4 (experimental) + -6, --force-ipv6 Make all connections via IPv6 (experimental) + --cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the option is + not present) is used for the actual downloading.
(experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) --playlist-end NUMBER playlist video to end at (default is last) - --playlist-items ITEM_SPEC playlist video items to download. Specify - indices of the videos in the playlist - separated by commas like: "--playlist-items - 1,2,5,8" if you want to download videos - indexed 1, 2, 5, 8 in the playlist. You can - specify range: "--playlist-items - 1-3,7,10-13", it will download the videos - at index 1, 2, 3, 7, 10, 11, 12 and 13. - --match-title REGEX download only matching titles (regex or - caseless sub-string) - --reject-title REGEX skip download for matching titles (regex or - caseless sub-string) + --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" + if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will + download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. + --match-title REGEX download only matching titles (regex or caseless sub-string) + --reject-title REGEX skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files - --min-filesize SIZE Do not download any videos smaller than - SIZE (e.g. 50k or 44.6m) - --max-filesize SIZE Do not download any videos larger than SIZE - (e.g. 50k or 44.6m) + --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) --date DATE download only videos uploaded in this date - --datebefore DATE download only videos uploaded on or before - this date (i.e. inclusive) - --dateafter DATE download only videos uploaded on or after - this date (i.e.
inclusive) - --min-views COUNT Do not download any videos with less than - COUNT views - --max-views COUNT Do not download any videos with more than - COUNT views - --match-filter FILTER (Experimental) Generic video filter. - Specify any key (see help for -o for a list - of available keys) to match if the key is - present, !key to check if the key is not - present, key > NUMBER (like "comment_count > - 12", also works with >=, <, <=, !=, =) to - compare against a number, and & to require - multiple matches. Values which are not - known are excluded unless you put a - question mark (?) after the operator. For - example, to only match videos that have - been liked more than 100 times and disliked - less than 50 times (or the dislike - functionality is not available at the given - service), but who also have a description, - use --match-filter "like_count > 100 & + --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive) + --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than COUNT views + --max-views COUNT Do not download any videos with more than COUNT views + --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, + !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against + a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the + operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike + functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .
- --no-playlist If the URL refers to a video and a - playlist, download only the video. - --yes-playlist If the URL refers to a video and a - playlist, download the playlist. - --age-limit YEARS download only videos suitable for the given - age - --download-archive FILE Download only videos not listed in the - archive file. Record the IDs of all - downloaded videos in it. - --include-ads Download advertisements as well - (experimental) + --no-playlist If the URL refers to a video and a playlist, download only the video. + --yes-playlist If the URL refers to a video and a playlist, download the playlist. + --age-limit YEARS download only videos suitable for the given age + --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it. + --include-ads Download advertisements as well (experimental) ## Download Options: - -r, --rate-limit LIMIT maximum download rate in bytes per second - (e.g. 50K or 4.2M) - -R, --retries RETRIES number of retries (default is 10), or - "infinite". - --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) - (default is 1024) - --no-resize-buffer do not automatically adjust the buffer - size. By default, the buffer size is - automatically resized from an initial value - of SIZE. + -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M) + -R, --retries RETRIES number of retries (default is 10), or "infinite". + --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024) + --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE. --playlist-reverse Download playlist videos in reverse order - --xattr-set-filesize (experimental) set file xattribute - ytdl.filesize with expected filesize - --hls-prefer-native (experimental) Use the native HLS - downloader instead of ffmpeg. 
- --external-downloader COMMAND (experimental) Use the specified external - downloader. Currently supports - aria2c,curl,wget + --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize + --hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg. + --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget + --external-downloader-args ARGS Give these arguments to the external downloader. ## Filesystem Options: - -a, --batch-file FILE file containing URLs to download ('-' for - stdin) + -a, --batch-file FILE file containing URLs to download ('-' for stdin) --id use only video ID in file name - -o, --output TEMPLATE output filename template. Use %(title)s to - get the title, %(uploader)s for the - uploader name, %(uploader_id)s for the - uploader nickname if different, - %(autonumber)s to get an automatically - incremented number, %(ext)s for the - filename extension, %(format)s for the - format description (like "22 - 1280x720" or - "HD"), %(format_id)s for the unique id of - the format (like Youtube's itags: "137"), - %(upload_date)s for the upload date - (YYYYMMDD), %(extractor)s for the provider - (youtube, metacafe, etc), %(id)s for the - video id, %(playlist_title)s, - %(playlist_id)s, or %(playlist)s (=title if - present, ID otherwise) for the playlist the - video is in, %(playlist_index)s for the - position in the playlist. %(height)s and - %(width)s for the width and height of the - video format. %(resolution)s for a textual - description of the resolution of the video - format. %% for a literal percent. Use - to - output to stdout. Can also be used to - download to a different directory, for - example with -o '/my/downloads/%(uploader)s - /%(title)s-%(id)s.%(ext)s' . 
- --autonumber-size NUMBER Specifies the number of digits in - %(autonumber)s when it is present in output - filename template or --auto-number option - is given - --restrict-filenames Restrict filenames to only ASCII - characters, and avoid "&" and spaces in - filenames - -A, --auto-number [deprecated; use -o - "%(autonumber)s-%(title)s.%(ext)s" ] number - downloaded files starting from 00000 - -t, --title [deprecated] use title in file name - (default) + -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader + nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for + the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"), + %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id, + %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, + %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format. + %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout. + Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' . 
+ --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given + --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames + -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000 + -t, --title [deprecated] use title in file name (default) -l, --literal [deprecated] alias of --title -w, --no-overwrites do not overwrite files - -c, --continue force resume of partially downloaded files. - By default, youtube-dl will resume - downloads if possible. - --no-continue do not resume partially downloaded files - (restart from beginning) - --no-part do not use .part files - write directly - into output file - --no-mtime do not use the Last-modified header to set - the file modification time - --write-description write video description to a .description - file + -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible. + --no-continue do not resume partially downloaded files (restart from beginning) + --no-part do not use .part files - write directly into output file + --no-mtime do not use the Last-modified header to set the file modification time + --write-description write video description to a .description file --write-info-json write video metadata to a .info.json file - --write-annotations write video annotations to a .annotation - file - --load-info FILE json file containing the video information - (created with the "--write-json" option) - --cookies FILE file to read cookies from and dump cookie - jar in - --cache-dir DIR Location in the filesystem where youtube-dl - can store some downloaded information - permanently. By default $XDG_CACHE_HOME - /youtube-dl or ~/.cache/youtube-dl . At the - moment, only YouTube player files (for - videos with obfuscated signatures) are - cached, but that may change. 
+ --write-annotations write video annotations to a .annotation file + --load-info FILE json file containing the video information (created with the "--write-json" option) + --cookies FILE file to read cookies from and dump cookie jar in + --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl + or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may + change. --no-cache-dir Disable filesystem caching --rm-cache-dir Delete all filesystem cache files ## Thumbnail images: --write-thumbnail write thumbnail image to disk --write-all-thumbnails write all thumbnail image formats to disk - --list-thumbnails Simulate and list all available thumbnail - formats + --list-thumbnails Simulate and list all available thumbnail formats ## Verbosity / Simulation Options: -q, --quiet activates quiet mode --no-warnings Ignore warnings - -s, --simulate do not download the video and do not write - anything to disk + -s, --simulate do not download the video and do not write anything to disk --skip-download do not download the video -g, --get-url simulate, quiet but print URL -e, --get-title simulate, quiet but print title @@ -261,153 +159,84 @@ which means you can modify it, redistribute it or use it however you like. --get-duration simulate, quiet but print video length --get-filename simulate, quiet but print output filename --get-format simulate, quiet but print output format - -j, --dump-json simulate, quiet but print JSON information. - See --output for a description of available - keys. - -J, --dump-single-json simulate, quiet but print JSON information - for each command-line argument. If the URL - refers to a playlist, dump the whole - playlist information in a single line. - --print-json Be quiet and print the video information as - JSON (video is still being downloaded). 
+ -j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys. + -J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist + information in a single line. + --print-json Be quiet and print the video information as JSON (video is still being downloaded). --newline output progress bar as new lines --no-progress do not print progress bar --console-title display progress in console titlebar -v, --verbose print various debugging information - --dump-intermediate-pages print downloaded pages to debug problems - (very verbose) - --write-pages Write downloaded intermediary pages to - files in the current directory to debug - problems + --dump-intermediate-pages print downloaded pages to debug problems (very verbose) + --write-pages Write downloaded intermediary pages to files in the current directory to debug problems --print-traffic Display sent and read HTTP traffic - -C, --call-home Contact the youtube-dl server for - debugging. - --no-call-home Do NOT contact the youtube-dl server for - debugging. + -C, --call-home Contact the youtube-dl server for debugging. + --no-call-home Do NOT contact the youtube-dl server for debugging. ## Workarounds: --encoding ENCODING Force the specified encoding (experimental) --no-check-certificate Suppress HTTPS certificate validation. - --prefer-insecure Use an unencrypted connection to retrieve - information about the video. (Currently - supported only for YouTube) + --prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube) --user-agent UA specify a custom user agent - --referer URL specify a custom referer, use if the video - access is restricted to one domain - --add-header FIELD:VALUE specify a custom HTTP header and its value, - separated by a colon ':'. 
You can use this - option multiple times - --bidi-workaround Work around terminals that lack - bidirectional text support. Requires bidiv - or fribidi executable in PATH - --sleep-interval SECONDS Number of seconds to sleep before each - download. + --referer URL specify a custom referer, use if the video access is restricted to one domain + --add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times + --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH + --sleep-interval SECONDS Number of seconds to sleep before each download. ## Video Format Options: - -f, --format FORMAT video format code, specify the order of - preference using slashes, as in -f 22/17/18 - . Instead of format codes, you can select - by extension for the extensions aac, m4a, - mp3, mp4, ogg, wav, webm. You can also use - the special names "best", "bestvideo", - "bestaudio", "worst". You can filter the - video results by putting a condition in - brackets, as in -f "best[height=720]" (or - -f "[filesize>10M]"). This works for - filesize, height, width, tbr, abr, vbr, - asr, and fps and the comparisons <, <=, >, - >=, =, != and for ext, acodec, vcodec, - container, and protocol and the comparisons - =, != . Formats for which the value is not - known are excluded unless you put a - question mark (?) after the operator. You - can combine format filters, so -f "[height - <=? 720][tbr>500]" selects up to 720p - videos (or videos where the height is not - known) with a bitrate of at least 500 - KBit/s. By default, youtube-dl will pick - the best quality. Use commas to download - multiple audio formats, such as -f - 136/137/mp4/bestvideo,140/m4a/bestaudio. 
- You can merge the video and audio of two - formats into a single file using -f <video- - format>+<audio-format> (requires ffmpeg or - avconv), for example -f + -f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by + extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio", + "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]"). + This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, + vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a + question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos + (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality. + Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio + of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f bestvideo+bestaudio. --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific - one is requested + --prefer-free-formats prefer free video formats unless a specific one is requested --max-quality FORMAT highest quality format to download -F, --list-formats list all available formats - --youtube-skip-dash-manifest Do not download the DASH manifest on - YouTube videos - --merge-output-format FORMAT If a merge is required (e.g. - bestvideo+bestaudio), output to given - container format. 
One of mkv, mp4, ogg, - webm, flv. Ignored if no merge is required + --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos + --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no + merge is required ## Subtitle Options: --write-sub write subtitle file - --write-auto-sub write automatic subtitle file (youtube - only) - --all-subs downloads all the available subtitles of - the video + --write-auto-sub write automatic subtitle file (youtube only) + --all-subs downloads all the available subtitles of the video --list-subs lists all available subtitles for the video - --sub-format FORMAT subtitle format, accepts formats - preference, for example: "ass/srt/best" - --sub-lang LANGS languages of the subtitles to download - (optional) separated by commas, use IETF - language tags like 'en,pt' + --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best" + --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' ## Authentication Options: -u, --username USERNAME login with this account ID - -p, --password PASSWORD account password. If this option is left - out, youtube-dl will ask interactively. + -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
-2, --twofactor TWOFACTOR two-factor auth code -n, --netrc use .netrc authentication data --video-password PASSWORD video password (vimeo, smotri) ## Post-processing Options: - -x, --extract-audio convert video files to audio-only files - (requires ffmpeg or avconv and ffprobe or - avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", - "opus", or "wav"; "best" by default - --audio-quality QUALITY ffmpeg/avconv audio quality specification, - insert a value between 0 (better) and 9 - (worse) for VBR or a specific bitrate like - 128K (default 5) - --recode-video FORMAT Encode the video to another format if - necessary (currently supported: - mp4|flv|ogg|webm|mkv) - -k, --keep-video keeps the video file on disk after the - post-processing; the video is erased by - default - --no-post-overwrites do not overwrite post-processed files; the - post-processed files are overwritten by - default - --embed-subs embed subtitles in the video (only for mp4 - videos) + -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) + --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default + --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K + (default 5) + --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) + -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default + --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default + --embed-subs embed subtitles in the video (only for mp4 videos) --embed-thumbnail embed thumbnail in the audio as cover art --add-metadata write metadata to the video file - --xattrs write metadata to the video file's xattrs - (using dublin core and xdg standards) - --fixup POLICY 
Automatically correct known faults of the - file. One of never (do nothing), warn (only - emit a warning), detect_or_warn (the - default; fix file if we can, warn - otherwise) - --prefer-avconv Prefer avconv over ffmpeg for running the - postprocessors (default) - --prefer-ffmpeg Prefer ffmpeg over avconv for running the - postprocessors - --ffmpeg-location PATH Location of the ffmpeg/avconv binary; - either the path to the binary or its - containing directory. - --exec CMD Execute a command on the file after - downloading, similar to find's -exec - syntax. Example: --exec 'adb push {} - /sdcard/Music/ && rm {}' + --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards) + --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; + fix file if we can, warn otherwise) + --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default) + --prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors + --ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory. + --exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm + {}' + --convert-subtitles FORMAT Convert the subtitles to another format (currently supported: srt|ass|vtt) # CONFIGURATION @@ -527,6 +356,10 @@ YouTube requires an additional signature since September 2012 which is not suppo In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. +### HTTP Error 429: Too Many Requests or 402: Payment Required + +These two error codes indicate that the service is blocking your IP address because of overuse.
Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address. + ### SyntaxError: Non-ASCII character ### The error diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 49b4ac8c1..062cb3d62 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -210,6 +210,7 @@ - **Jove** - **jpopsuki.tv** - **Jukebox** + - **Kaltura** - **Kankan** - **Karaoketv** - **keek** @@ -308,6 +309,7 @@ - **Nuvid** - **NYTimes** - **ocw.mit.edu** + - **Odnoklassniki** - **OktoberfestTV** - **on.aol.com** - **Ooyala** @@ -334,6 +336,7 @@ - **PornoXO** - **PromptFile** - **prosiebensat1**: ProSiebenSat.1 Digital + - **Puls4** - **Pyvideo** - **QuickVid** - **R7** @@ -412,7 +415,7 @@ - **StreamCZ** - **StreetVoice** - **SunPorno** - - **SVTPlay** + - **SVTPlay**: SVT Play and Öppet arkiv - **SWRMediathek** - **Syfy** - **SztvHu** diff --git a/test/test_netrc.py b/test/test_netrc.py new file mode 100644 index 000000000..7cf3a6a2e --- /dev/null +++ b/test/test_netrc.py @@ -0,0 +1,26 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from youtube_dl.extractor import ( + gen_extractors, +) + + +class TestNetRc(unittest.TestCase): + def test_netrc_present(self): + for ie in gen_extractors(): + if not hasattr(ie, '_login'): + continue + self.assertTrue( + hasattr(ie, '_NETRC_MACHINE'), + 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 3fba8ae11..64fad58ad 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -85,8 +85,11 @@ class TestUtil(unittest.TestCase): self.assertEqual( sanitize_filename('New World record at 0:12:34'), 'New
World record at 0_12_34') + self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') forbidden = '"\0\\/' for fc in forbidden: diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 76fc394bc..df2aebb59 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -4,8 +4,10 @@ from __future__ import absolute_import, unicode_literals import collections +import contextlib import datetime import errno +import fileinput import io import itertools import json @@ -28,6 +30,7 @@ from .compat import ( compat_basestring, compat_cookiejar, compat_expanduser, + compat_get_terminal_size, compat_http_client, compat_kwargs, compat_str, @@ -46,12 +49,12 @@ from .utils import ( ExtractorError, format_bytes, formatSeconds, - get_term_width, locked_file, make_HTTPS_handler, MaxDownloadsReached, PagedList, parse_filesize, + PerRequestProxyHandler, PostProcessingError, platform_name, preferredencoding, @@ -181,6 +184,8 @@ class YoutubeDL(object): prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use + cn_verification_proxy: URL of the proxy to use for IP address verification + on Chinese sites. (Experimental) socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -247,10 +252,10 @@ class YoutubeDL(object): hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv. 
The following parameters are not used by YoutubeDL itself, they are used by - the FileDownloader: + the downloader (see youtube_dl/downloader/common.py): nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, - xattr_set_filesize. + xattr_set_filesize, external_downloader_args. The following options are used by the post processors: prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available, @@ -284,7 +289,7 @@ class YoutubeDL(object): try: import pty master, slave = pty.openpty() - width = get_term_width() + width = compat_get_terminal_size().columns if width is None: width_args = [] else: @@ -1300,17 +1305,18 @@ class YoutubeDL(object): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] + ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] if sub_info.get('data') is not None: sub_data = sub_info['data'] else: try: - uf = self.urlopen(sub_info['url']) - sub_data = uf.read().decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + sub_data = ie._download_webpage( + sub_info['url'], info_dict['id'], note=False) + except ExtractorError as err: self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, compat_str(err))) + (sub_lang, compat_str(err.cause))) continue try: sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -1451,8 +1457,11 @@ class YoutubeDL(object): return self._download_retcode def download_with_info_file(self, info_filename): - with io.open(info_filename, 'r', encoding='utf-8') as f: - info = json.load(f) + with contextlib.closing(fileinput.FileInput( + [info_filename], mode='r', + openhook=fileinput.hook_encoded('utf-8'))) as f: + # FileInput doesn't 
have a read method, we can't call json.load + info = json.loads('\n'.join(f)) try: self.process_ie_result(info, download=True) except DownloadError: @@ -1756,13 +1765,14 @@ class YoutubeDL(object): # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] - proxy_handler = compat_urllib_request.ProxyHandler(proxies) + proxy_handler = PerRequestProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( - https_handler, proxy_handler, cookie_processor, ydlh) + proxy_handler, https_handler, cookie_processor, ydlh) + # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5ce201800..a08ddd670 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -9,6 +9,7 @@ import codecs import io import os import random +import shlex import sys @@ -170,6 +171,9 @@ def _real_main(argv=None): if opts.recodevideo is not None: if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']: parser.error('invalid video recode format specified') + if opts.convertsubtitles is not None: + if opts.convertsubtitles not in ['srt', 'vtt', 'ass']: + parser.error('invalid subtitle format specified') if opts.date is not None: date = DateRange.day(opts.date) @@ -223,6 +227,11 @@ def _real_main(argv=None): 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, }) + if opts.convertsubtitles: + postprocessors.append({ + 'key': 'FFmpegSubtitlesConvertor', + 'format': opts.convertsubtitles, + }) if opts.embedsubtitles: postprocessors.append({ 'key': 
'FFmpegEmbedSubtitle', @@ -247,6 +256,9 @@ def _real_main(argv=None): xattr # Confuse flake8 except ImportError: parser.error('setting filesize xattr requested but python-xattr is not available') + external_downloader_args = None + if opts.external_downloader_args: + external_downloader_args = shlex.split(opts.external_downloader_args) match_filter = ( None if opts.match_filter is None else match_filter_func(opts.match_filter)) @@ -351,6 +363,8 @@ def _real_main(argv=None): 'no_color': opts.no_color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, + 'external_downloader_args': external_downloader_args, + 'cn_verification_proxy': opts.cn_verification_proxy, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index e989cdbbd..b2bf149ef 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -1,9 +1,11 @@ from __future__ import unicode_literals +import collections import getpass import optparse import os import re +import shutil import socket import subprocess import sys @@ -364,6 +366,33 @@ def workaround_optparse_bug9161(): return real_add_option(self, *bargs, **bkwargs) optparse.OptionGroup.add_option = _compat_add_option +if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 + compat_get_terminal_size = shutil.get_terminal_size +else: + _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) + + def compat_get_terminal_size(): + columns = compat_getenv('COLUMNS', None) + if columns: + columns = int(columns) + else: + columns = None + lines = compat_getenv('LINES', None) + if lines: + lines = int(lines) + else: + lines = None + + try: + sp = subprocess.Popen( + ['stty', 'size'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = sp.communicate() + lines, columns = map(int, out.split()) + except: + pass + return _terminal_size(columns, lines) + __all__ = [ 'compat_HTTPError', @@ -371,6 +400,7 @@ __all__ = [ 'compat_chr', 'compat_cookiejar', 
'compat_expanduser', + 'compat_get_terminal_size', 'compat_getenv', 'compat_getpass', 'compat_html_entities', diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 3ae90021a..8ed5c19a6 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -42,6 +42,8 @@ class FileDownloader(object): max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. (experimenatal) + external_downloader_args: A list of additional command-line arguments for the + external downloader. Subclasses of this one must re-define the real_download method. """ diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 51c41c704..1673b2382 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -51,6 +51,13 @@ class ExternalFD(FileDownloader): return [] return [command_option, source_address] + def _configuration_args(self, default=[]): + ex_args = self.params.get('external_downloader_args') + if ex_args is None: + return default + assert isinstance(ex_args, list) + return ex_args + def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = self._make_cmd(tmpfilename, info_dict) @@ -79,6 +86,7 @@ class CurlFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--interface') + cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -89,15 +97,16 @@ class WgetFD(ExternalFD): for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] cmd += self._source_address('--bind-address') + cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class Aria2cFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): - cmd = [ - self.exe, '-c', - '--min-split-size', '1M', '--max-connection-per-server', '4'] + cmd = 
[self.exe, '-c'] + cmd += self._configuration_args([ + '--min-split-size', '1M', '--max-connection-per-server', '4']) dn = os.path.dirname(tmpfilename) if dn: cmd += ['--dir', dn] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aecb67bf4..ffcc7d9ab 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -374,6 +374,7 @@ from .pornotube import PornotubeIE from .pornoxo import PornoXOIE from .promptfile import PromptFileIE from .prosiebensat1 import ProSiebenSat1IE +from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .r7 import R7IE diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 7669e0e3d..29f8795d3 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -19,6 +19,7 @@ from ..utils import ( class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' + _NETRC_MACHINE = 'atresplayer' _TESTS = [ { 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 7977fa8d0..cf39c0c21 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -767,6 +767,10 @@ class InfoExtractor(object): formats) def _is_valid_url(self, url, video_id, item='video'): + url = self._proto_relative_url(url, scheme='http:') + # For now assume non HTTP(S) URLs always valid + if not (url.startswith('http://') or url.startswith('https://')): + return True try: self._request_webpage(url, video_id, 'Checking %s URL' % item) return True diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f1da7d09b..e64b88fbc 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ 
b/youtube_dl/extractor/crunchyroll.py @@ -29,6 +29,7 @@ from ..aes import ( class CrunchyrollIE(InfoExtractor): _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' + _NETRC_MACHINE = 'crunchyroll' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py index 80e9084f4..e47f3e27a 100644 --- a/youtube_dl/extractor/escapist.py +++ b/youtube_dl/extractor/escapist.py @@ -8,6 +8,7 @@ from ..compat import ( from ..utils import ( ExtractorError, js_to_json, + parse_duration, ) @@ -25,6 +26,7 @@ class EscapistIE(InfoExtractor): 'uploader': 'The Escapist Presents', 'title': "Breaking Down Baldur's Gate", 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 264, } } @@ -41,6 +43,7 @@ class EscapistIE(InfoExtractor): r"<h1\s+class='headline'>(.*?)</a>", webpage, 'uploader', fatal=False) description = self._html_search_meta('description', webpage) + duration = parse_duration(self._html_search_meta('duration', webpage)) raw_title = self._html_search_meta('title', webpage, fatal=True) title = raw_title.partition(' : ')[2] @@ -105,6 +108,7 @@ class EscapistIE(InfoExtractor): 'title': title, 'thumbnail': self._og_search_thumbnail(webpage), 'description': description, + 'duration': duration, } if self._downloader.params.get('include_ads') and ad_formats: diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index f7b467b0a..51796f3a4 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -12,6 +12,7 @@ from ..utils import remove_end class GDCVaultIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)' + _NETRC_MACHINE = 'gdcvault' _TESTS = [ { 'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple', 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 27e2bc300..5dc53685c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, + xpath_text, ) from .brightcove import BrightcoveIE from .ooyala import OoyalaIE @@ -569,6 +570,16 @@ class GenericIE(InfoExtractor): 'title': 'John Carlson Postgame 2/25/15', }, }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + } ] def report_following_redirect(self, new_url): @@ -580,11 +591,24 @@ class GenericIE(InfoExtractor): playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text - entries = [{ - '_type': 'url', - 'url': e.find('link').text, - 'title': e.find('title').text, - } for e in doc.findall('./channel/item')] + entries = [] + for it in doc.findall('./channel/item'): + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not next_url: + continue + + entries.append({ + '_type': 'url', + 'url': next_url, + 'title': it.find('title').text, + }) return { '_type': 'playlist', diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 583ce35b9..85eee141b 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -7,8 +7,9 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_urllib_parse, + compat_urllib_request, + compat_urlparse, ) from ..utils import ( determine_ext, @@ -39,12 +40,20 @@ class LetvIE(InfoExtractor): 'title': '美人天下01', 
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda', }, - 'expected_warnings': [ - 'publish time' - ] + }, { + 'note': 'This video is available only in Mainland China, thus a proxy is needed', + 'url': 'http://www.letv.com/ptv/vplay/1118082.html', + 'md5': 'f80936fbe20fb2f58648e81386ff7927', + 'info_dict': { + 'id': '1118082', + 'ext': 'mp4', + 'title': '与龙共舞 完整版', + 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', + }, + 'params': { + 'cn_verification_proxy': 'http://proxy.uku.im:8888' + }, }] - # http://www.letv.com/ptv/vplay/1118082.html - # This video is available only in Mainland China @staticmethod def urshift(val, n): @@ -76,8 +85,14 @@ class LetvIE(InfoExtractor): 'tkey': self.calc_time_key(int(time.time())), 'domain': 'www.letv.com' } + play_json_req = compat_urllib_request.Request( + 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) + ) + play_json_req.add_header( + 'Ytdl-request-proxy', + self._downloader.params.get('cn_verification_proxy')) play_json = self._download_json( - 'http://api.letv.com/mms/out/video/playJson?' 
+ compat_urllib_parse.urlencode(params), + play_json_req, media_id, 'playJson data') # Check for errors @@ -114,7 +129,8 @@ class LetvIE(InfoExtractor): url_info_dict = { 'url': media_url, - 'ext': determine_ext(dispatch[format_id][1]) + 'ext': determine_ext(dispatch[format_id][1]), + 'format_id': format_id, } if format_id[-1:] == 'p': @@ -123,7 +139,7 @@ class LetvIE(InfoExtractor): urls.append(url_info_dict) publish_time = parse_iso8601(self._html_search_regex( - r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False), + r'发布时间 ([^<>]+) ', page, 'publish time', default=None), delimiter=' ', timezone=datetime.timedelta(hours=8)) description = self._html_search_meta('description', page, fatal=False) diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 5dc22da22..cfd3b14f4 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -15,18 +15,73 @@ from ..utils import ( ) -class LyndaIE(InfoExtractor): +class LyndaBaseIE(InfoExtractor): + _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' + _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' 
+ _NETRC_MACHINE = 'lynda' + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + + login_form = { + 'username': username, + 'password': password, + 'remember': 'false', + 'stayPut': 'false' + } + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) + login_page = self._download_webpage( + request, None, 'Logging in as %s' % username) + + # Not (yet) logged in + m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) + if m is not None: + response = m.group('json') + response_json = json.loads(response) + state = response_json['state'] + + if state == 'notlogged': + raise ExtractorError( + 'Unable to login, incorrect username and/or password', + expected=True) + + # This is when we get popup: + # > You're already logged in to lynda.com on two devices. + # > If you log in here, we'll log you out of another device. + # So, we need to confirm this. 
+ if state == 'conflicted': + confirm_form = { + 'username': '', + 'password': '', + 'resolve': 'true', + 'remember': 'false', + 'stayPut': 'false', + } + request = compat_urllib_request.Request( + self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) + login_page = self._download_webpage( + request, None, + 'Confirming log in and log out from another device') + + if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: + raise ExtractorError('Unable to log in') + + +class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)' - _LOGIN_URL = 'https://www.lynda.com/login/login.aspx' + _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' _NETRC_MACHINE = 'lynda' - _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' - ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' 
- _TESTS = [{ 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', @@ -41,23 +96,22 @@ class LyndaIE(InfoExtractor): 'only_matching': True, }] - def _real_initialize(self): - self._login() - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = self._match_id(url) - page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id, - 'Downloading video JSON') + page = self._download_webpage( + 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, + video_id, 'Downloading video JSON') video_json = json.loads(page) if 'Status' in video_json: - raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True) + raise ExtractorError( + 'lynda returned error: %s' % video_json['Message'], expected=True) if video_json['HasAccess'] is False: raise ExtractorError( - 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True) + 'Video %s is only available for members. 
' + % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True) video_id = compat_str(video_json['ID']) duration = video_json['DurationInSeconds'] @@ -100,50 +154,9 @@ class LyndaIE(InfoExtractor): 'formats': formats } - def _login(self): - (username, password) = self._get_login_info() - if username is None: - return - - login_form = { - 'username': username, - 'password': password, - 'remember': 'false', - 'stayPut': 'false' - } - request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) - login_page = self._download_webpage(request, None, 'Logging in as %s' % username) - - # Not (yet) logged in - m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page) - if m is not None: - response = m.group('json') - response_json = json.loads(response) - state = response_json['state'] - - if state == 'notlogged': - raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) - - # This is when we get popup: - # > You're already logged in to lynda.com on two devices. - # > If you log in here, we'll log you out of another device. - # So, we need to confirm this. 
- if state == 'conflicted': - confirm_form = { - 'username': '', - 'password': '', - 'resolve': 'true', - 'remember': 'false', - 'stayPut': 'false', - } - request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form)) - login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device') - - if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: - raise ExtractorError('Unable to log in') - def _fix_subtitles(self, subs): srt = '' + seq_counter = 0 for pos in range(0, len(subs) - 1): seq_current = subs[pos] m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) @@ -155,8 +168,10 @@ class LyndaIE(InfoExtractor): continue appear_time = m_current.group('timecode') disappear_time = m_next.group('timecode') - text = seq_current['Caption'].lstrip() - srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) + text = seq_current['Caption'].strip() + if text: + seq_counter += 1 + srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text) if srt: return srt @@ -169,7 +184,7 @@ class LyndaIE(InfoExtractor): return {} -class LyndaCourseIE(InfoExtractor): +class LyndaCourseIE(LyndaBaseIE): IE_NAME = 'lynda:course' IE_DESC = 'lynda.com online courses' @@ -182,35 +197,37 @@ class LyndaCourseIE(InfoExtractor): course_path = mobj.group('coursepath') course_id = mobj.group('courseid') - page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, - course_id, 'Downloading course JSON') + page = self._download_webpage( + 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, + course_id, 'Downloading course JSON') course_json = json.loads(page) if 'Status' in course_json and course_json['Status'] == 'NotFound': - raise ExtractorError('Course %s does not exist' % course_id, expected=True) + raise ExtractorError( + 'Course %s does not exist' % course_id, expected=True) 
unaccessible_videos = 0 videos = [] - (username, _) = self._get_login_info() # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided # by single video API anymore for chapter in course_json['Chapters']: for video in chapter['Videos']: - if username is None and video['HasAccess'] is False: + if video['HasAccess'] is False: unaccessible_videos += 1 continue videos.append(video['ID']) if unaccessible_videos > 0: - self._downloader.report_warning('%s videos are only available for members and will not be downloaded. ' - % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT) + self._downloader.report_warning( + '%s videos are only available for members (or paid members) and will not be downloaded. ' + % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) entries = [ - self.url_result('http://www.lynda.com/%s/%s-4.html' % - (course_path, video_id), - 'Lynda') + self.url_result( + 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'Lynda') for video_id in videos] course_title = course_json['Title'] diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py new file mode 100644 index 000000000..cce84b9e4 --- /dev/null +++ b/youtube_dl/extractor/puls4.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + unified_strdate, + int_or_none, +) + + +class Puls4IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816', + 'md5': '49f6a6629747eeec43cef6a46b5df81d', + 'info_dict': { + 'id': '2716816', + 'ext': 'mp4', + 'title': 'Pro und Contra vom 23.02.2015', + 'description': 'md5:293e44634d9477a67122489994675db6', + 'duration': 2989, + 'upload_date': '20150224', + 'uploader': 'PULS_4', + }, + 'skip': 'Only works from Germany', + }, { + 'url': 
'http://www.puls4.com/video/kult-spielfilme/play/1298106', + 'md5': '6a48316c8903ece8dab9b9a7bf7a59ec', + 'info_dict': { + 'id': '1298106', + 'ext': 'mp4', + 'title': 'Lucky Fritz', + }, + 'skip': 'Only works from Germany', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + error_message = self._html_search_regex( + r'<div class="message-error">(.+?)</div>', + webpage, 'error message', default=None) + if error_message: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) + + real_url = self._html_search_regex( + r'\"fsk-button\".+?href=\"([^"]+)', + webpage, 'fsk_button', default=None) + if real_url: + webpage = self._download_webpage(real_url, video_id) + + player = self._search_regex( + r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}', + webpage, 'player') + + player_json = self._parse_json( + '[%s]' % player, video_id, + transform_source=lambda s: s.replace('undefined,', '')) + + formats = None + result = None + + for v in player_json: + if isinstance(v, list) and not formats: + formats = [{ + 'url': f['url'], + 'format': 'hd' if f.get('hd') else 'sd', + 'width': int_or_none(f.get('size_x')), + 'height': int_or_none(f.get('size_y')), + 'tbr': int_or_none(f.get('bitrate')), + } for f in v] + self._sort_formats(formats) + elif isinstance(v, dict) and not result: + result = { + 'id': video_id, + 'title': v['videopartname'].strip(), + 'description': v.get('videotitle'), + 'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')), + 'upload_date': unified_strdate(v.get('clipreleasetime')), + 'uploader': v.get('channel'), + } + + result['formats'] = formats + + return result diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index c5284fa67..9d4505972 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -180,7 +180,7 @@ class 
SoundcloudIE(InfoExtractor): 'format_id': key, 'url': url, 'play_path': 'mp3:' + path, - 'ext': ext, + 'ext': 'flv', 'vcodec': 'none', }) @@ -200,8 +200,9 @@ class SoundcloudIE(InfoExtractor): if f['format_id'].startswith('rtmp'): f['protocol'] = 'rtmp' - self._sort_formats(formats) - result['formats'] = formats + self._check_formats(formats, track_id) + self._sort_formats(formats) + result['formats'] = formats return result diff --git a/youtube_dl/extractor/svtplay.py b/youtube_dl/extractor/svtplay.py index eadb9ccb4..433dfd1cb 100644 --- a/youtube_dl/extractor/svtplay.py +++ b/youtube_dl/extractor/svtplay.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( determine_ext, @@ -8,23 +10,40 @@ from ..utils import ( class SVTPlayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)' - _TEST = { + IE_DESC = 'SVT Play and Öppet arkiv' + _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' + _TESTS = [{ 'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', - 'md5': 'f4a184968bc9c802a9b41316657aaa80', + 'md5': 'ade3def0643fa1c40587a422f98edfd9', 'info_dict': { 'id': '2609989', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'SM veckan vinter, Örebro - Rally, final', 'duration': 4500, 'thumbnail': 're:^https?://.*[\.-]jpg$', + 'age_limit': 0, }, - } + }, { + 'url': 'http://www.oppetarkiv.se/video/1058509/rederiet-sasong-1-avsnitt-1-av-318', + 'md5': 'c3101a17ce9634f4c1f9800f0746c187', + 'info_dict': { + 'id': '1058509', + 'ext': 'flv', + 'title': 'Farlig kryssning', + 'duration': 2566, + 'thumbnail': 're:^https?://.*[\.-]jpg$', + 'age_limit': 0, + }, + 'skip': 'Only works from Sweden', + }] def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + host = mobj.group('host') + info = 
self._download_json( - 'http://www.svtplay.se/video/%s?output=json' % video_id, video_id) + 'http://www.%s.se/video/%s?output=json' % (host, video_id), video_id) title = info['context']['title'] thumbnail = info['context'].get('thumbnailImage') @@ -33,11 +52,16 @@ class SVTPlayIE(InfoExtractor): formats = [] for vr in video_info['videoReferences']: vurl = vr['url'] - if determine_ext(vurl) == 'm3u8': + ext = determine_ext(vurl) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( vurl, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=vr.get('playerType'))) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + vurl + '?hdcore=3.3.0', video_id, + f4m_id=vr.get('playerType'))) else: formats.append({ 'format_id': vr.get('playerType'), @@ -46,6 +70,7 @@ class SVTPlayIE(InfoExtractor): self._sort_formats(formats) duration = video_info.get('materialLength') + age_limit = 18 if video_info.get('inappropriateForChildren') else 0 return { 'id': video_id, @@ -53,4 +78,5 @@ class SVTPlayIE(InfoExtractor): 'formats': formats, 'thumbnail': thumbnail, 'duration': duration, + 'age_limit': age_limit, } diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index 4b0d8988d..8af136147 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -23,6 +23,7 @@ class TwitchBaseIE(InfoExtractor): _API_BASE = 'https://api.twitch.tv' _USHER_BASE = 'http://usher.twitch.tv' _LOGIN_URL = 'https://secure.twitch.tv/user/login' + _NETRC_MACHINE = 'twitch' def _handle_error(self, response): if not isinstance(response, dict): @@ -34,7 +35,15 @@ class TwitchBaseIE(InfoExtractor): expected=True) def _download_json(self, url, video_id, note='Downloading JSON metadata'): - response = super(TwitchBaseIE, self)._download_json(url, video_id, note) + headers = { + 'Referer': 'http://api.twitch.tv/crossdomain/receiver.html?v=2', + 'X-Requested-With': 'XMLHttpRequest', + } + for cookie in self._downloader.cookiejar: + if 
cookie.name == 'api_token': + headers['Twitch-Api-Token'] = cookie.value + request = compat_urllib_request.Request(url, headers=headers) + response = super(TwitchBaseIE, self)._download_json(request, video_id, note) self._handle_error(response) return response diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 7dea8c59d..cc384adbf 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -31,7 +31,7 @@ class VKIE(InfoExtractor): 'id': '162222515', 'ext': 'flv', 'title': 'ProtivoGunz - Хуёвая песня', - 'uploader': 're:Noize MC.*', + 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 'duration': 195, 'upload_date': '20120212', }, @@ -140,7 +140,7 @@ class VKIE(InfoExtractor): if not video_id: video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) - info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id + info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id info_page = self._download_webpage(info_url, video_id) ERRORS = { @@ -152,7 +152,10 @@ class VKIE(InfoExtractor): 'use --username and --password options to provide account credentials.', r'<!>Unknown error': - 'Video %s does not exist.' 
+ 'Video %s does not exist.', + + r'<!>Видео временно недоступно': + 'Video %s is temporarily unavailable.', } for error_re, error_msg in ERRORS.items(): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 886ce9613..a4ca8adc4 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -8,11 +8,11 @@ import sys from .downloader.external import list_external_downloaders from .compat import ( compat_expanduser, + compat_get_terminal_size, compat_getenv, compat_kwargs, ) from .utils import ( - get_term_width, write_string, ) from .version import __version__ @@ -100,7 +100,7 @@ def parseOpts(overrideArguments=None): return opts # No need to wrap help messages if we're on a wide console - columns = get_term_width() + columns = compat_get_terminal_size().columns max_width = columns if columns else 80 max_help_position = 80 @@ -195,6 +195,12 @@ def parseOpts(overrideArguments=None): action='store_const', const='::', dest='source_address', help='Make all connections via IPv6 (experimental)', ) + network.add_option( + '--cn-verification-proxy', + dest='cn_verification_proxy', default=None, metavar='URL', + help='Use this proxy to verify the IP address for some Chinese sites. ' + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)' + ) selection = optparse.OptionGroup(parser, 'Video Selection') selection.add_option( @@ -435,8 +441,12 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', - help='(experimental) Use the specified external downloader. ' + help='Use the specified external downloader. 
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Post-processor that converts downloaded subtitle files via ffmpeg.

    The target format is given as a file extension (the command-line help
    for --convert-subtitles lists srt, ass and vtt).
    """

    def __init__(self, downloader=None, format=None):
        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
        # Extension of the subtitle format to convert to (e.g. 'srt').
        self.format = format

    def run(self, info):
        """Convert every requested subtitle to ``self.format``.

        Reads ``info['requested_subtitles']`` (a mapping of language to a
        dict carrying at least an ``'ext'`` key) and ``info['filepath']``.
        Each subtitle whose extension differs from the target is converted
        and its entry is replaced by the new extension and the converted
        text.

        Returns the tuple ``(True, info)``.
        """
        subs = info.get('requested_subtitles')
        filename = info['filepath']
        new_ext = self.format
        # The extension is 'vtt', but the name passed to ffmpeg's -f option
        # is 'webvtt'.
        new_format = 'webvtt' if new_ext == 'vtt' else new_ext

        if subs is None:
            self._downloader.to_screen(
                '[ffmpeg] There aren\'t any subtitles to convert')
            return True, info

        self._downloader.to_screen('[ffmpeg] Converting subtitles')
        # Iterate over a snapshot because entries of ``subs`` are replaced
        # inside the loop.
        for lang, sub in list(subs.items()):
            ext = sub['ext']
            if ext == new_ext:
                # Fixed: the two adjacent literals used to be joined without
                # a space ("requestedformat").
                self._downloader.to_screen(
                    '[ffmpeg] Subtitle file for %s is already in the requested '
                    'format' % new_ext)
                continue

            old_file = subtitles_filename(filename, lang, ext)
            new_file = subtitles_filename(filename, lang, new_ext)
            self.run_ffmpeg(old_file, new_file, ['-f', new_format])

            with io.open(new_file, 'rt', encoding='utf-8') as f:
                subs[lang] = {
                    # Fixed: record the extension of the converted file, not
                    # the one the subtitle had before conversion.
                    'ext': new_ext,
                    'data': f.read(),
                }

        return True, info
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden per request.

    A request carrying a ``Ytdl-request-proxy`` header is routed through the
    proxy named by that header instead of the configured one.  The special
    value ``__noproxy__`` means no proxy is applied.  The header is removed
    from the request once it has been read.
    """

    def __init__(self, proxies=None):
        # Install default http/https handlers that resolve to "no proxy", so
        # proxy_open() runs (and can honour the header) even for schemes that
        # have no configured proxy.  The base-class constructor is called
        # afterwards; the stdlib ProxyHandler sets a `<scheme>_open` attribute
        # for each entry in `proxies`, replacing these defaults.
        for scheme in ('http', 'https'):
            setattr(
                self, '%s_open' % scheme,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Apply the per-request proxy override, then defer to the base class.

        Returns None when the effective proxy is ``__noproxy__``; otherwise
        returns whatever ``ProxyHandler.proxy_open`` returns.
        """
        # Read and strip the header in one step.
        req_proxy = req.headers.pop('Ytdl-request-proxy', None)
        if req_proxy is not None:
            proxy = req_proxy

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)