-rw-r--r--  .travis.yml | 1
-rw-r--r--  AUTHORS | 6
-rw-r--r--  CONTRIBUTING.md | 4
-rw-r--r--  Makefile | 2
-rw-r--r--  README.md | 438
-rw-r--r--  devscripts/check-porn.py | 2
-rw-r--r--  devscripts/generate_aes_testdata.py | 42
-rw-r--r--  docs/supportedsites.md | 26
-rw-r--r--  test/test_YoutubeDL.py | 116
-rw-r--r--  test/test_aes.py | 55
-rw-r--r--  test/test_all_urls.py | 12
-rw-r--r--  test/test_execution.py | 9
-rw-r--r--  test/test_http.py | 49
-rw-r--r--  test/test_netrc.py | 26
-rw-r--r--  test/test_postprocessors.py | 17
-rw-r--r--  test/test_subtitles.py | 13
-rw-r--r--  test/test_unicode_literals.py | 11
-rw-r--r--  test/test_utils.py | 76
-rw-r--r--  tox.ini | 7
-rwxr-xr-x  youtube_dl/YoutubeDL.py | 50
-rw-r--r--  youtube_dl/__init__.py | 11
-rw-r--r--  youtube_dl/compat.py | 2
-rw-r--r--  youtube_dl/downloader/common.py | 6
-rw-r--r--  youtube_dl/downloader/external.py | 15
-rw-r--r--  youtube_dl/downloader/f4m.py | 4
-rw-r--r--  youtube_dl/downloader/http.py | 10
-rw-r--r--  youtube_dl/downloader/rtmp.py | 2
-rw-r--r--  youtube_dl/extractor/__init__.py | 36
-rw-r--r--  youtube_dl/extractor/adultswim.py | 44
-rw-r--r--  youtube_dl/extractor/aftenposten.py | 11
-rw-r--r--  youtube_dl/extractor/ard.py | 3
-rw-r--r--  youtube_dl/extractor/arte.py | 1
-rw-r--r--  youtube_dl/extractor/atresplayer.py | 1
-rw-r--r--  youtube_dl/extractor/beatportpro.py | 103
-rw-r--r--  youtube_dl/extractor/breakcom.py | 2
-rw-r--r--  youtube_dl/extractor/cloudy.py | 3
-rw-r--r--  youtube_dl/extractor/cnn.py | 5
-rw-r--r--  youtube_dl/extractor/common.py | 16
-rw-r--r--  youtube_dl/extractor/crunchyroll.py | 9
-rw-r--r--  youtube_dl/extractor/dailymotion.py | 18
-rw-r--r--  youtube_dl/extractor/dhm.py | 73
-rw-r--r--  youtube_dl/extractor/douyutv.py | 112
-rw-r--r--  youtube_dl/extractor/dumpert.py | 56
-rw-r--r--  youtube_dl/extractor/eagleplatform.py | 98
-rw-r--r--  youtube_dl/extractor/eighttracks.py | 14
-rw-r--r--  youtube_dl/extractor/eroprofile.py | 52
-rw-r--r--  youtube_dl/extractor/extremetube.py | 30
-rw-r--r--  youtube_dl/extractor/footyroom.py | 41
-rw-r--r--  youtube_dl/extractor/funnyordie.py | 9
-rw-r--r--  youtube_dl/extractor/gazeta.py | 38
-rw-r--r--  youtube_dl/extractor/gdcvault.py | 1
-rw-r--r--  youtube_dl/extractor/generic.py | 155
-rw-r--r--  youtube_dl/extractor/globo.py | 2
-rw-r--r--  youtube_dl/extractor/grooveshark.py | 6
-rw-r--r--  youtube_dl/extractor/jeuxvideo.py | 36
-rw-r--r--  youtube_dl/extractor/kanalplay.py | 96
-rw-r--r--  youtube_dl/extractor/krasview.py | 6
-rw-r--r--  youtube_dl/extractor/letv.py | 37
-rw-r--r--  youtube_dl/extractor/libsyn.py | 59
-rw-r--r--  youtube_dl/extractor/livestream.py | 41
-rw-r--r--  youtube_dl/extractor/lrt.py | 1
-rw-r--r--  youtube_dl/extractor/lynda.py | 157
-rw-r--r--  youtube_dl/extractor/mixcloud.py | 69
-rw-r--r--  youtube_dl/extractor/mlb.py | 6
-rw-r--r--  youtube_dl/extractor/nbc.py | 53
-rw-r--r--  youtube_dl/extractor/niconico.py | 28
-rw-r--r--  youtube_dl/extractor/npo.py | 9
-rw-r--r--  youtube_dl/extractor/nrk.py | 80
-rw-r--r--  youtube_dl/extractor/nytimes.py | 40
-rw-r--r--  youtube_dl/extractor/orf.py | 94
-rw-r--r--  youtube_dl/extractor/phoenix.py | 40
-rw-r--r--  youtube_dl/extractor/pladform.py | 90
-rw-r--r--  youtube_dl/extractor/playfm.py | 87
-rw-r--r--  youtube_dl/extractor/playwire.py | 78
-rw-r--r--  youtube_dl/extractor/pornhub.py | 17
-rw-r--r--  youtube_dl/extractor/primesharetv.py | 69
-rw-r--r--  youtube_dl/extractor/redtube.py | 9
-rw-r--r--  youtube_dl/extractor/rtve.py | 41
-rw-r--r--  youtube_dl/extractor/safari.py | 157
-rw-r--r--  youtube_dl/extractor/slideshare.py | 2
-rw-r--r--  youtube_dl/extractor/sohu.py | 93
-rw-r--r--  youtube_dl/extractor/soundcloud.py | 11
-rw-r--r--  youtube_dl/extractor/ssa.py | 58
-rw-r--r--  youtube_dl/extractor/teamcoco.py | 6
-rw-r--r--  youtube_dl/extractor/theplatform.py | 2
-rw-r--r--  youtube_dl/extractor/tvplay.py | 3
-rw-r--r--  youtube_dl/extractor/twentytwotracks.py | 86
-rw-r--r--  youtube_dl/extractor/twitch.py | 40
-rw-r--r--  youtube_dl/extractor/ultimedia.py | 103
-rw-r--r--  youtube_dl/extractor/varzesh3.py | 45
-rw-r--r--  youtube_dl/extractor/vessel.py | 127
-rw-r--r--  youtube_dl/extractor/videomega.py | 45
-rw-r--r--  youtube_dl/extractor/vidme.py | 15
-rw-r--r--  youtube_dl/extractor/viewster.py | 129
-rw-r--r--  youtube_dl/extractor/vimeo.py | 86
-rw-r--r--  youtube_dl/extractor/vine.py | 15
-rw-r--r--  youtube_dl/extractor/vk.py | 9
-rw-r--r--  youtube_dl/extractor/xuite.py | 14
-rw-r--r--  youtube_dl/extractor/yahoo.py | 15
-rw-r--r--  youtube_dl/extractor/yam.py | 37
-rw-r--r--  youtube_dl/extractor/yandexmusic.py | 127
-rw-r--r--  youtube_dl/extractor/youporn.py | 5
-rw-r--r--  youtube_dl/extractor/youtube.py | 62
-rw-r--r--  youtube_dl/options.py | 28
-rw-r--r--  youtube_dl/postprocessor/__init__.py | 2
-rw-r--r--  youtube_dl/postprocessor/ffmpeg.py | 21
-rw-r--r--  youtube_dl/postprocessor/metadatafromtitle.py | 47
-rw-r--r--  youtube_dl/update.py | 4
-rw-r--r--  youtube_dl/utils.py | 60
-rw-r--r--  youtube_dl/version.py | 2
110 files changed, 3771 insertions, 879 deletions
diff --git a/.travis.yml b/.travis.yml
index fb34299fc..511bee64c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,7 @@ language: python
python:
- "2.6"
- "2.7"
+ - "3.2"
- "3.3"
- "3.4"
before_install:
diff --git a/AUTHORS b/AUTHORS
index 4674a5af3..48be31e29 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -113,3 +113,9 @@ Robin de Rooij
Ryan Schmidt
Leslie P. Polzer
Duncan Keall
+Alexander Mamay
+Devin J. Pohly
+Eduardo Ferro Aldama
+Jeff Buchbinder
+Amish Bhadeshia
+Joram Schrijver
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 351229f21..588b15bde 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -18,7 +18,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
diff --git a/Makefile b/Makefile
index c6c76274f..fdb1abb60 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
- find -name "*.pyc" -delete
+ find . -name "*.pyc" -delete
PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
diff --git a/README.md b/README.md
index 04f664cd3..4f9fc8174 100644
--- a/README.md
+++ b/README.md
@@ -47,211 +47,109 @@ which means you can modify it, redistribute it or use it however you like.
# OPTIONS
-h, --help print this help text and exit
--version print program version and exit
- -U, --update update this program to latest version. Make
- sure that you have sufficient permissions
- (run with sudo if needed)
- -i, --ignore-errors continue on download errors, for example to
- skip unavailable videos in a playlist
- --abort-on-error Abort downloading of further videos (in the
- playlist or the command line) if an error
- occurs
+ -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
+ -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
+ --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
--dump-user-agent display the current browser identification
- --list-extractors List all supported extractors and the URLs
- they would handle
- --extractor-descriptions Output descriptions of all supported
- extractors
- --default-search PREFIX Use this prefix for unqualified URLs. For
- example "gvsearch2:" downloads two videos
- from google videos for youtube-dl "large
- apple". Use the value "auto" to let
- youtube-dl guess ("auto_warning" to emit a
- warning when guessing). "error" just throws
- an error. The default value "fixup_error"
- repairs broken URLs, but emits an error if
- this is not possible instead of searching.
- --ignore-config Do not read configuration files. When given
- in the global configuration file /etc
- /youtube-dl.conf: Do not read the user
- configuration in ~/.config/youtube-
- dl/config (%APPDATA%/youtube-dl/config.txt
- on Windows)
- --flat-playlist Do not extract the videos of a playlist,
- only list them.
+ --list-extractors List all supported extractors and the URLs they would handle
+ --extractor-descriptions Output descriptions of all supported extractors
+ --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
+ Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
+ default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
+ --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
+ in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
+ --flat-playlist Do not extract the videos of a playlist, only list them.
--no-color Do not emit color codes in output.
## Network Options:
- --proxy URL Use the specified HTTP/HTTPS proxy. Pass in
- an empty string (--proxy "") for direct
- connection
+ --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
--socket-timeout SECONDS Time to wait before giving up, in seconds
- --source-address IP Client-side IP address to bind to
- (experimental)
- -4, --force-ipv4 Make all connections via IPv4
- (experimental)
- -6, --force-ipv6 Make all connections via IPv6
- (experimental)
+ --source-address IP Client-side IP address to bind to (experimental)
+ -4, --force-ipv4 Make all connections via IPv4 (experimental)
+ -6, --force-ipv6 Make all connections via IPv6 (experimental)
+ --cn-verification-proxy URL Use this proxy to verify the IP address for some Chinese sites. The default proxy specified by --proxy (or none, if the options is
+ not present) is used for the actual downloading. (experimental)
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
--playlist-end NUMBER playlist video to end at (default is last)
- --playlist-items ITEM_SPEC playlist video items to download. Specify
- indices of the videos in the playlist
- seperated by commas like: "--playlist-items
- 1,2,5,8" if you want to download videos
- indexed 1, 2, 5, 8 in the playlist. You can
- specify range: "--playlist-items
- 1-3,7,10-13", it will download the videos
- at index 1, 2, 3, 7, 10, 11, 12 and 13.
- --match-title REGEX download only matching titles (regex or
- caseless sub-string)
- --reject-title REGEX skip download for matching titles (regex or
- caseless sub-string)
+ --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
+ if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
+ download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
+ --match-title REGEX download only matching titles (regex or caseless sub-string)
+ --reject-title REGEX skip download for matching titles (regex or caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files
- --min-filesize SIZE Do not download any videos smaller than
- SIZE (e.g. 50k or 44.6m)
- --max-filesize SIZE Do not download any videos larger than SIZE
- (e.g. 50k or 44.6m)
+ --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
+ --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
--date DATE download only videos uploaded in this date
- --datebefore DATE download only videos uploaded on or before
- this date (i.e. inclusive)
- --dateafter DATE download only videos uploaded on or after
- this date (i.e. inclusive)
- --min-views COUNT Do not download any videos with less than
- COUNT views
- --max-views COUNT Do not download any videos with more than
- COUNT views
- --match-filter FILTER (Experimental) Generic video filter.
- Specify any key (see help for -o for a list
- of available keys) to match if the key is
- present, !key to check if the key is not
- present,key > NUMBER (like "comment_count >
- 12", also works with >=, <, <=, !=, =) to
- compare against a number, and & to require
- multiple matches. Values which are not
- known are excluded unless you put a
- question mark (?) after the operator.For
- example, to only match videos that have
- been liked more than 100 times and disliked
- less than 50 times (or the dislike
- functionality is not available at the given
- service), but who also have a description,
- use --match-filter "like_count > 100 &
+ --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
+ --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
+ --min-views COUNT Do not download any videos with less than COUNT views
+ --max-views COUNT Do not download any videos with more than COUNT views
+ --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
+ !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
+ a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
+ operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
+ functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
dislike_count <? 50 & description" .
- --no-playlist If the URL refers to a video and a
- playlist, download only the video.
- --yes-playlist If the URL refers to a video and a
- playlist, download the playlist.
- --age-limit YEARS download only videos suitable for the given
- age
- --download-archive FILE Download only videos not listed in the
- archive file. Record the IDs of all
- downloaded videos in it.
- --include-ads Download advertisements as well
- (experimental)
+ --no-playlist If the URL refers to a video and a playlist, download only the video.
+ --yes-playlist If the URL refers to a video and a playlist, download the playlist.
+ --age-limit YEARS download only videos suitable for the given age
+ --download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
+ --include-ads Download advertisements as well (experimental)
## Download Options:
- -r, --rate-limit LIMIT maximum download rate in bytes per second
- (e.g. 50K or 4.2M)
- -R, --retries RETRIES number of retries (default is 10), or
- "infinite".
- --buffer-size SIZE size of download buffer (e.g. 1024 or 16K)
- (default is 1024)
- --no-resize-buffer do not automatically adjust the buffer
- size. By default, the buffer size is
- automatically resized from an initial value
- of SIZE.
+ -r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
+ -R, --retries RETRIES number of retries (default is 10), or "infinite".
+ --buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
+ --no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
--playlist-reverse Download playlist videos in reverse order
- --xattr-set-filesize (experimental) set file xattribute
- ytdl.filesize with expected filesize
- --hls-prefer-native (experimental) Use the native HLS
- downloader instead of ffmpeg.
- --external-downloader COMMAND (experimental) Use the specified external
- downloader. Currently supports
- aria2c,curl,wget
+ --xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
+ --hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
+ --external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
+ --external-downloader-args ARGS Give these arguments to the external downloader.
## Filesystem Options:
- -a, --batch-file FILE file containing URLs to download ('-' for
- stdin)
+ -a, --batch-file FILE file containing URLs to download ('-' for stdin)
--id use only video ID in file name
- -o, --output TEMPLATE output filename template. Use %(title)s to
- get the title, %(uploader)s for the
- uploader name, %(uploader_id)s for the
- uploader nickname if different,
- %(autonumber)s to get an automatically
- incremented number, %(ext)s for the
- filename extension, %(format)s for the
- format description (like "22 - 1280x720" or
- "HD"), %(format_id)s for the unique id of
- the format (like Youtube's itags: "137"),
- %(upload_date)s for the upload date
- (YYYYMMDD), %(extractor)s for the provider
- (youtube, metacafe, etc), %(id)s for the
- video id, %(playlist_title)s,
- %(playlist_id)s, or %(playlist)s (=title if
- present, ID otherwise) for the playlist the
- video is in, %(playlist_index)s for the
- position in the playlist. %(height)s and
- %(width)s for the width and height of the
- video format. %(resolution)s for a textual
- description of the resolution of the video
- format. %% for a literal percent. Use - to
- output to stdout. Can also be used to
- download to a different directory, for
- example with -o '/my/downloads/%(uploader)s
- /%(title)s-%(id)s.%(ext)s' .
- --autonumber-size NUMBER Specifies the number of digits in
- %(autonumber)s when it is present in output
- filename template or --auto-number option
- is given
- --restrict-filenames Restrict filenames to only ASCII
- characters, and avoid "&" and spaces in
- filenames
- -A, --auto-number [deprecated; use -o
- "%(autonumber)s-%(title)s.%(ext)s" ] number
- downloaded files starting from 00000
- -t, --title [deprecated] use title in file name
- (default)
+ -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
+ nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
+ the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
+ %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
+ %(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
+ %(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
+ %(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
+ Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
+ --autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
+ --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
+ -A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
+ -t, --title [deprecated] use title in file name (default)
-l, --literal [deprecated] alias of --title
-w, --no-overwrites do not overwrite files
- -c, --continue force resume of partially downloaded files.
- By default, youtube-dl will resume
- downloads if possible.
- --no-continue do not resume partially downloaded files
- (restart from beginning)
- --no-part do not use .part files - write directly
- into output file
- --no-mtime do not use the Last-modified header to set
- the file modification time
- --write-description write video description to a .description
- file
+ -c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
+ --no-continue do not resume partially downloaded files (restart from beginning)
+ --no-part do not use .part files - write directly into output file
+ --no-mtime do not use the Last-modified header to set the file modification time
+ --write-description write video description to a .description file
--write-info-json write video metadata to a .info.json file
- --write-annotations write video annotations to a .annotation
- file
- --load-info FILE json file containing the video information
- (created with the "--write-json" option)
- --cookies FILE file to read cookies from and dump cookie
- jar in
- --cache-dir DIR Location in the filesystem where youtube-dl
- can store some downloaded information
- permanently. By default $XDG_CACHE_HOME
- /youtube-dl or ~/.cache/youtube-dl . At the
- moment, only YouTube player files (for
- videos with obfuscated signatures) are
- cached, but that may change.
+ --write-annotations write video annotations to a .annotation file
+ --load-info FILE json file containing the video information (created with the "--write-json" option)
+ --cookies FILE file to read cookies from and dump cookie jar in
+ --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
+ or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
+ change.
--no-cache-dir Disable filesystem caching
--rm-cache-dir Delete all filesystem cache files
## Thumbnail images:
--write-thumbnail write thumbnail image to disk
--write-all-thumbnails write all thumbnail image formats to disk
- --list-thumbnails Simulate and list all available thumbnail
- formats
+ --list-thumbnails Simulate and list all available thumbnail formats
## Verbosity / Simulation Options:
-q, --quiet activates quiet mode
--no-warnings Ignore warnings
- -s, --simulate do not download the video and do not write
- anything to disk
+ -s, --simulate do not download the video and do not write anything to disk
--skip-download do not download the video
-g, --get-url simulate, quiet but print URL
-e, --get-title simulate, quiet but print title
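The --match-filter option documented in the hunk above is also reachable from the embedding API: this changeset's test_YoutubeDL.py imports match_filter_func from youtube_dl.utils and passes the compiled filter in as the match_filter parameter. A minimal sketch under those assumptions (the URL is a placeholder):

```python
from __future__ import unicode_literals

import youtube_dl
from youtube_dl.utils import match_filter_func

ydl_opts = {
    # Keep only videos at least 30 seconds long that have a description;
    # the string uses the same grammar as --match-filter on the CLI.
    'match_filter': match_filter_func('duration >= 30 & description'),
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```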
@@ -261,155 +159,87 @@ which means you can modify it, redistribute it or use it however you like.
--get-duration simulate, quiet but print video length
--get-filename simulate, quiet but print output filename
--get-format simulate, quiet but print output format
- -j, --dump-json simulate, quiet but print JSON information.
- See --output for a description of available
- keys.
- -J, --dump-single-json simulate, quiet but print JSON information
- for each command-line argument. If the URL
- refers to a playlist, dump the whole
- playlist information in a single line.
- --print-json Be quiet and print the video information as
- JSON (video is still being downloaded).
+ -j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
+ -J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
+ information in a single line.
+ --print-json Be quiet and print the video information as JSON (video is still being downloaded).
--newline output progress bar as new lines
--no-progress do not print progress bar
--console-title display progress in console titlebar
-v, --verbose print various debugging information
- --dump-intermediate-pages print downloaded pages to debug problems
- (very verbose)
- --write-pages Write downloaded intermediary pages to
- files in the current directory to debug
- problems
+ --dump-pages print downloaded pages to debug problems (very verbose)
+ --write-pages Write downloaded intermediary pages to files in the current directory to debug problems
--print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the youtube-dl server for
- debugging.
- --no-call-home Do NOT contact the youtube-dl server for
- debugging.
+ -C, --call-home Contact the youtube-dl server for debugging.
+ --no-call-home Do NOT contact the youtube-dl server for debugging.
## Workarounds:
--encoding ENCODING Force the specified encoding (experimental)
--no-check-certificate Suppress HTTPS certificate validation.
- --prefer-insecure Use an unencrypted connection to retrieve
- information about the video. (Currently
- supported only for YouTube)
+ --prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
--user-agent UA specify a custom user agent
- --referer URL specify a custom referer, use if the video
- access is restricted to one domain
- --add-header FIELD:VALUE specify a custom HTTP header and its value,
- separated by a colon ':'. You can use this
- option multiple times
- --bidi-workaround Work around terminals that lack
- bidirectional text support. Requires bidiv
- or fribidi executable in PATH
- --sleep-interval SECONDS Number of seconds to sleep before each
- download.
+ --referer URL specify a custom referer, use if the video access is restricted to one domain
+ --add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
+ --bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
+ --sleep-interval SECONDS Number of seconds to sleep before each download.
## Video Format Options:
- -f, --format FORMAT video format code, specify the order of
- preference using slashes, as in -f 22/17/18
- . Instead of format codes, you can select
- by extension for the extensions aac, m4a,
- mp3, mp4, ogg, wav, webm. You can also use
- the special names "best", "bestvideo",
- "bestaudio", "worst". You can filter the
- video results by putting a condition in
- brackets, as in -f "best[height=720]" (or
- -f "[filesize>10M]"). This works for
- filesize, height, width, tbr, abr, vbr,
- asr, and fps and the comparisons <, <=, >,
- >=, =, != and for ext, acodec, vcodec,
- container, and protocol and the comparisons
- =, != . Formats for which the value is not
- known are excluded unless you put a
- question mark (?) after the operator. You
- can combine format filters, so -f "[height
- <=? 720][tbr>500]" selects up to 720p
- videos (or videos where the height is not
- known) with a bitrate of at least 500
- KBit/s. By default, youtube-dl will pick
- the best quality. Use commas to download
- multiple audio formats, such as -f
- 136/137/mp4/bestvideo,140/m4a/bestaudio.
- You can merge the video and audio of two
- formats into a single file using -f <video-
- format>+<audio-format> (requires ffmpeg or
- avconv), for example -f
+ -f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
+ extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
+ "worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
+ This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
+ vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a
+ question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos
+ (or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality.
+ Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
+ of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
bestvideo+bestaudio.
--all-formats download all available video formats
- --prefer-free-formats prefer free video formats unless a specific
- one is requested
+ --prefer-free-formats prefer free video formats unless a specific one is requested
--max-quality FORMAT highest quality format to download
-F, --list-formats list all available formats
- --youtube-skip-dash-manifest Do not download the DASH manifest on
- YouTube videos
- --merge-output-format FORMAT If a merge is required (e.g.
- bestvideo+bestaudio), output to given
- container format. One of mkv, mp4, ogg,
- webm, flv.Ignored if no merge is required
+ --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
+ --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
+ merge is required
## Subtitle Options:
--write-sub write subtitle file
- --write-auto-sub write automatic subtitle file (youtube
- only)
- --all-subs downloads all the available subtitles of
- the video
+ --write-auto-sub write automatic subtitle file (youtube only)
+ --all-subs downloads all the available subtitles of the video
--list-subs lists all available subtitles for the video
- --sub-format FORMAT subtitle format, accepts formats
- preference, for example: "ass/srt/best"
- --sub-lang LANGS languages of the subtitles to download
- (optional) separated by commas, use IETF
- language tags like 'en,pt'
+ --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
+ --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
## Authentication Options:
-u, --username USERNAME login with this account ID
- -p, --password PASSWORD account password. If this option is left
- out, youtube-dl will ask interactively.
+ -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
-2, --twofactor TWOFACTOR two-factor auth code
-n, --netrc use .netrc authentication data
--video-password PASSWORD video password (vimeo, smotri)
## Post-processing Options:
- -x, --extract-audio convert video files to audio-only files
- (requires ffmpeg or avconv and ffprobe or
- avprobe)
- --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a",
- "opus", or "wav"; "best" by default
- --audio-quality QUALITY ffmpeg/avconv audio quality specification,
- insert a value between 0 (better) and 9
- (worse) for VBR or a specific bitrate like
- 128K (default 5)
- --recode-video FORMAT Encode the video to another format if
- necessary (currently supported:
- mp4|flv|ogg|webm|mkv)
- -k, --keep-video keeps the video file on disk after the
- post-processing; the video is erased by
- default
- --no-post-overwrites do not overwrite post-processed files; the
- post-processed files are overwritten by
- default
- --embed-subs embed subtitles in the video (only for mp4
- videos)
+ -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
+ --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
+ --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
+ (default 5)
+ --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
+ -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
+ --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
+ --embed-subs embed subtitles in the video (only for mp4 videos)
--embed-thumbnail embed thumbnail in the audio as cover art
--add-metadata write metadata to the video file
- --xattrs write metadata to the video file's xattrs
- (using dublin core and xdg standards)
- --fixup POLICY Automatically correct known faults of the
- file. One of never (do nothing), warn (only
- emit a warning), detect_or_warn(the
- default; fix file if we can, warn
- otherwise)
- --prefer-avconv Prefer avconv over ffmpeg for running the
- postprocessors (default)
- --prefer-ffmpeg Prefer ffmpeg over avconv for running the
- postprocessors
- --ffmpeg-location PATH Location of the ffmpeg/avconv binary;
- either the path to the binary or its
- containing directory.
- --exec CMD Execute a command on the file after
- downloading, similar to find's -exec
- syntax. Example: --exec 'adb push {}
- /sdcard/Music/ && rm {}'
- --convert-subtitles FORMAT Convert the subtitles to other format
- (currently supported: srt|ass|vtt)
+ --metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
+ parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
+ %(title)s" matches a title like "Coldplay - Paradise"
+ --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
+ --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
+ fix file if we can, warn otherwise)
+ --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
+ --prefer-ffmpeg Prefer ffmpeg over avconv for running the postprocessors
+ --ffmpeg-location PATH Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
+ --exec CMD Execute a command on the file after downloading, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm
+ {}'
+ --convert-subtitles FORMAT Convert the subtitles to other format (currently supported: srt|ass|vtt)
# CONFIGURATION
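The -f/--format grammar reflowed in the hunk above maps directly onto the format key of the embedding options dict. An illustrative sketch, assuming ffmpeg or avconv is available for the "+" merge (the URL is a placeholder):

```python
from __future__ import unicode_literals

import youtube_dl

ydl_opts = {
    # Best video up to 720p (or unknown height) at >= 500 KBit/s plus
    # best audio, merged into one file; falls back to plain "best".
    'format': 'bestvideo[height<=?720][tbr>500]+bestaudio/best',
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```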
@@ -529,6 +359,10 @@ YouTube requires an additional signature since September 2012 which is not suppo
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+### HTTP Error 429: Too Many Requests or 402: Payment Required
+
+These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--network-address` options](#network-options) to select another IP address.
+
### SyntaxError: Non-ASCII character ###
The error
@@ -573,6 +407,18 @@ A note on the service that they don't host the infringing content, but just link
Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
+### How can I speed up work on my issue?
+
+(Also known as: Help, my important issue not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up your issue, here's what you can do:
+
+First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
+
+Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
+
+If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
+
+Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
+
### How can I detect whether a given URL is supported by youtube-dl?
For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.
@@ -672,6 +518,7 @@ youtube-dl makes the best effort to be a good command-line program, and thus sho
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
```python
+from __future__ import unicode_literals
import youtube_dl
ydl_opts = {}
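Only the first lines of this example appear as diff context; the complete README snippet reads, roughly:

```python
from __future__ import unicode_literals
import youtube_dl

ydl_opts = {}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```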
@@ -684,6 +531,7 @@ Most likely, you'll want to use various options. For a list of what can be done,
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
```python
+from __future__ import unicode_literals
import youtube_dl
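Again only the top of the snippet is visible in the hunk; the README's mp3-conversion example is, roughly (MyLogger and my_hook are the README's own names):

```python
from __future__ import unicode_literals
import youtube_dl


class MyLogger(object):
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)


def my_hook(d):
    if d['status'] == 'finished':
        print('Done downloading, now converting ...')


ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    'logger': MyLogger(),
    'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['http://www.youtube.com/watch?v=BaW_jenozKc'])
```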
@@ -741,7 +589,9 @@ If your report is shorter than two lines, it is almost certainly missing some of
For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the -v flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
-Site support requests **must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
+If your server has multiple IPs or you suspect censorship, adding --call-home may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like http://www.youtube.com/watch?v=BaW_jenozKc . There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. http://www.youtube.com/ ) is *not* an example URL.
### Are you using the latest version?
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index 6a5bd9eda..7a219ebe9 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -28,7 +28,7 @@ for test in get_testcases():
if METHOD == 'EURISTIC':
try:
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
- except:
+ except Exception:
print('\nFail: {0}'.format(test['name']))
continue
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
new file mode 100644
index 000000000..2e389fc8e
--- /dev/null
+++ b/devscripts/generate_aes_testdata.py
@@ -0,0 +1,42 @@
+from __future__ import unicode_literals
+
+import codecs
+import subprocess
+
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import intlist_to_bytes
+from youtube_dl.aes import aes_encrypt, key_expansion
+
+secret_msg = b'Secret message goes here'
+
+
+def hex_str(int_list):
+ return codecs.encode(intlist_to_bytes(int_list), 'hex')
+
+
+def openssl_encode(algo, key, iv):
+ cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
+ prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ out, _ = prog.communicate(secret_msg)
+ return out
+
+iv = key = [0x20, 0x15] + 14 * [0]
+
+r = openssl_encode('aes-128-cbc', key, iv)
+print('aes_cbc_decrypt')
+print(repr(r))
+
+password = key
+new_key = aes_encrypt(password, key_expansion(password))
+r = openssl_encode('aes-128-ctr', new_key, iv)
+print('aes_decrypt_text 16')
+print(repr(r))
+
+password = key + 16 * [0]
+new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
+r = openssl_encode('aes-256-ctr', new_key, iv)
+print('aes_decrypt_text 32')
+print(repr(r))
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 062cb3d62..fd59cc2be 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -2,6 +2,8 @@
- **1tv**: Первый канал
- **1up.com**
- **220.ro**
+ - **22tracks:genre**
+ - **22tracks:track**
- **24video**
- **3sat**
- **4tube**
@@ -47,6 +49,7 @@
- **Bandcamp**
- **Bandcamp:album**
- **bbc.co.uk**: BBC iPlayer
+ - **BeatportPro**
- **Beeg**
- **BehindKink**
- **Bet**
@@ -111,12 +114,14 @@
- **Discovery**
- **divxstage**: DivxStage
- **Dotsub**
+ - **DouyuTV**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
- **DRTV**
- **Dump**
- **dvtv**: http://video.aktualne.cz/
+ - **EaglePlatform**
- **EbaumsWorld**
- **EchoMsk**
- **eHow**
@@ -144,6 +149,7 @@
- **Firstpost**
- **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
+ - **FootyRoom**
- **Foxgay**
- **FoxNews**
- **france2.fr:generation-quoi**
@@ -161,6 +167,7 @@
- **GameSpot**
- **GameStar**
- **Gametrailers**
+ - **Gazeta**
- **GDCVault**
- **generic**: Generic downloader that works on some sites
- **GiantBomb**
@@ -211,6 +218,7 @@
- **jpopsuki.tv**
- **Jukebox**
- **Kaltura**
+ - **KanalPlay**: Kanal 5/9/11 Play
- **Kankan**
- **Karaoketv**
- **keek**
@@ -225,6 +233,7 @@
- **Letv**
- **LetvPlaylist**
- **LetvTv**
+ - **Libsyn**
- **lifenews**: LIFE | NEWS
- **LiveLeak**
- **livestream**
@@ -304,6 +313,7 @@
- **npo.nl:radio**
- **npo.nl:radio:fragment**
- **NRK**
+ - **NRKPlaylist**
- **NRKTV**
- **ntv.ru**
- **Nuvid**
@@ -315,6 +325,7 @@
- **Ooyala**
- **OpenFilm**
- **orf:fm4**: radio FM4
+ - **orf:iptv**: iptv.ORF.at
- **orf:oe1**: Radio Österreich 1
- **orf:tvthek**: ORF TVthek
- **parliamentlive.tv**: UK parliament videos
@@ -322,10 +333,12 @@
- **PBS**
- **Phoenix**
- **Photobucket**
+ - **Pladform**
- **PlanetaPlay**
- **play.fm**
- **played.to**
- **Playvid**
+ - **Playwire**
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
@@ -334,6 +347,7 @@
- **PornHubPlaylist**
- **Pornotube**
- **PornoXO**
+ - **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **Puls4**
@@ -359,6 +373,7 @@
- **RTP**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams
- **RUHD**
- **rutube**: Rutube videos
@@ -367,6 +382,8 @@
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **RUTV**: RUTV.RU
+ - **safari**: safaribooksonline.com online video
+ - **safari:course**: safaribooksonline.com online courses
- **Sandia**: Sandia National Laboratories
- **Sapo**: SAPO Vídeos
- **savefrom.net**
@@ -409,6 +426,7 @@
- **SportBox**
- **SportDeutschland**
- **SRMediathek**: Saarländischer Rundfunk
+ - **SSA**
- **stanfordoc**: Stanford Open ClassRoom
- **Steam**
- **streamcloud.eu**
@@ -478,6 +496,7 @@
- **Ubu**
- **udemy**
- **udemy:course**
+ - **Ultimedia**
- **Unistra**
- **Urort**: NRK P3 Urørt
- **ustream**
@@ -485,6 +504,7 @@
- **Vbox7**
- **VeeHD**
- **Veoh**
+ - **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **VGTV**
@@ -505,6 +525,7 @@
- **Vidzi**
- **vier**
- **vier:videos**
+ - **Viewster**
- **viki**
- **vimeo**
- **vimeo:album**
@@ -551,6 +572,9 @@
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **Yam**
+ - **yandexmusic:album**: Яндекс.Музыка - Альбом
+ - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
+ - **yandexmusic:track**: Яндекс.Музыка - Трек
- **YesJapan**
- **Ynet**
- **YouJizz**
@@ -569,7 +593,7 @@
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
- - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
- **Zapiks**
- **ZDF**
- **ZDFChannel**
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 055e42555..652519831 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -14,6 +14,9 @@ from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
from youtube_dl.extractor import YoutubeIE
from youtube_dl.postprocessor.common import PostProcessor
+from youtube_dl.utils import match_filter_func
+
+TEST_URL = 'http://localhost/sample.mp4'
class YDL(FakeYDL):
@@ -46,8 +49,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
- {'ext': 'webm', 'height': 460, 'url': 'x'},
- {'ext': 'mp4', 'height': 460, 'url': 'y'},
+ {'ext': 'webm', 'height': 460, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
]
info_dict = _make_result(formats)
yie = YoutubeIE(ydl)
@@ -60,8 +63,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = True
formats = [
- {'ext': 'webm', 'height': 720, 'url': 'a'},
- {'ext': 'mp4', 'height': 1080, 'url': 'b'},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@@ -74,9 +77,9 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
- {'ext': 'webm', 'height': 720, 'url': '_'},
- {'ext': 'mp4', 'height': 720, 'url': '_'},
- {'ext': 'flv', 'height': 720, 'url': '_'},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
+ {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@@ -88,8 +91,8 @@ class TestFormatSelection(unittest.TestCase):
ydl = YDL()
ydl.params['prefer_free_formats'] = False
formats = [
- {'ext': 'flv', 'height': 720, 'url': '_'},
- {'ext': 'webm', 'height': 720, 'url': '_'},
+ {'ext': 'flv', 'height': 720, 'url': TEST_URL},
+ {'ext': 'webm', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
yie = YoutubeIE(ydl)
@@ -133,10 +136,10 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection(self):
formats = [
- {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': '_'},
- {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': '_'},
- {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': '_'},
- {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': '_'},
+ {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
+ {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
+ {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@@ -167,10 +170,10 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_audio(self):
formats = [
- {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': '_'},
- {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': '_'},
- {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': '_'},
- {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': '_'},
+ {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@@ -185,8 +188,8 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(downloaded['format_id'], 'audio-low')
formats = [
- {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': '_'},
- {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': '_'},
+ {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@@ -228,9 +231,9 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection_video(self):
formats = [
- {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': '_'},
- {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': '_'},
- {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': '_'},
+ {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
+ {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
]
info_dict = _make_result(formats)
@@ -337,6 +340,8 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'G')
+
+class TestYoutubeDL(unittest.TestCase):
def test_subtitles(self):
def s_formats(lang, autocaption=False):
return [{
@@ -459,6 +464,73 @@ class TestFormatSelection(unittest.TestCase):
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
os.unlink(audiofile)
+ def test_match_filter(self):
+ class FilterYDL(YDL):
+ def __init__(self, *args, **kwargs):
+ super(FilterYDL, self).__init__(*args, **kwargs)
+ self.params['simulate'] = True
+
+ def process_info(self, info_dict):
+ super(YDL, self).process_info(info_dict)
+
+ def _match_entry(self, info_dict, incomplete):
+ res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
+ if res is None:
+ self.downloaded_info_dicts.append(info_dict)
+ return res
+
+ first = {
+ 'id': '1',
+ 'url': TEST_URL,
+ 'title': 'one',
+ 'extractor': 'TEST',
+ 'duration': 30,
+ 'filesize': 10 * 1024,
+ }
+ second = {
+ 'id': '2',
+ 'url': TEST_URL,
+ 'title': 'two',
+ 'extractor': 'TEST',
+ 'duration': 10,
+ 'description': 'foo',
+ 'filesize': 5 * 1024,
+ }
+ videos = [first, second]
+
+ def get_videos(filter_=None):
+ ydl = FilterYDL({'match_filter': filter_})
+ for v in videos:
+ ydl.process_ie_result(v, download=True)
+ return [v['id'] for v in ydl.downloaded_info_dicts]
+
+ res = get_videos()
+ self.assertEqual(res, ['1', '2'])
+
+ def f(v):
+ if v['id'] == '1':
+ return None
+ else:
+ return 'Video id is not 1'
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
+ f = match_filter_func('duration < 30')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description = foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['2'])
+
+ f = match_filter_func('description =? foo')
+ res = get_videos(f)
+ self.assertEqual(res, ['1', '2'])
+
+ f = match_filter_func('filesize > 5KiB')
+ res = get_videos(f)
+ self.assertEqual(res, ['1'])
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_aes.py b/test/test_aes.py
new file mode 100644
index 000000000..4dc7de7b5
--- /dev/null
+++ b/test/test_aes.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
+from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+import base64
+
+# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
+
+
+class TestAES(unittest.TestCase):
+ def setUp(self):
+ self.key = self.iv = [0x20, 0x15] + 14 * [0]
+ self.secret_msg = b'Secret message goes here'
+
+ def test_encrypt(self):
+ msg = b'message'
+ key = list(range(16))
+ encrypted = aes_encrypt(bytes_to_intlist(msg), key)
+ decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
+ self.assertEqual(decrypted, msg)
+
+ def test_cbc_decrypt(self):
+ data = bytes_to_intlist(
+ b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
+ )
+ decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+ self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+ def test_decrypt_text(self):
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8]) +
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ )
+ decrypted = (aes_decrypt_text(encrypted, password, 16))
+ self.assertEqual(decrypted, self.secret_msg)
+
+ password = intlist_to_bytes(self.key).decode('utf-8')
+ encrypted = base64.b64encode(
+ intlist_to_bytes(self.iv[:8]) +
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ )
+ decrypted = (aes_decrypt_text(encrypted, password, 32))
+ self.assertEqual(decrypted, self.secret_msg)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index e66264b4b..a9db42b30 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
def test_youtube_feeds(self):
- self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+ self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
@@ -104,11 +104,11 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch(':tds', ['ComedyCentralShows'])
def test_vimeo_matching(self):
- self.assertMatch('http://vimeo.com/channels/tributes', ['vimeo:channel'])
- self.assertMatch('http://vimeo.com/channels/31259', ['vimeo:channel'])
- self.assertMatch('http://vimeo.com/channels/31259/53576664', ['vimeo'])
- self.assertMatch('http://vimeo.com/user7108434', ['vimeo:user'])
- self.assertMatch('http://vimeo.com/user7108434/videos', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel'])
+ self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo'])
+ self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user'])
+ self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
# https://github.com/rg3/youtube-dl/issues/1930
diff --git a/test/test_execution.py b/test/test_execution.py
index 60df187de..f31e51558 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -1,4 +1,6 @@
#!/usr/bin/env python
+# coding: utf-8
+
from __future__ import unicode_literals
import unittest
@@ -27,5 +29,12 @@ class TestExecution(unittest.TestCase):
def test_main_exec(self):
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ def test_cmdline_umlauts(self):
+ p = subprocess.Popen(
+ [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'],
+ cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
+ _, stderr = p.communicate()
+ self.assertFalse(stderr)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_http.py b/test/test_http.py
index bd4d46fef..f2e305b6f 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,7 +8,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
+from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
import threading
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
+
+def _build_proxy_handler(name):
+ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ proxy_name = name
+
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/plain; charset=utf-8')
+ self.end_headers()
+ self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
+ return HTTPTestRequestHandler
+
+
+class TestProxy(unittest.TestCase):
+ def setUp(self):
+ self.proxy = compat_http_server.HTTPServer(
+ ('localhost', 0), _build_proxy_handler('normal'))
+ self.port = self.proxy.socket.getsockname()[1]
+ self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
+ self.proxy_thread.daemon = True
+ self.proxy_thread.start()
+
+ self.cn_proxy = compat_http_server.HTTPServer(
+ ('localhost', 0), _build_proxy_handler('cn'))
+ self.cn_port = self.cn_proxy.socket.getsockname()[1]
+ self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
+ self.cn_proxy_thread.daemon = True
+ self.cn_proxy_thread.start()
+
+ def test_proxy(self):
+ cn_proxy = 'localhost:{0}'.format(self.cn_port)
+ ydl = YoutubeDL({
+ 'proxy': 'localhost:{0}'.format(self.port),
+ 'cn_verification_proxy': cn_proxy,
+ })
+ url = 'http://foo.com/bar'
+ response = ydl.urlopen(url).read().decode('utf-8')
+ self.assertEqual(response, 'normal: {0}'.format(url))
+
+ req = compat_urllib_request.Request(url)
+ req.add_header('Ytdl-request-proxy', cn_proxy)
+ response = ydl.urlopen(req).read().decode('utf-8')
+ self.assertEqual(response, 'cn: {0}'.format(url))
+
if __name__ == '__main__':
unittest.main()
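
The new TestProxy case exercises both the global proxy option and a per-request override passed as a Ytdl-request-proxy header. Below is a sketch of how a ProxyHandler subclass can honour such a header; the class name and details are assumptions in the spirit of the PerRequestProxyHandler wired up later in this diff, not its actual code:

    try:
        import urllib.request as urllib_request  # Python 3
    except ImportError:
        import urllib2 as urllib_request  # Python 2

    class PerRequestProxyHandlerSketch(urllib_request.ProxyHandler):
        def proxy_open(self, req, proxy, type):
            # Prefer a proxy attached to this specific request over the
            # handler-wide mapping.
            req_proxy = req.headers.get('Ytdl-request-proxy')
            if req_proxy is not None:
                proxy = req_proxy
            return urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)
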
diff --git a/test/test_netrc.py b/test/test_netrc.py
new file mode 100644
index 000000000..7cf3a6a2e
--- /dev/null
+++ b/test/test_netrc.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+from youtube_dl.extractor import (
+ gen_extractors,
+)
+
+
+class TestNetRc(unittest.TestCase):
+ def test_netrc_present(self):
+ for ie in gen_extractors():
+ if not hasattr(ie, '_login'):
+ continue
+ self.assertTrue(
+ hasattr(ie, '_NETRC_MACHINE'),
+ 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
new file mode 100644
index 000000000..addb69d6f
--- /dev/null
+++ b/test/test_postprocessors.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+from __future__ import unicode_literals
+
+# Allow direct execution
+import os
+import sys
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.postprocessor import MetadataFromTitlePP
+
+
+class TestMetadataFromTitle(unittest.TestCase):
+ def test_format_to_regex(self):
+ pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
+ self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 3f2d8a2ba..891ee620b 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -26,6 +26,7 @@ from youtube_dl.extractor import (
VikiIE,
ThePlatformIE,
RTVEALaCartaIE,
+ FunnyOrDieIE,
)
@@ -320,5 +321,17 @@ class TestRtveSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
+class TestFunnyOrDieSubtitles(BaseTestSubtitles):
+ url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
+ IE = FunnyOrDieIE
+
+ def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(set(subtitles.keys()), set(['en']))
+ self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_unicode_literals.py b/test/test_unicode_literals.py
index 7f816698e..6c1b7ec91 100644
--- a/test/test_unicode_literals.py
+++ b/test/test_unicode_literals.py
@@ -17,13 +17,22 @@ IGNORED_FILES = [
'buildserver.py',
]
+IGNORED_DIRS = [
+ '.git',
+ '.tox',
+]
from test.helper import assertRegexpMatches
class TestUnicodeLiterals(unittest.TestCase):
def test_all_files(self):
- for dirpath, _, filenames in os.walk(rootDir):
+ for dirpath, dirnames, filenames in os.walk(rootDir):
+ for ignore_dir in IGNORED_DIRS:
+ if ignore_dir in dirnames:
+ # If we remove the directory from dirnames, os.walk won't
+ # recurse into it
+ dirnames.remove(ignore_dir)
for basename in filenames:
if not basename.endswith('.py'):
continue
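
The dirnames trick above works because os.walk re-reads the list it handed you: pruning must mutate that list in place (remove(), as here, or a slice assignment), whereas rebinding the name would silently change nothing. A minimal standalone illustration:

    import os

    IGNORED_DIRS = ['.git', '.tox']

    for dirpath, dirnames, filenames in os.walk('.'):
        # Slice assignment mutates the same list object os.walk holds on
        # to; `dirnames = [...]` would leave the traversal unchanged.
        dirnames[:] = [d for d in dirnames if d not in IGNORED_DIRS]
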
diff --git a/test/test_utils.py b/test/test_utils.py
index 3fba8ae11..abaf1ab73 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
encodeFilename,
escape_rfc3986,
escape_url,
+ ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
InAdvancePagedList,
@@ -38,6 +39,8 @@ from youtube_dl.utils import (
parse_iso8601,
read_batch_urls,
sanitize_filename,
+ sanitize_path,
+ sanitize_url_path_consecutive_slashes,
shell_quote,
smuggle_url,
str_to_int,
@@ -52,6 +55,7 @@ from youtube_dl.utils import (
urlencode_postdata,
version_tuple,
xpath_with_ns,
+ xpath_text,
render_table,
match_str,
)
@@ -85,8 +89,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
+
self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
@@ -128,6 +135,62 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
+ def test_sanitize_path(self):
+ if sys.platform != 'win32':
+ return
+
+ self.assertEqual(sanitize_path('abc'), 'abc')
+ self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
+ self.assertEqual(sanitize_path('abc|def'), 'abc#def')
+ self.assertEqual(sanitize_path('<>:"|?*'), '#######')
+ self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
+ self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')
+
+ self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')
+
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
+ self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
+
+ self.assertEqual(
+ sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
+ 'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')
+
+ self.assertEqual(
+ sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
+ 'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
+ self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
+ self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
+ self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')
+
+ self.assertEqual(sanitize_path('../abc'), '..\\abc')
+ self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
+ self.assertEqual(sanitize_path('./abc'), 'abc')
+ self.assertEqual(sanitize_path('./../abc'), '..\\abc')
+
+ def test_sanitize_url_path_consecutive_slashes(self):
+ self.assertEqual(
+ sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
+ 'http://hostname/foo/bar/filename.html')
+ self.assertEqual(
+ sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
+ 'http://hostname/foo/bar/filename.html')
+ self.assertEqual(
+ sanitize_url_path_consecutive_slashes('http://hostname//'),
+ 'http://hostname/')
+ self.assertEqual(
+ sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
+ 'http://hostname/foo/bar/filename.html')
+ self.assertEqual(
+ sanitize_url_path_consecutive_slashes('http://hostname/'),
+ 'http://hostname/')
+ self.assertEqual(
+ sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
+ 'http://hostname/abc/')
+
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
@@ -137,6 +200,8 @@ class TestUtil(unittest.TestCase):
def test_unescape_html(self):
self.assertEqual(unescapeHTML('%20;'), '%20;')
+ self.assertEqual(unescapeHTML('&#x2F;'), '/')
+ self.assertEqual(unescapeHTML('&#47;'), '/')
self.assertEqual(
unescapeHTML('&eacute;'), 'é')
@@ -189,6 +254,17 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find('media:song/media:author').text, 'The Author')
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
+ def test_xpath_text(self):
+ testxml = '''<root>
+ <div>
+ <p>Foo</p>
+ </div>
+ </root>'''
+ doc = xml.etree.ElementTree.fromstring(testxml)
+ self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
+ self.assertTrue(xpath_text(doc, 'div/bar') is None)
+ self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
+
def test_smuggle_url(self):
data = {"ö": "ö", "abc": [3]}
url = 'https://foo.bar/baz?x=y#a'
diff --git a/tox.ini b/tox.ini
index ed01e3386..00c6e00e3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,11 @@
[tox]
-envlist = py26,py27,py33
+envlist = py26,py27,py33,py34
[testenv]
deps =
nose
coverage
-commands = nosetests --verbose {posargs:test} # --with-coverage --cover-package=youtube_dl --cover-html
+defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
+ --exclude test_subtitles.py --exclude test_write_annotations.py
+ --exclude test_youtube_lists.py
+commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
# test.test_download:TestDownload.test_NowVideo
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index d7c6db0ff..4fa2223ad 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -4,8 +4,10 @@
from __future__ import absolute_import, unicode_literals
import collections
+import contextlib
import datetime
import errno
+import fileinput
import io
import itertools
import json
@@ -52,12 +54,14 @@ from .utils import (
MaxDownloadsReached,
PagedList,
parse_filesize,
+ PerRequestProxyHandler,
PostProcessingError,
platform_name,
preferredencoding,
render_table,
SameFileError,
sanitize_filename,
+ sanitize_path,
std_headers,
subtitles_filename,
takewhile_inclusive,
@@ -181,6 +185,8 @@ class YoutubeDL(object):
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
+ cn_verification_proxy: URL of the proxy to use for IP address verification
+ on Chinese sites. (Experimental)
socket_timeout: Time to wait for unresponsive hosts, in seconds
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
@@ -247,10 +253,10 @@ class YoutubeDL(object):
hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
The following parameters are not used by YoutubeDL itself, they are used by
- the FileDownloader:
+ the downloader (see youtube_dl/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize.
+ xattr_set_filesize, external_downloader_args.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
@@ -317,8 +323,10 @@ class YoutubeDL(object):
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- if '%(stitle)s' in self.params.get('outtmpl', ''):
- self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
self._setup_opener()
@@ -557,7 +565,7 @@ class YoutubeDL(object):
if v is not None)
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
tmpl = compat_expanduser(outtmpl)
filename = tmpl % template_dict
# Temporary fix for #4787
@@ -624,7 +632,7 @@ class YoutubeDL(object):
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
- '''
+ '''
if ie_key:
ies = [self.get_info_extractor(ie_key)]
@@ -1080,8 +1088,7 @@ class YoutubeDL(object):
if req_format is None:
req_format = 'best'
formats_to_download = []
- # The -1 is for supporting YoutubeIE
- if req_format in ('-1', 'all'):
+ if req_format == 'all':
formats_to_download = formats
else:
for rfstr in req_format.split(','):
@@ -1208,9 +1215,6 @@ class YoutubeDL(object):
if len(info_dict['title']) > 200:
info_dict['title'] = info_dict['title'][:197] + '...'
- # Keep for backwards compatibility
- info_dict['stitle'] = info_dict['title']
-
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
@@ -1256,7 +1260,7 @@ class YoutubeDL(object):
return
try:
- dn = os.path.dirname(encodeFilename(filename))
+ dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
if dn and not os.path.exists(dn):
os.makedirs(dn)
except (OSError, IOError) as err:
@@ -1452,8 +1456,11 @@ class YoutubeDL(object):
return self._download_retcode
def download_with_info_file(self, info_filename):
- with io.open(info_filename, 'r', encoding='utf-8') as f:
- info = json.load(f)
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, so we can't call json.load
+ info = json.loads('\n'.join(f))
try:
self.process_ie_result(info, download=True)
except DownloadError:
@@ -1694,10 +1701,10 @@ class YoutubeDL(object):
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
self._write_string('[debug] Git HEAD: ' + out + '\n')
- except:
+ except Exception:
try:
sys.exc_clear()
- except:
+ except Exception:
pass
self._write_string('[debug] Python version %s - %s\n' % (
platform.python_version(), platform_name()))
@@ -1757,13 +1764,20 @@ class YoutubeDL(object):
# Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
- proxy_handler = compat_urllib_request.ProxyHandler(proxies)
+ proxy_handler = PerRequestProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+ # The SSL context is only available in Python 2.7.9+ and 3.x
+ if hasattr(https_handler, '_context'):
+ if len(https_handler._context.get_ca_certs()) == 0:
+ self.report_warning(
+ 'No ssl certificates were loaded, urls that use https '
+ 'won\'t work')
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
opener = compat_urllib_request.build_opener(
- https_handler, proxy_handler, cookie_processor, ydlh)
+ proxy_handler, https_handler, cookie_processor, ydlh)
+
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
# (See https://github.com/rg3/youtube-dl/issues/1309 for details)
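
On the download_with_info_file change: fileinput.FileInput has no read() method, so the lines are joined and handed to json.loads instead of json.load. One property worth noting (an observation about fileinput, not a motivation the diff states) is that it treats the filename '-' as stdin, which a plain io.open cannot do. The same pattern in isolation:

    import contextlib
    import fileinput
    import json

    def load_info(info_filename):
        # hook_encoded decodes each line as UTF-8; passing '-' as the
        # filename would read from stdin instead of a file.
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            return json.loads('\n'.join(f))
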
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 49f382695..852b2fc3d 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -9,6 +9,7 @@ import codecs
import io
import os
import random
+import shlex
import sys
@@ -212,6 +213,11 @@ def _real_main(argv=None):
# PostProcessors
postprocessors = []
# Add the metadata pp first, the other pps will copy it
+ if opts.metafromtitle:
+ postprocessors.append({
+ 'key': 'MetadataFromTitle',
+ 'titleformat': opts.metafromtitle
+ })
if opts.addmetadata:
postprocessors.append({'key': 'FFmpegMetadata'})
if opts.extractaudio:
@@ -255,6 +261,9 @@ def _real_main(argv=None):
xattr # Confuse flake8
except ImportError:
parser.error('setting filesize xattr requested but python-xattr is not available')
+ external_downloader_args = None
+ if opts.external_downloader_args:
+ external_downloader_args = shlex.split(opts.external_downloader_args)
match_filter = (
None if opts.match_filter is None
else match_filter_func(opts.match_filter))
@@ -359,6 +368,8 @@ def _real_main(argv=None):
'no_color': opts.no_color,
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
+ 'external_downloader_args': external_downloader_args,
+ 'cn_verification_proxy': opts.cn_verification_proxy,
}
with YoutubeDL(ydl_opts) as ydl:
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index b2bf149ef..973bcd320 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -389,7 +389,7 @@ else:
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = sp.communicate()
lines, columns = map(int, out.split())
- except:
+ except Exception:
pass
return _terminal_size(columns, lines)
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 3ae90021a..a0fc5ead0 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -42,6 +42,8 @@ class FileDownloader(object):
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
(experimental)
+ external_downloader_args: A list of additional command-line arguments for the
+ external downloader.
Subclasses of this one must re-define the real_download method.
"""
@@ -202,7 +204,7 @@ class FileDownloader(object):
return
try:
os.utime(filename, (time.time(), filetime))
- except:
+ except Exception:
pass
return filetime
@@ -316,7 +318,7 @@ class FileDownloader(object):
)
continuedl_and_exists = (
- self.params.get('continuedl', False) and
+ self.params.get('continuedl', True) and
os.path.isfile(encodeFilename(filename)) and
not self.params.get('nopart', False)
)
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 51c41c704..1673b2382 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -51,6 +51,13 @@ class ExternalFD(FileDownloader):
return []
return [command_option, source_address]
+ def _configuration_args(self, default=[]):
+ ex_args = self.params.get('external_downloader_args')
+ if ex_args is None:
+ return default
+ assert isinstance(ex_args, list)
+ return ex_args
+
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
cmd = self._make_cmd(tmpfilename, info_dict)
@@ -79,6 +86,7 @@ class CurlFD(ExternalFD):
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--interface')
+ cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -89,15 +97,16 @@ class WgetFD(ExternalFD):
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._source_address('--bind-address')
+ cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [
- self.exe, '-c',
- '--min-split-size', '1M', '--max-connection-per-server', '4']
+ cmd = [self.exe, '-c']
+ cmd += self._configuration_args([
+ '--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', dn]
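
The _configuration_args(default=[]) hook gives each external downloader a choice: curl and wget append the user's extra arguments to a fixed command line, while aria2c passes its built-in tuning flags as the default, so user-supplied arguments replace them wholesale. The arguments arrive pre-split from __init__.py; the flag value below is illustrative:

    import shlex

    # What --external-downloader-args undergoes before reaching the
    # downloader (mirrors the shlex.split call added in __init__.py).
    raw = '--min-split-size 2M --max-connection-per-server 8'
    external_downloader_args = shlex.split(raw)
    # ['--min-split-size', '2M', '--max-connection-per-server', '8']

The mutable default in `default=[]` is a well-known Python pitfall, but it is harmless here: the list is only ever returned, never mutated.
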
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 3dc796faa..4ab000d67 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -281,7 +281,7 @@ class F4mFD(FileDownloader):
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
- bootstrap = base64.b64decode(node.text)
+ bootstrap = base64.b64decode(node.text.encode('ascii'))
boot_info = read_bootstrap_info(bootstrap)
return (boot_info, bootstrap_url)
@@ -308,7 +308,7 @@ class F4mFD(FileDownloader):
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
- metadata = base64.b64decode(metadata_node.text)
+ metadata = base64.b64decode(metadata_node.text.encode('ascii'))
else:
metadata = None
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 2e3dac825..d136bebd1 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
open_mode = 'wb'
if resume_len != 0:
- if self.params.get('continuedl', False):
+ if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
@@ -92,6 +92,8 @@ class HttpFD(FileDownloader):
self._hook_progress({
'filename': filename,
'status': 'finished',
+ 'downloaded_bytes': resume_len,
+ 'total_bytes': resume_len,
})
return True
else:
@@ -218,12 +220,6 @@ class HttpFD(FileDownloader):
if tmpfilename != '-':
stream.close()
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': data_len,
- 'tmpfilename': tmpfilename,
- 'status': 'error',
- })
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index 89e98ae61..ddf5724ae 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
protocol = info_dict.get('rtmp_protocol', None)
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
- continue_dl = info_dict.get('continuedl', False)
+ continue_dl = info_dict.get('continuedl', True)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 370154773..0b9736f2d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -37,6 +37,7 @@ from .bandcamp import BandcampIE, BandcampAlbumIE
from .bbccouk import BBCCoUkIE
from .beeg import BeegIE
from .behindkink import BehindKinkIE
+from .beatportpro import BeatportProIE
from .bet import BetIE
from .bild import BildIE
from .bilibili import BiliBiliIE
@@ -105,17 +106,21 @@ from .dbtv import DBTVIE
from .dctp import DctpTvIE
from .deezer import DeezerPlaylistIE
from .dfb import DFBIE
+from .dhm import DHMIE
from .dotsub import DotsubIE
+from .douyutv import DouyuTVIE
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
+from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE
from .divxstage import DivxStageIE
from .dropbox import DropboxIE
+from .eagleplatform import EaglePlatformIE
from .ebaumsworld import EbaumsWorldIE
from .echomsk import EchoMskIE
from .ehow import EHowIE
@@ -150,6 +155,7 @@ from .fktv import (
)
from .flickr import FlickrIE
from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
from .fourtube import FourTubeIE
from .foxgay import FoxgayIE
from .foxnews import FoxNewsIE
@@ -174,6 +180,7 @@ from .gameone import (
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gametrailers import GametrailersIE
+from .gazeta import GazetaIE
from .gdcvault import GDCVaultIE
from .generic import GenericIE
from .giantbomb import GiantBombIE
@@ -228,6 +235,7 @@ from .jove import JoveIE
from .jukebox import JukeboxIE
from .jpopsukitv import JpopsukiIE
from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
from .kankan import KankanIE
from .karaoketv import KaraoketvIE
from .keezmovies import KeezMoviesIE
@@ -244,6 +252,7 @@ from .letv import (
LetvTvIE,
LetvPlaylistIE
)
+from .libsyn import LibsynIE
from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE
from .livestream import (
@@ -303,6 +312,8 @@ from .nba import NBAIE
from .nbc import (
NBCIE,
NBCNewsIE,
+ NBCSportsIE,
+ NBCSportsVPlayerIE,
)
from .ndr import NDRIE
from .ndtv import NDTVIE
@@ -341,6 +352,7 @@ from .npo import (
)
from .nrk import (
NRKIE,
+ NRKPlaylistIE,
NRKTVIE,
)
from .ntvde import NTVDeIE
@@ -355,6 +367,7 @@ from .orf import (
ORFTVthekIE,
ORFOE1IE,
ORFFM4IE,
+ ORFIPTVIE,
)
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
@@ -362,9 +375,11 @@ from .pbs import PBSIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
from .planetaplay import PlanetaPlayIE
+from .pladform import PladformIE
from .played import PlayedIE
from .playfm import PlayFMIE
from .playvid import PlayvidIE
+from .playwire import PlaywireIE
from .podomatic import PodomaticIE
from .pornhd import PornHdIE
from .pornhub import (
@@ -373,6 +388,7 @@ from .pornhub import (
)
from .pornotube import PornotubeIE
from .pornoxo import PornoXOIE
+from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
@@ -398,7 +414,7 @@ from .rtlnow import RTLnowIE
from .rtl2 import RTL2IE
from .rtp import RTPIE
from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
from .ruhd import RUHDIE
from .rutube import (
RutubeIE,
@@ -409,6 +425,10 @@ from .rutube import (
)
from .rutv import RUTVIE
from .sandia import SandiaIE
+from .safari import (
+ SafariIE,
+ SafariCourseIE,
+)
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
@@ -456,6 +476,7 @@ from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .srmediathek import SRMediathekIE
+from .ssa import SSAIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamcloud import StreamcloudIE
@@ -514,6 +535,10 @@ from .tvp import TvpIE, TvpSeriesIE
from .tvplay import TVPlayIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
+from .twentytwotracks import (
+ TwentyTwoTracksIE,
+ TwentyTwoTracksGenreIE
+)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -528,12 +553,15 @@ from .udemy import (
UdemyIE,
UdemyCourseIE
)
+from .ultimedia import UltimediaIE
from .unistra import UnistraIE
from .urort import UrortIE
from .ustream import UstreamIE, UstreamChannelIE
+from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
+from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import VevoIE
from .vgtv import VGTVIE
@@ -551,6 +579,7 @@ from .videoweed import VideoWeedIE
from .vidme import VidmeIE
from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE
+from .viewster import ViewsterIE
from .vimeo import (
VimeoIE,
VimeoAlbumIE,
@@ -607,6 +636,11 @@ from .yahoo import (
YahooSearchIE,
)
from .yam import YamIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+)
from .yesjapan import YesJapanIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 34b8b0115..39335b827 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -2,13 +2,12 @@
from __future__ import unicode_literals
import re
-import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
- xpath_text,
float_or_none,
+ xpath_text,
)
@@ -60,6 +59,24 @@ class AdultSwimIE(InfoExtractor):
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
+ }, {
+ 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+ 'playlist': [
+ {
+ 'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+ 'ext': 'flv',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+ },
+ }
+ ],
+ 'info_dict': {
+ 'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+ 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+ 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+ },
}]
@staticmethod
@@ -80,6 +97,7 @@ class AdultSwimIE(InfoExtractor):
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
+ return None, None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -90,28 +108,30 @@ class AdultSwimIE(InfoExtractor):
webpage = self._download_webpage(url, episode_path)
# Extract the value of `bootstrappedData` from the Javascript in the page.
- bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
- try:
- bootstrappedData = json.loads(bootstrappedDataJS)
- except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % episode_path
- raise ExtractorError(errmsg, cause=ve)
+ bootstrapped_data = self._parse_json(self._search_regex(
+ r'var bootstrappedData = ({.*});', webpage, 'bootstrapped data'), episode_path)
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
- collections = bootstrappedData['playlists']['collections']
+ collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
else:
- collections = bootstrappedData['show']['collections']
+ collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
- show = bootstrappedData['show']
+ # Video wasn't found in the collections; let's try `slugged_video`.
+ if video_info is None:
+ if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+ video_info = bootstrapped_data['slugged_video']
+ else:
+ raise ExtractorError('Unable to find video info')
+
+ show = bootstrapped_data['show']
show_title = show['title']
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
diff --git a/youtube_dl/extractor/aftenposten.py b/youtube_dl/extractor/aftenposten.py
index 2b257ede7..e15c015fb 100644
--- a/youtube_dl/extractor/aftenposten.py
+++ b/youtube_dl/extractor/aftenposten.py
@@ -14,10 +14,10 @@ from ..utils import (
class AftenpostenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/([^/]+/)*(?P<id>[^/]+)-\d+\.html'
+ _VALID_URL = r'https?://(?:www\.)?aftenposten\.no/webtv/(?:#!/)?video/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.aftenposten.no/webtv/serier-og-programmer/sweatshopenglish/TRAILER-SWEATSHOP---I-cant-take-any-more-7800835.html?paging=&section=webtv_serierogprogrammer_sweatshop_sweatshopenglish',
+ 'url': 'http://www.aftenposten.no/webtv/#!/video/21039/trailer-sweatshop-i-can-t-take-any-more',
'md5': 'fd828cd29774a729bf4d4425fe192972',
'info_dict': {
'id': '21039',
@@ -30,12 +30,7 @@ class AftenpostenIE(InfoExtractor):
}
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._html_search_regex(
- r'data-xs-id="(\d+)"', webpage, 'video id')
+ video_id = self._match_id(url)
data = self._download_xml(
'http://frontend.xstream.dk/ap/feed/video/?platform=web&id=%s' % video_id, video_id)
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 783b53e23..6a35ea463 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -50,6 +50,9 @@ class ARDMediathekIE(InfoExtractor):
if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage:
raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+ if 'Diese Sendung ist für Jugendliche unter 12 Jahren nicht geeignet. Der Clip ist deshalb nur von 20 bis 6 Uhr verfügbar.' in webpage:
+ raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 8 pm and 6 am.' % video_id, expected=True)
+
if re.search(r'[\?&]rss($|[=&])', url):
doc = parse_xml(webpage)
if doc.tag == 'rss':
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 929dd3cc5..8273bd6c9 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
formats.append(format)
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
info_dict['formats'] = formats
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index 7669e0e3d..29f8795d3 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -19,6 +19,7 @@ from ..utils import (
class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
+ _NETRC_MACHINE = 'atresplayer'
_TESTS = [
{
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
diff --git a/youtube_dl/extractor/beatportpro.py b/youtube_dl/extractor/beatportpro.py
new file mode 100644
index 000000000..3c7775d3e
--- /dev/null
+++ b/youtube_dl/extractor/beatportpro.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class BeatportProIE(InfoExtractor):
+ _VALID_URL = r'https?://pro\.beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://pro.beatport.com/track/synesthesia-original-mix/5379371',
+ 'md5': 'b3c34d8639a2f6a7f734382358478887',
+ 'info_dict': {
+ 'id': '5379371',
+ 'display_id': 'synesthesia-original-mix',
+ 'ext': 'mp4',
+ 'title': 'Froxic - Synesthesia (Original Mix)',
+ },
+ }, {
+ 'url': 'https://pro.beatport.com/track/love-and-war-original-mix/3756896',
+ 'md5': 'e44c3025dfa38c6577fbaeb43da43514',
+ 'info_dict': {
+ 'id': '3756896',
+ 'display_id': 'love-and-war-original-mix',
+ 'ext': 'mp3',
+ 'title': 'Wolfgang Gartner - Love & War (Original Mix)',
+ },
+ }, {
+ 'url': 'https://pro.beatport.com/track/birds-original-mix/4991738',
+ 'md5': 'a1fd8e8046de3950fd039304c186c05f',
+ 'info_dict': {
+ 'id': '4991738',
+ 'display_id': 'birds-original-mix',
+ 'ext': 'mp4',
+ 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
+ }
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ track_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ playables = self._parse_json(
+ self._search_regex(
+ r'window\.Playables\s*=\s*({.+?});', webpage,
+ 'playables info', flags=re.DOTALL),
+ track_id)
+
+ track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
+
+ title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
+ if track['mix']:
+ title += ' (' + track['mix'] + ')'
+
+ formats = []
+ for ext, info in track['preview'].items():
+ if not info['url']:
+ continue
+ fmt = {
+ 'url': info['url'],
+ 'ext': ext,
+ 'format_id': ext,
+ 'vcodec': 'none',
+ }
+ if ext == 'mp3':
+ fmt['preference'] = 0
+ fmt['acodec'] = 'mp3'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ elif ext == 'mp4':
+ fmt['preference'] = 1
+ fmt['acodec'] = 'aac'
+ fmt['abr'] = 96
+ fmt['asr'] = 44100
+ formats.append(fmt)
+ self._sort_formats(formats)
+
+ images = []
+ for name, info in track['images'].items():
+ image_url = info.get('url')
+ if name == 'dynamic' or not image_url:
+ continue
+ image = {
+ 'id': name,
+ 'url': image_url,
+ 'height': int_or_none(info.get('height')),
+ 'width': int_or_none(info.get('width')),
+ }
+ images.append(image)
+
+ return {
+ 'id': compat_str(track.get('id') or track_id),
+ 'display_id': track.get('slug') or display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': images,
+ }
diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py
index 4bcc897c9..809287d14 100644
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dl/extractor/breakcom.py
@@ -41,7 +41,7 @@ class BreakIE(InfoExtractor):
'tbr': media['bitRate'],
'width': media['width'],
'height': media['height'],
- } for media in info['media']]
+ } for media in info['media'] if media.get('mediaPurpose') == 'play']
if not formats:
formats.append({
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py
index abf8cc280..0fa720ee8 100644
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dl/extractor/cloudy.py
@@ -105,6 +105,7 @@ class CloudyIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
file_key = self._search_regex(
- r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
+ [r'key\s*:\s*"([^"]+)"', r'filekey\s*=\s*"([^"]+)"'],
+ webpage, 'file_key')
return self._extract_video(video_host, video_id, file_key)
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 90ea07438..0a77e951c 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -12,7 +12,7 @@ from ..utils import (
class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
- (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
+ (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))'''
_TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
@@ -45,6 +45,9 @@ class CNNIE(InfoExtractor):
'description': 'md5:e7223a503315c9f150acac52e76de086',
'upload_date': '20141222',
}
+ }, {
+ 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 7977fa8d0..e5245ec3f 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -767,6 +767,10 @@ class InfoExtractor(object):
formats)
def _is_valid_url(self, url, video_id, item='video'):
+ url = self._proto_relative_url(url, scheme='http:')
+ # For now, assume non-HTTP(S) URLs are always valid
+ if not (url.startswith('http://') or url.startswith('https://')):
+ return True
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item)
return True
@@ -835,7 +839,7 @@ class InfoExtractor(object):
m3u8_id=None):
formats = [{
- 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-meta'])),
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'url': m3u8_url,
'ext': ext,
'protocol': 'm3u8',
@@ -879,8 +883,13 @@ class InfoExtractor(object):
formats.append({'url': format_url(line)})
continue
tbr = int_or_none(last_info.get('BANDWIDTH'), scale=1000)
+ format_id = []
+ if m3u8_id:
+ format_id.append(m3u8_id)
+ last_media_name = last_media.get('NAME') if last_media else None
+ format_id.append(last_media_name if last_media_name else '%d' % (tbr if tbr else len(formats)))
f = {
- 'format_id': '-'.join(filter(None, [m3u8_id, 'm3u8-%d' % (tbr if tbr else len(formats))])),
+ 'format_id': '-'.join(format_id),
'url': format_url(line.strip()),
'tbr': tbr,
'ext': ext,
@@ -1053,6 +1062,9 @@ class InfoExtractor(object):
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError("This method must be implemented by subclasses")
+ def _subtitles_timecode(self, seconds):
+ return '%02d:%02d:%02d.%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index f1da7d09b..6ded723c9 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -23,12 +23,12 @@ from ..utils import (
)
from ..aes import (
aes_cbc_decrypt,
- inc,
)
class CrunchyrollIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _NETRC_MACHINE = 'crunchyroll'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@@ -101,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
key = obfuscate_key(id)
- class Counter:
- __value = iv
-
- def next_value(self):
- temp = self.__value
- self.__value = inc(self.__value)
- return temp
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
return zlib.decompress(decrypted_data)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 42b20a46d..47d58330b 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
- request.add_header('Cookie', 'family_filter=off')
- request.add_header('Cookie', 'ff=off')
+ request.add_header('Cookie', 'family_filter=off; ff=off')
return request
@@ -46,13 +45,13 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
_TESTS = [
{
- 'url': 'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
- 'md5': '392c4b85a60a90dc4792da41ce3144eb',
+ 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+ 'md5': '2137c41a8e78554bb09225b8eb322406',
'info_dict': {
- 'id': 'x33vw9',
+ 'id': 'x2iuewm',
'ext': 'mp4',
- 'uploader': 'Amphora Alex and Van .',
- 'title': 'Tutoriel de Youtubeur"DL DES VIDEO DE YOUTUBE"',
+ 'uploader': 'IGN',
+ 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
}
},
# Vevo video
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
- embed_page = self._download_webpage(embed_url, video_id,
- 'Downloading embed page')
+ embed_request = self._build_request(embed_url)
+ embed_page = self._download_webpage(
+ embed_request, video_id, 'Downloading embed page')
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
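
The cookie change above is more than cosmetic: Request.add_header stores headers in a dict keyed by header name, so the second add_header('Cookie', ...) call overwrote the first and only ff=off ever reached the server. A standalone demonstration:

    try:
        from urllib.request import Request  # Python 3
    except ImportError:
        from urllib2 import Request  # Python 2

    req = Request('http://example.com/')
    req.add_header('Cookie', 'family_filter=off')
    req.add_header('Cookie', 'ff=off')
    # The first cookie is gone - add_header replaced it.
    assert req.get_header('Cookie') == 'ff=off'
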
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dl/extractor/dhm.py
new file mode 100644
index 000000000..3ed1f1663
--- /dev/null
+++ b/youtube_dl/extractor/dhm.py
@@ -0,0 +1,73 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ parse_duration,
+)
+
+
+class DHMIE(InfoExtractor):
+ IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
+ _VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
+ 'md5': '11c475f670209bf6acca0b2b7ef51827',
+ 'info_dict': {
+ 'id': 'the-marshallplan-at-work-in-west-germany',
+ 'ext': 'flv',
+ 'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
+ 'description': 'md5:1fabd480c153f97b07add61c44407c82',
+ 'duration': 660,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
+ 'md5': '09890226332476a3e3f6f2cb74734aa5',
+ 'info_dict': {
+ 'id': 'rolle-1',
+ 'ext': 'flv',
+ 'title': 'ROLLE 1',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ playlist_url = self._search_regex(
+ r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
+
+ playlist = self._download_xml(playlist_url, video_id)
+
+ track = playlist.find(
+ './{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
+
+ video_url = xpath_text(
+ track, './{http://xspf.org/ns/0/}location',
+ 'video url', fatal=True)
+ thumbnail = xpath_text(
+ track, './{http://xspf.org/ns/0/}image',
+ 'thumbnail')
+
+ title = self._search_regex(
+ [r'dc:title="([^"]+)"', r'<title> &raquo;([^<]+)</title>'],
+ webpage, 'title').strip()
+ description = self._html_search_regex(
+ r'<p><strong>Description:</strong>(.+?)</p>',
+ webpage, 'description', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
+ webpage, 'duration', default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
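
The repeated {http://xspf.org/ns/0/} prefixes are ElementTree's Clark notation for namespaced tags: every element of a namespaced XSPF playlist has to be addressed with the namespace spelled out. A self-contained example with made-up playlist data:

    import xml.etree.ElementTree as ET

    XSPF_NS = '{http://xspf.org/ns/0/}'
    playlist = ET.fromstring(
        '<playlist xmlns="http://xspf.org/ns/0/"><trackList><track>'
        '<location>http://example.com/video.flv</location>'
        '</track></trackList></playlist>')

    track = playlist.find('./%strackList/%strack' % (XSPF_NS, XSPF_NS))
    location = track.find(XSPF_NS + 'location').text
    assert location == 'http://example.com/video.flv'
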
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py
new file mode 100644
index 000000000..479430c51
--- /dev/null
+++ b/youtube_dl/extractor/douyutv.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+from .common import InfoExtractor
+from ..utils import (ExtractorError, unescapeHTML)
+from ..compat import (compat_str, compat_basestring)
+
+
+class DouyuTVIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.douyutv.com/iseven',
+ 'info_dict': {
+ 'id': '17732',
+ 'display_id': 'iseven',
+ 'ext': 'flv',
+ 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': '7师傅',
+ 'uploader_id': '431925',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.douyutv.com/85982',
+ 'info_dict': {
+ 'id': '85982',
+ 'display_id': '85982',
+ 'ext': 'flv',
+ 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'uploader': 'douyu小漠',
+ 'uploader_id': '3769985',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ if video_id.isdigit():
+ room_id = video_id
+ else:
+ page = self._download_webpage(url, video_id)
+ room_id = self._html_search_regex(
+ r'"room_id"\s*:\s*(\d+),', page, 'room id')
+
+ prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
+ room_id, int(time.time()))
+
+ auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
+ config = self._download_json(
+ 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
+ video_id)
+
+ data = config['data']
+
+ error_code = config.get('error', 0)
+ if error_code != 0:
+ error_desc = 'Server reported error %i' % error_code
+ if isinstance(data, (compat_str, compat_basestring)):
+ error_desc += ': ' + data
+ raise ExtractorError(error_desc, expected=True)
+
+ show_status = data.get('show_status')
+ # 1 = live, 2 = offline
+ if show_status == '2':
+ raise ExtractorError(
+ 'Live stream is offline', expected=True)
+
+ base_url = data['rtmp_url']
+ live_path = data['rtmp_live']
+
+ title = self._live_title(unescapeHTML(data['room_name']))
+ description = data.get('show_details')
+ thumbnail = data.get('room_src')
+
+ uploader = data.get('nickname')
+ uploader_id = data.get('owner_uid')
+
+ multi_formats = data.get('rtmp_multi_bitrate')
+ if not isinstance(multi_formats, dict):
+ multi_formats = {}
+ multi_formats['live'] = live_path
+
+ formats = [{
+ 'url': '%s/%s' % (base_url, format_path),
+ 'format_id': format_id,
+ 'preference': 1 if format_id == 'live' else 0,
+ } for format_id, format_path in multi_formats.items()]
+ self._sort_formats(formats)
+
+ return {
+ 'id': room_id,
+ 'display_id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'is_live': True,
+ }
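
The Douyu API call is signed by hashing the query prefix together with a constant salt. Pulled out of the extractor for clarity (room id and salt are taken verbatim from the code above):

    import hashlib
    import time

    room_id = '17732'
    prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
        room_id, int(time.time()))
    # '1231' is the fixed salt the extractor appends before hashing.
    auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
    api_url = 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth)
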
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
new file mode 100644
index 000000000..e43bc81b2
--- /dev/null
+++ b/youtube_dl/extractor/dumpert.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+
+from .common import InfoExtractor
+from ..utils import qualities
+
+
+class DumpertIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+ _TEST = {
+ 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ files_base64 = self._search_regex(
+ r'data-files="([^"]+)"', webpage, 'data files')
+
+ files = self._parse_json(
+ base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
+ video_id)
+
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+
+ formats = [{
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ } for format_id, video_url in files.items() if format_id != 'still']
+ self._sort_formats(formats)
+
+ title = self._html_search_meta(
+ 'title', webpage) or self._og_search_title(webpage)
+ description = self._html_search_meta(
+ 'description', webpage) or self._og_search_description(webpage)
+ thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats
+ }
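
Dumpert embeds its format table as base64-encoded JSON in a data-files attribute, so the decode is a two-step affair. A self-contained round trip with made-up data (the URL is illustrative only):

    import base64
    import json

    # Stand-in for the page's data-files attribute value.
    payload = json.dumps({'mobile': 'http://example.com/video.mp4'})
    files_base64 = base64.b64encode(payload.encode('utf-8')).decode('utf-8')

    # The extractor's decode, in isolation.
    files = json.loads(
        base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'))
    assert files['mobile'] == 'http://example.com/video.mp4'
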
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py
new file mode 100644
index 000000000..7173371ee
--- /dev/null
+++ b/youtube_dl/extractor/eagleplatform.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
+
+
+class EaglePlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ eagleplatform:(?P<custom_host>[^/]+):|
+ https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://lenta.ru/news/2015/03/06/navalny/
+ 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
+ 'md5': '0b7994faa2bd5c0f69a3db6db28d078d',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # http://muz-tv.ru/play/7129/
+ # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
+ 'url': 'eagleplatform:media.clipyou.ru:12820',
+ 'md5': '6c2ebeab03b739597ce8d86339d5a905',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ }]
+
+ def _handle_error(self, response):
+ status = int_or_none(response.get('status', 200))
+ if status != 200:
+ raise ExtractorError(' '.join(response['errors']), expected=True)
+
+ def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'):
+ response = super(EaglePlatformIE, self)._download_json(url_or_request, video_id, note)
+ self._handle_error(response)
+ return response
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
+
+ player_data = self._download_json(
+ 'http://%s/api/player_data?id=%s' % (host, video_id), video_id)
+
+ media = player_data['data']['playlist']['viewports'][0]['medialist'][0]
+
+ title = media['title']
+ description = media.get('description')
+ thumbnail = media.get('snapshot')
+ duration = int_or_none(media.get('duration'))
+ view_count = int_or_none(media.get('views'))
+
+ age_restriction = media.get('age_restriction')
+ age_limit = None
+ if age_restriction:
+ age_limit = 0 if age_restriction == 'allow_all' else 18
+
+ m3u8_data = self._download_json(
+ media['sources']['secure_m3u8']['auto'],
+ video_id, 'Downloading m3u8 JSON')
+
+ formats = self._extract_m3u8_formats(
+ m3u8_data['data'][0], video_id,
+ 'mp4', entry_protocol='m3u8_native')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py
index fb5dbbe2b..0b61ea0ba 100644
--- a/youtube_dl/extractor/eighttracks.py
+++ b/youtube_dl/extractor/eighttracks.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import json
import random
-import re
from .common import InfoExtractor
from ..compat import (
@@ -103,20 +102,23 @@ class EightTracksIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
- json_like = self._search_regex(
- r"(?s)PAGE.mix = (.*?);\n", webpage, 'trax information')
- data = json.loads(json_like)
+ data = self._parse_json(
+ self._search_regex(
+ r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+ playlist_id)
session = str(random.randint(0, 1000000000))
mix_id = data['id']
track_count = data['tracks_count']
duration = data['duration']
avg_song_duration = float(duration) / track_count
+ # duration is sometimes negative, so fall back to a predefined average
+ if avg_song_duration <= 0:
+ avg_song_duration = 300
first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
next_url = first_url
entries = []
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
index 79e2fbd39..0cbca90b0 100644
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dl/extractor/eroprofile.py
@@ -1,11 +1,17 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import ExtractorError
class EroProfileIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
- _TEST = {
+ _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
+ _NETRC_MACHINE = 'eroprofile'
+ _TESTS = [{
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
'info_dict': {
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg',
'age_limit': 18,
}
- }
+ }, {
+ 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
+ 'md5': '1baa9602ede46ce904c431f5418d8916',
+ 'info_dict': {
+ 'id': '1133519',
+ 'ext': 'm4v',
+ 'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': 'Requires login',
+ }]
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ query = compat_urllib_parse.urlencode({
+ 'username': username,
+ 'password': password,
+ 'url': 'http://www.eroprofile.com/',
+ })
+ login_url = self._LOGIN_URL + query
+ login_page = self._download_webpage(login_url, None, False)
+
+ m = re.search(r'Your username or password was incorrect\.', login_page)
+ if m:
+ raise ExtractorError(
+ 'Wrong username and/or password.', expected=True)
+
+ self.report_login()
+ redirect_url = self._search_regex(
+ r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
+ self._download_webpage(redirect_url, None, False)
+
+ def _real_initialize(self):
+ self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
+ m = re.search(r'You must be logged in to view this video\.', webpage)
+ if m:
+ raise ExtractorError(
+ 'This video requires login. Please specify a username and password and try again.', expected=True)
+
video_id = self._search_regex(
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
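
The login flow added above is a plain GET with the credentials in the query string, followed by fetching the redirect script the response embeds. A rough sketch of just the URL-building step, using Python 3's urllib where the patch goes through compat_urllib_parse:

from urllib.parse import urlencode

LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'

def build_login_url(username, password):
    # The 'url' field tells the site where to send the user afterwards
    return LOGIN_URL + urlencode({
        'username': username,
        'password': password,
        'url': 'http://www.eroprofile.com/',
    })
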
diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py
index 36ba33128..c826a5404 100644
--- a/youtube_dl/extractor/extremetube.py
+++ b/youtube_dl/extractor/extremetube.py
@@ -4,11 +4,11 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_urllib_parse_urlparse,
+ compat_parse_qs,
compat_urllib_request,
- compat_urllib_parse,
)
from ..utils import (
+ qualities,
str_to_int,
)
@@ -17,7 +17,7 @@ class ExtremeTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<url>extremetube\.com/.*?video/.+?(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
- 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
+ 'md5': '344d0c6d50e2f16b06e49ca011d8ac69',
'info_dict': {
'id': '652431',
'ext': 'mp4',
@@ -49,19 +49,27 @@ class ExtremeTubeIE(InfoExtractor):
r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
webpage, 'view count', fatal=False))
- video_url = compat_urllib_parse.unquote(self._html_search_regex(
- r'video_url=(.+?)&amp;', webpage, 'video_url'))
- path = compat_urllib_parse_urlparse(video_url).path
- format = path.split('/')[5].split('_')[:2]
- format = "-".join(format)
+ flash_vars = compat_parse_qs(self._search_regex(
+ r'<param[^>]+?name="flashvars"[^>]+?value="([^"]+)"', webpage, 'flash vars'))
+
+ formats = []
+ quality = qualities(['180p', '240p', '360p', '480p', '720p', '1080p'])
+ for k, vals in flash_vars.items():
+ m = re.match(r'quality_(?P<quality>[0-9]+p)$', k)
+ if m is not None:
+ formats.append({
+ 'format_id': m.group('quality'),
+ 'quality': quality(m.group('quality')),
+ 'url': vals[0],
+ })
+
+ self._sort_formats(formats)
return {
'id': video_id,
'title': video_title,
+ 'formats': formats,
'uploader': uploader,
'view_count': view_count,
- 'url': video_url,
- 'format': format,
- 'format_id': format,
'age_limit': 18,
}
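
Rather than slicing a format name out of the video URL's path, the rewrite above reads every quality_* entry from the player's flashvars. A self-contained sketch of that parsing, with urllib.parse.parse_qs standing in for compat_parse_qs and a list index standing in for youtube-dl's qualities() helper:

import re
from urllib.parse import parse_qs

ORDER = ['180p', '240p', '360p', '480p', '720p', '1080p']

def parse_flash_formats(flashvars_value):
    # flashvars_value is the raw value="..." attribute of the <param> tag
    formats = []
    for key, vals in parse_qs(flashvars_value).items():
        m = re.match(r'quality_(?P<q>[0-9]+p)$', key)
        if m:
            q = m.group('q')
            formats.append({
                'format_id': q,
                'quality': ORDER.index(q) if q in ORDER else -1,
                'url': vals[0],
            })
    return sorted(formats, key=lambda f: f['quality'])
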
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dl/extractor/footyroom.py
new file mode 100644
index 000000000..2b4691ae8
--- /dev/null
+++ b/youtube_dl/extractor/footyroom.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FootyRoomIE(InfoExtractor):
+ _VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
+ 'info_dict': {
+ 'id': 'schalke-04-0-2-real-madrid-2015-02',
+ 'title': 'Schalke 04 0 – 2 Real Madrid',
+ },
+ 'playlist_count': 3,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'VideoSelector\.load\((\[.+?\])\);', webpage, 'video selector'),
+ playlist_id)
+
+ playlist_title = self._og_search_title(webpage)
+
+ entries = []
+ for video in playlist:
+ payload = video.get('payload')
+ if not payload:
+ continue
+ playwire_url = self._search_regex(
+ r'data-config="([^"]+)"', payload,
+ 'playwire url', default=None)
+ if playwire_url:
+ entries.append(self.url_result(playwire_url, 'Playwire'))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index a49fc1151..dd87257c4 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -50,7 +50,6 @@ class FunnyOrDieIE(InfoExtractor):
bitrates.sort()
formats = []
-
for bitrate in bitrates:
for link in links:
formats.append({
@@ -59,6 +58,13 @@ class FunnyOrDieIE(InfoExtractor):
'vbr': bitrate,
})
+ subtitles = {}
+ for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
+ subtitles[src_lang] = [{
+ 'ext': src.split('/')[-1],
+ 'url': 'http://www.funnyordie.com%s' % src,
+ }]
+
post_json = self._search_regex(
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details')
post = json.loads(post_json)
@@ -69,4 +75,5 @@ class FunnyOrDieIE(InfoExtractor):
'description': post.get('description'),
'thumbnail': post.get('picture'),
'formats': formats,
+ 'subtitles': subtitles,
}
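
The subtitles dict assembled above maps a language code to a list of candidate files, which is the shape youtube-dl expects in the info dict. The scraping step in isolation (the sample markup below is made up, shaped to match the regex in the patch):

import re

def extract_track_subtitles(webpage, base='http://www.funnyordie.com'):
    subtitles = {}
    for src, src_lang in re.findall(
            r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"',
            webpage):
        # The patch treats the last path component as the extension hint
        subtitles[src_lang] = [{'ext': src.split('/')[-1], 'url': base + src}]
    return subtitles

html = '<track kind="captions" src="/subs/123/vtt" srclang="en">'
assert extract_track_subtitles(html) == {
    'en': [{'ext': 'vtt', 'url': 'http://www.funnyordie.com/subs/123/vtt'}]}
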
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dl/extractor/gazeta.py
new file mode 100644
index 000000000..ea32b621c
--- /dev/null
+++ b/youtube_dl/extractor/gazeta.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class GazetaIE(InfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:(?:main|\d{4}/\d{2}/\d{2})/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
+ _TESTS = [{
+ 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',
+ 'md5': 'd49c9bdc6e5a7888f27475dc215ee789',
+ 'info_dict': {
+ 'id': '205566',
+ 'ext': 'mp4',
+ 'title': '«70–80 процентов гражданских в Донецке на грани голода»',
+ 'description': 'md5:38617526050bd17b234728e7f9620a71',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'http://www.gazeta.ru/lifestyle/video/2015/03/08/master-klass_krasivoi_byt._delaem_vesennii_makiyazh.shtml',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ display_id = mobj.group('id')
+ embed_url = '%s?p=embed' % mobj.group('url')
+ embed_page = self._download_webpage(
+ embed_url, display_id, 'Downloading embed page')
+
+ video_id = self._search_regex(
+ r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id')
+
+ return self.url_result(
+ 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform')
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index f7b467b0a..51796f3a4 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -12,6 +12,7 @@ from ..utils import remove_end
class GDCVaultIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
+ _NETRC_MACHINE = 'gdcvault'
_TESTS = [
{
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 27e2bc300..2ff002643 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -26,8 +26,10 @@ from ..utils import (
unsmuggle_url,
UnsupportedError,
url_basename,
+ xpath_text,
)
from .brightcove import BrightcoveIE
+from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .smotri import SmotriIE
@@ -526,6 +528,17 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Viddler'],
},
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ },
# jwplayer YouTube
{
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
@@ -569,6 +582,75 @@ class GenericIE(InfoExtractor):
'title': 'John Carlson Postgame 2/25/15',
},
},
+ # Eagle.Platform embed (generic URL)
+ {
+ 'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ 'info_dict': {
+ 'id': '227304',
+ 'ext': 'mp4',
+ 'title': 'Навальный вышел на свободу',
+ 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 87,
+ 'view_count': int,
+ 'age_limit': 0,
+ },
+ },
+ # ClipYou (Eagle.Platform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ },
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ },
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ 'ext': 'm4v',
+ 'upload_date': '20150228',
+ 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ }
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ },
+ }
]
def report_following_redirect(self, new_url):
@@ -580,11 +662,24 @@ class GenericIE(InfoExtractor):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
- entries = [{
- '_type': 'url',
- 'url': e.find('link').text,
- 'title': e.find('title').text,
- } for e in doc.findall('./channel/item')]
+ entries = []
+ for it in doc.findall('./channel/item'):
+ next_url = xpath_text(it, 'link', fatal=False)
+ if not next_url:
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
+ if not next_url:
+ continue
+
+ entries.append({
+ '_type': 'url',
+ 'url': next_url,
+ 'title': it.find('title').text,
+ })
return {
'_type': 'playlist',
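
The loop above now falls back from an item's <link> to its <enclosure url="..."> attribute, which is where podcast feeds such as the new Maddow test keep their media. The same logic with plain xml.etree, outside the extractor:

import xml.etree.ElementTree as ET

def feed_entry_urls(feed_xml):
    doc = ET.fromstring(feed_xml)
    urls = []
    for item in doc.findall('./channel/item'):
        link = item.find('link')
        url = link.text if link is not None else None
        if not url:
            # Podcast-style feeds carry the media in an enclosure instead
            for enc in item.findall('./enclosure'):
                url = enc.attrib.get('url')
                if url:
                    break
        if url:
            urls.append(url)
    return urls
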
@@ -943,6 +1038,19 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for NYTimes player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
@@ -1131,6 +1239,35 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura')
+ # Look for Eagle.Platform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'EaglePlatform')
+
+ # Look for ClipYou (uses Eagle.Platform) embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+ # Look for Pladform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://out\.pladform\.ru/player\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Pladform')
+
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
@@ -1187,10 +1324,16 @@ class GenericIE(InfoExtractor):
# HTML5 video
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = found.group(1)
self.report_following_redirect(new_url)
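
With REDIRECT_REGEX factored out, one pattern now serves both the <meta http-equiv="refresh"> content attribute and the HTTP Refresh header. A quick check of the regex against both shapes of the value:

import re

REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'

# Header form: Refresh: 0; url=...
assert re.search(REDIRECT_REGEX, "0; url=http://example.com/video").group(1) \
    == 'http://example.com/video'
# Quoted form as it appears inside content="..."
assert re.search(REDIRECT_REGEX, "5; URL='/next.html").group(1) == '/next.html'
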
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
index 29638a194..8a95793ca 100644
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -20,7 +20,7 @@ class GloboIE(InfoExtractor):
_VALID_URL = 'https?://.+?\.globo\.com/(?P<id>.+)'
_API_URL_TEMPLATE = 'http://api.globovideos.com/videos/%s/playlist'
- _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=2.9.9.50&resource_id=%s'
+ _SECURITY_URL_TEMPLATE = 'http://security.video.globo.com/videos/%s/hash?player=flash&version=17.0.0.132&resource_id=%s'
_VIDEOID_REGEXES = [
r'\bdata-video-id="(\d+)"',
diff --git a/youtube_dl/extractor/grooveshark.py b/youtube_dl/extractor/grooveshark.py
index 848d17beb..36ad4915c 100644
--- a/youtube_dl/extractor/grooveshark.py
+++ b/youtube_dl/extractor/grooveshark.py
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
if webpage is not None:
o = GroovesharkHtmlParser.extract_object_tags(webpage)
- return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
+ return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
- return (webpage, None)
+ return webpage, None
def _real_initialize(self):
self.ts = int(time.time() * 1000) # timestamp in millis
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
swf_referer = None
if self.do_playerpage_request:
(_, player_objs) = self._get_playerpage(url)
- if player_objs is not None:
+ if player_objs:
swf_referer = self._build_swf_referer(url, player_objs[0])
self.to_screen('SWF Referer: %s' % swf_referer)
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index 8094cc2e4..d0720ff56 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
@@ -15,10 +14,10 @@ class JeuxVideoIE(InfoExtractor):
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',
'md5': '046e491afb32a8aaac1f44dd4ddd54ee',
'info_dict': {
- 'id': '5182',
+ 'id': '114765',
'ext': 'mp4',
- 'title': 'GC 2013 : Tearaway nous présente ses papiers d\'identité',
- 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.\n',
+ 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité',
+ 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.',
},
}
@@ -26,26 +25,29 @@ class JeuxVideoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
title = mobj.group(1)
webpage = self._download_webpage(url, title)
- xml_link = self._html_search_regex(
- r'<param name="flashvars" value="config=(.*?)" />',
+ title = self._html_search_meta('name', webpage)
+ config_url = self._html_search_regex(
+ r'data-src="(/contenu/medias/video.php.*?)"',
webpage, 'config URL')
+ config_url = 'http://www.jeuxvideo.com' + config_url
video_id = self._search_regex(
- r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
- xml_link, 'video ID')
+ r'id=(\d+)',
+ config_url, 'video ID')
- config = self._download_xml(
- xml_link, title, 'Downloading XML config')
- info_json = config.find('format.json').text
- info = json.loads(info_json)['versions'][0]
+ config = self._download_json(
+ config_url, title, 'Downloading JSON config')
- video_url = 'http://video720.jeuxvideo.com/' + info['file']
+ formats = [{
+ 'url': source['file'],
+ 'format_id': source['label'],
+ 'resolution': source['label'],
+ } for source in reversed(config['sources'])]
return {
'id': video_id,
- 'title': config.find('titre_video').text,
- 'ext': 'mp4',
- 'url': video_url,
+ 'title': title,
+ 'formats': formats,
'description': self._og_search_description(webpage),
- 'thumbnail': config.find('image').text,
+ 'thumbnail': config.get('image'),
}
diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dl/extractor/kanalplay.py
new file mode 100644
index 000000000..2bb078036
--- /dev/null
+++ b/youtube_dl/extractor/kanalplay.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+)
+
+
+class KanalPlayIE(InfoExtractor):
+ IE_DESC = 'Kanal 5/9/11 Play'
+ _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
+ 'info_dict': {
+ 'id': '3270012277',
+ 'ext': 'flv',
+ 'title': 'Saknar både dusch och avlopp',
+ 'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
+ 'duration': 2636.36,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
+ 'only_matching': True,
+ }]
+
+ def _fix_subtitles(self, subs):
+ return '\r\n\r\n'.join(
+ '%s\r\n%s --> %s\r\n%s'
+ % (
+ num,
+ self._subtitles_timecode(item['startMillis'] / 1000.0),
+ self._subtitles_timecode(item['endMillis'] / 1000.0),
+ item['text'],
+ ) for num, item in enumerate(subs, 1))
+
+ def _get_subtitles(self, channel_id, video_id):
+ subs = self._download_json(
+ 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
+ video_id, 'Downloading subtitles JSON', fatal=False)
+ return {'se': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ channel_id = mobj.group('channel_id')
+
+ video = self._download_json(
+ 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
+ video_id)
+
+ reasons_for_no_streams = video.get('reasonsForNoStreams')
+ if reasons_for_no_streams:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
+ expected=True)
+
+ title = video['title']
+ description = video.get('description')
+ duration = float_or_none(video.get('length'), 1000)
+ thumbnail = video.get('posterUrl')
+
+ stream_base_url = video['streamBaseUrl']
+
+ formats = [{
+ 'url': stream_base_url,
+ 'play_path': stream['source'],
+ 'ext': 'flv',
+ 'tbr': float_or_none(stream.get('bitrate'), 1000),
+ 'rtmp_real_time': True,
+ } for stream in video['streams']]
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if video.get('hasSubtitle'):
+ subtitles = self.extract_subtitles(channel_id, video_id)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
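
_fix_subtitles above converts the JSON cue list into SRT text via _subtitles_timecode, an InfoExtractor helper that renders a float of seconds as an SRT-style timestamp. A standalone equivalent (the formatter below is my own stand-in; the exact separator the real helper uses may differ):

def timecode(seconds):
    # Stand-in for InfoExtractor._subtitles_timecode
    ms = int(round(seconds * 1000))
    h, rest = divmod(ms, 3600000)
    m, rest = divmod(rest, 60000)
    s, ms = divmod(rest, 1000)
    return '%02d:%02d:%02d,%03d' % (h, m, s, ms)

def json_cues_to_srt(cues):
    return '\r\n\r\n'.join(
        '%d\r\n%s --> %s\r\n%s' % (
            num,
            timecode(cue['startMillis'] / 1000.0),
            timecode(cue['endMillis'] / 1000.0),
            cue['text'],
        ) for num, cue in enumerate(cues, 1))
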
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dl/extractor/krasview.py
index e46954b47..96f95979a 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dl/extractor/krasview.py
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
description = self._og_search_description(webpage, default=None)
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
duration = int_or_none(flashvars.get('duration'))
- width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
- height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, 'video width', default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, 'video height', default=None))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py
index 583ce35b9..1484ac0d2 100644
--- a/youtube_dl/extractor/letv.py
+++ b/youtube_dl/extractor/letv.py
@@ -7,8 +7,9 @@ import time
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_urllib_parse,
+ compat_urllib_request,
+ compat_urlparse,
)
from ..utils import (
determine_ext,
@@ -39,12 +40,20 @@ class LetvIE(InfoExtractor):
'title': '美人天下01',
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
},
- 'expected_warnings': [
- 'publish time'
- ]
+ }, {
+ 'note': 'This video is available only in Mainland China, thus a proxy is needed',
+ 'url': 'http://www.letv.com/ptv/vplay/1118082.html',
+ 'md5': 'f80936fbe20fb2f58648e81386ff7927',
+ 'info_dict': {
+ 'id': '1118082',
+ 'ext': 'mp4',
+ 'title': '与龙共舞 完整版',
+ 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986',
+ },
+ 'params': {
+ 'cn_verification_proxy': 'http://proxy.uku.im:8888'
+ },
}]
- # http://www.letv.com/ptv/vplay/1118082.html
- # This video is available only in Mainland China
@staticmethod
def urshift(val, n):
@@ -76,9 +85,16 @@ class LetvIE(InfoExtractor):
'tkey': self.calc_time_key(int(time.time())),
'domain': 'www.letv.com'
}
+ play_json_req = compat_urllib_request.Request(
+ 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
+ )
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
play_json = self._download_json(
- 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params),
- media_id, 'playJson data')
+ play_json_req,
+ media_id, 'Downloading playJson data')
# Check for errors
playstatus = play_json['playstatus']
@@ -114,7 +130,8 @@ class LetvIE(InfoExtractor):
url_info_dict = {
'url': media_url,
- 'ext': determine_ext(dispatch[format_id][1])
+ 'ext': determine_ext(dispatch[format_id][1]),
+ 'format_id': format_id,
}
if format_id[-1:] == 'p':
@@ -123,7 +140,7 @@ class LetvIE(InfoExtractor):
urls.append(url_info_dict)
publish_time = parse_iso8601(self._html_search_regex(
- r'发布时间&nbsp;([^<>]+) ', page, 'publish time', fatal=False),
+ r'发布时间&nbsp;([^<>]+) ', page, 'publish time', default=None),
delimiter=' ', timezone=datetime.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
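
The playJson call above is wrapped in a Request object solely so a per-request proxy header can be attached when cn_verification_proxy is set; Ytdl-request-proxy is an internal youtube-dl convention consumed by its URL handlers, not a standard HTTP header. The shape of the idea in plain urllib:

from urllib.request import Request
from urllib.parse import urlencode

def build_play_json_request(params, cn_verification_proxy=None):
    req = Request(
        'http://api.letv.com/mms/out/video/playJson?' + urlencode(params))
    if cn_verification_proxy:
        # Only this one request is routed through the proxy
        req.add_header('Ytdl-request-proxy', cn_verification_proxy)
    return req
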
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py
new file mode 100644
index 000000000..9ab1416f5
--- /dev/null
+++ b/youtube_dl/extractor/libsyn.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class LibsynIE(InfoExtractor):
+ _VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
+
+ _TEST = {
+ 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
+ 'md5': '443360ee1b58007bc3dcf09b41d093bb',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ formats = [{
+ 'url': media_url,
+ } for media_url in set(re.findall(r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
+
+ podcast_title = self._search_regex(
+ r'<h2>([^<]+)</h2>', webpage, 'title')
+ episode_title = self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
+
+ title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+
+ description = self._html_search_regex(
+ r'<div id="info_text_body">(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+
+ release_date = unified_strdate(self._search_regex(
+ r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': release_date,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 3642089f7..2467f8bdd 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
import re
import json
+import itertools
from .common import InfoExtractor
from ..compat import (
@@ -41,6 +42,13 @@ class LivestreamIE(InfoExtractor):
},
'playlist_mincount': 4,
}, {
+ 'url': 'http://new.livestream.com/chess24/tatasteelchess',
+ 'info_dict': {
+ 'title': 'Tata Steel Chess',
+ 'id': '3705884',
+ },
+ 'playlist_mincount': 60,
+ }, {
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
'only_matching': True,
}]
@@ -117,6 +125,30 @@ class LivestreamIE(InfoExtractor):
'view_count': video_data.get('views'),
}
+ def _extract_event(self, info):
+ event_id = compat_str(info['id'])
+ account = compat_str(info['owner_account_id'])
+ root_url = (
+ 'https://new.livestream.com/api/accounts/{account}/events/{event}/'
+ 'feed.json'.format(account=account, event=event_id))
+
+ def _extract_videos():
+ last_video = None
+ for i in itertools.count(1):
+ if last_video is None:
+ info_url = root_url
+ else:
+ info_url = '{root}?&id={id}&newer=-1&type=video'.format(
+ root=root_url, id=last_video)
+ videos_info = self._download_json(info_url, event_id, 'Downloading page {0}'.format(i))['data']
+ videos_info = [v['data'] for v in videos_info if v['type'] == 'video']
+ if not videos_info:
+ break
+ for v in videos_info:
+ yield self._extract_video_info(v)
+ last_video = videos_info[-1]['id']
+ return self.playlist_result(_extract_videos(), event_id, info['full_name'])
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
@@ -144,14 +176,13 @@ class LivestreamIE(InfoExtractor):
result = result and compat_str(vdata['data']['id']) == vid
return result
- videos = [self._extract_video_info(video_data['data'])
- for video_data in info['feed']['data']
- if is_relevant(video_data, video_id)]
if video_id is None:
# This is an event page:
- return self.playlist_result(
- videos, '%s' % info['id'], info['full_name'])
+ return self._extract_event(info)
else:
+ videos = [self._extract_video_info(video_data['data'])
+ for video_data in info['feed']['data']
+ if is_relevant(video_data, video_id)]
if not videos:
raise ExtractorError('Cannot find video %s' % video_id)
return videos[0]
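
_extract_event above pages through the event feed lazily: each request asks for videos relative to the last id seen, and the generator stops on the first empty page, so playlist_result can consume it without ever materialising the full list. The generator skeleton, with a hypothetical fetch_page callable in place of _download_json:

import itertools

def iter_event_videos(fetch_page):
    # fetch_page(last_id) returns the next batch of video dicts,
    # or an empty list once the feed is exhausted
    last_id = None
    for page_number in itertools.count(1):
        batch = fetch_page(last_id)
        if not batch:
            break
        for video in batch:
            yield video
        last_id = batch[-1]['id']
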
diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py
index 9c2fbdd96..e3236f7b5 100644
--- a/youtube_dl/extractor/lrt.py
+++ b/youtube_dl/extractor/lrt.py
@@ -52,6 +52,7 @@ class LRTIE(InfoExtractor):
'url': data['streamer'],
'play_path': 'mp4:%s' % data['file'],
'preference': -1,
+ 'rtmp_real_time': True,
})
else:
formats.extend(
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 5dc22da22..cfd3b14f4 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -15,18 +15,73 @@ from ..utils import (
)
-class LyndaIE(InfoExtractor):
+class LyndaBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+ _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
+ _NETRC_MACHINE = 'lynda'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'username': username,
+ 'password': password,
+ 'remember': 'false',
+ 'stayPut': 'false'
+ }
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
+ login_page = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ # Not (yet) logged in
+ m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
+ if m is not None:
+ response = m.group('json')
+ response_json = json.loads(response)
+ state = response_json['state']
+
+ if state == 'notlogged':
+ raise ExtractorError(
+ 'Unable to login, incorrect username and/or password',
+ expected=True)
+
+ # This is when we get a popup:
+ # > You're already logged in to lynda.com on two devices.
+ # > If you log in here, we'll log you out of another device.
+ # So we need to confirm this.
+ if state == 'conflicted':
+ confirm_form = {
+ 'username': '',
+ 'password': '',
+ 'resolve': 'true',
+ 'remember': 'false',
+ 'stayPut': 'false',
+ }
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
+ login_page = self._download_webpage(
+ request, None,
+ 'Confirming log in and log out from another device')
+
+ if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+ raise ExtractorError('Unable to log in')
+
+
+class LyndaIE(LyndaBaseIE):
IE_NAME = 'lynda'
IE_DESC = 'lynda.com videos'
- _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(\d+)'
- _LOGIN_URL = 'https://www.lynda.com/login/login.aspx'
+ _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)'
_NETRC_MACHINE = 'lynda'
- _SUCCESSFUL_LOGIN_REGEX = r'isLoggedIn: true'
_TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
- ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
-
_TESTS = [{
'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
@@ -41,23 +96,22 @@ class LyndaIE(InfoExtractor):
'only_matching': True,
}]
- def _real_initialize(self):
- self._login()
-
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
- page = self._download_webpage('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, video_id,
- 'Downloading video JSON')
+ page = self._download_webpage(
+ 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
+ video_id, 'Downloading video JSON')
video_json = json.loads(page)
if 'Status' in video_json:
- raise ExtractorError('lynda returned error: %s' % video_json['Message'], expected=True)
+ raise ExtractorError(
+ 'lynda returned error: %s' % video_json['Message'], expected=True)
if video_json['HasAccess'] is False:
raise ExtractorError(
- 'Video %s is only available for members. ' % video_id + self.ACCOUNT_CREDENTIALS_HINT, expected=True)
+ 'Video %s is only available for members. '
+ % video_id + self._ACCOUNT_CREDENTIALS_HINT, expected=True)
video_id = compat_str(video_json['ID'])
duration = video_json['DurationInSeconds']
@@ -100,50 +154,9 @@ class LyndaIE(InfoExtractor):
'formats': formats
}
- def _login(self):
- (username, password) = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'username': username,
- 'password': password,
- 'remember': 'false',
- 'stayPut': 'false'
- }
- request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
- login_page = self._download_webpage(request, None, 'Logging in as %s' % username)
-
- # Not (yet) logged in
- m = re.search(r'loginResultJson = \'(?P<json>[^\']+)\';', login_page)
- if m is not None:
- response = m.group('json')
- response_json = json.loads(response)
- state = response_json['state']
-
- if state == 'notlogged':
- raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
-
- # This is when we get popup:
- # > You're already logged in to lynda.com on two devices.
- # > If you log in here, we'll log you out of another device.
- # So, we need to confirm this.
- if state == 'conflicted':
- confirm_form = {
- 'username': '',
- 'password': '',
- 'resolve': 'true',
- 'remember': 'false',
- 'stayPut': 'false',
- }
- request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(confirm_form))
- login_page = self._download_webpage(request, None, 'Confirming log in and log out from another device')
-
- if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
- raise ExtractorError('Unable to log in')
-
def _fix_subtitles(self, subs):
srt = ''
+ seq_counter = 0
for pos in range(0, len(subs) - 1):
seq_current = subs[pos]
m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
@@ -155,8 +168,10 @@ class LyndaIE(InfoExtractor):
continue
appear_time = m_current.group('timecode')
disappear_time = m_next.group('timecode')
- text = seq_current['Caption'].lstrip()
- srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
+ text = seq_current['Caption'].strip()
+ if text:
+ seq_counter += 1
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text)
if srt:
return srt
@@ -169,7 +184,7 @@ class LyndaIE(InfoExtractor):
return {}
-class LyndaCourseIE(InfoExtractor):
+class LyndaCourseIE(LyndaBaseIE):
IE_NAME = 'lynda:course'
IE_DESC = 'lynda.com online courses'
@@ -182,35 +197,37 @@ class LyndaCourseIE(InfoExtractor):
course_path = mobj.group('coursepath')
course_id = mobj.group('courseid')
- page = self._download_webpage('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
- course_id, 'Downloading course JSON')
+ page = self._download_webpage(
+ 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id,
+ course_id, 'Downloading course JSON')
course_json = json.loads(page)
if 'Status' in course_json and course_json['Status'] == 'NotFound':
- raise ExtractorError('Course %s does not exist' % course_id, expected=True)
+ raise ExtractorError(
+ 'Course %s does not exist' % course_id, expected=True)
unaccessible_videos = 0
videos = []
- (username, _) = self._get_login_info()
# Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided
# by single video API anymore
for chapter in course_json['Chapters']:
for video in chapter['Videos']:
- if username is None and video['HasAccess'] is False:
+ if video['HasAccess'] is False:
unaccessible_videos += 1
continue
videos.append(video['ID'])
if unaccessible_videos > 0:
- self._downloader.report_warning('%s videos are only available for members and will not be downloaded. '
- % unaccessible_videos + LyndaIE.ACCOUNT_CREDENTIALS_HINT)
+ self._downloader.report_warning(
+ '%s videos are only available for members (or paid members) and will not be downloaded. '
+ % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT)
entries = [
- self.url_result('http://www.lynda.com/%s/%s-4.html' %
- (course_path, video_id),
- 'Lynda')
+ self.url_result(
+ 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id),
+ 'Lynda')
for video_id in videos]
course_title = course_json['Title']
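
The _fix_subtitles change above fixes two SRT problems at once: blank captions no longer consume sequence numbers, and each cue is now terminated with the blank line SRT requires. Reduced to its essentials:

def cues_to_srt(cues):
    # cues: iterable of (appear_time, disappear_time, text) tuples
    srt = ''
    counter = 0
    for appear, disappear, text in cues:
        text = text.strip()
        if not text:
            continue  # don't burn a sequence number on an empty caption
        counter += 1
        srt += '%d\r\n%s --> %s\r\n%s\r\n\r\n' % (
            counter, appear, disappear, text)
    return srt
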
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 1831c6749..21aea0c55 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,6 +1,7 @@
from __future__ import unicode_literals
import re
+import itertools
from .common import InfoExtractor
from ..compat import (
@@ -10,7 +11,6 @@ from ..utils import (
ExtractorError,
HEADRequest,
str_to_int,
- parse_iso8601,
)
@@ -27,8 +27,6 @@ class MixcloudIE(InfoExtractor):
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach',
'uploader_id': 'dholbach',
- 'upload_date': '20111115',
- 'timestamp': 1321359578,
'thumbnail': 're:https?://.*\.jpg',
'view_count': int,
'like_count': int,
@@ -37,31 +35,30 @@ class MixcloudIE(InfoExtractor):
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': {
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
- 'ext': 'm4a',
- 'title': 'Electric Relaxation vol. 3',
+ 'ext': 'mp3',
+ 'title': 'Caribou 7 inch Vinyl Mix & Chat',
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
- 'uploader': 'Daniel Drumz',
+ 'uploader': 'Gilles Peterson Worldwide',
'uploader_id': 'gillespeterson',
- 'thumbnail': 're:https?://.*\.jpg',
+ 'thumbnail': 're:https?://.*/images/',
'view_count': int,
'like_count': int,
},
}]
- def _get_url(self, track_id, template_url):
- server_count = 30
- for i in range(server_count):
- url = template_url % i
+ def _get_url(self, track_id, template_url, server_number):
+ boundaries = (1, 30)
+ for nr in server_numbers(server_number, boundaries):
+ url = template_url % nr
try:
# We only want to know if the request succeed
# don't download the whole file
self._request_webpage(
HEADRequest(url), track_id,
- 'Checking URL %d/%d ...' % (i + 1, server_count + 1))
+ 'Checking URL %d/%d ...' % (nr, boundaries[-1]))
return url
except ExtractorError:
pass
-
return None
def _real_extract(self, url):
@@ -75,17 +72,18 @@ class MixcloudIE(InfoExtractor):
preview_url = self._search_regex(
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
song_url = preview_url.replace('/previews/', '/c/originals/')
+ server_number = int(self._search_regex(r'stream(\d+)', song_url, 'server number'))
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
- final_song_url = self._get_url(track_id, template_url)
+ final_song_url = self._get_url(track_id, template_url, server_number)
if final_song_url is None:
self.to_screen('Trying with m4a extension')
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
- final_song_url = self._get_url(track_id, template_url)
+ final_song_url = self._get_url(track_id, template_url, server_number)
if final_song_url is None:
raise ExtractorError('Unable to extract track url')
PREFIX = (
- r'<span class="play-button[^"]*?"'
+ r'm-play-on-spacebar[^>]+'
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
title = self._html_search_regex(
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
@@ -99,16 +97,12 @@ class MixcloudIE(InfoExtractor):
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
description = self._og_search_description(webpage)
like_count = str_to_int(self._search_regex(
- [r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
- r'/favorites/?">([0-9]+)<'],
+ r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"',
webpage, 'like count', fatal=False))
view_count = str_to_int(self._search_regex(
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
r'/listeners/?">([0-9,.]+)</a>'],
webpage, 'play count', fatal=False))
- timestamp = parse_iso8601(self._search_regex(
- r'<time itemprop="dateCreated" datetime="([^"]+)">',
- webpage, 'upload date', default=None))
return {
'id': track_id,
@@ -118,7 +112,38 @@ class MixcloudIE(InfoExtractor):
'thumbnail': thumbnail,
'uploader': uploader,
'uploader_id': uploader_id,
- 'timestamp': timestamp,
'view_count': view_count,
'like_count': like_count,
}
+
+
+def server_numbers(first, boundaries):
+ """ Server numbers to try in descending order of probable availability.
+ Starting from first (i.e. the number of the server hosting the preview
+ file), the numbers alternate outwards: up towards the higher boundary
+ and down towards the lower one. Namely:
+
+ server_numbers(2, (1, 5))
+
+ # Where the preview server is 2, min number is 1 and max is 5.
+ # Yields: 2, 3, 1, 4, 5
+
+ Why not random numbers or an increasing sequence? From what I've seen,
+ full-length files tend to be hosted on servers whose number is close to
+ that of the preview; this is yet to be confirmed.
+ """
+ zip_longest = getattr(itertools, 'zip_longest', None)
+ if zip_longest is None:
+ # python 2.x
+ zip_longest = itertools.izip_longest
+
+ if len(boundaries) != 2:
+ raise ValueError("boundaries should be a two-element tuple")
+ lower, upper = boundaries
+ highs = range(first + 1, upper + 1)
+ lows = range(first - 1, lower - 1, -1)
+ rest = filter(
+ None, itertools.chain.from_iterable(zip_longest(highs, lows)))
+ yield first
+ for n in rest:
+ yield n
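
The docstring's example can be verified directly once server_numbers is importable; the sequence spirals outwards from the preview server and simply runs out in one direction at a boundary:

assert list(server_numbers(2, (1, 5))) == [2, 3, 1, 4, 5]
assert list(server_numbers(1, (1, 5))) == [1, 2, 3, 4, 5]
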
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py
index 1a241aca7..e369551c2 100644
--- a/youtube_dl/extractor/mlb.py
+++ b/youtube_dl/extractor/mlb.py
@@ -10,7 +10,7 @@ from ..utils import (
class MLBIE(InfoExtractor):
- _VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
+ _VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
'only_matching': True,
},
+ {
+ 'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
+ 'only_matching': True,
+ }
]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 3645d3033..ecd0ac8b1 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -14,7 +14,7 @@ from ..utils import (
class NBCIE(InfoExtractor):
- _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+ _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_TESTS = [
{
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
return self.url_result(theplatform_url)
+class NBCSportsVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
+ }, {
+ 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ iframe_m = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._og_search_video_url(webpage)
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+ # Does not include https because its certificate is invalid
+ _VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'flv',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
class NBCNewsIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
(?:video/.+?/(?P<id>\d+)|
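
_extract_url above is deliberately a static method, so GenericIE (see the generic.py hunk earlier) can probe any page for the embed without instantiating the extractor. Hypothetical usage:

html = ('<iframe src="https://vplayer.nbcsports.com/p/BxmELC/'
        'nbcsports_share/select/9CsDKds0kvHI"></iframe>')
url = NBCSportsVPlayerIE._extract_url(html)
assert url == ('https://vplayer.nbcsports.com/p/BxmELC/'
               'nbcsports_share/select/9CsDKds0kvHI')
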
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 4c1890416..ddec7b338 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': {
@@ -39,9 +39,26 @@ class NiconicoIE(InfoExtractor):
'username': 'ydl.niconico@gmail.com',
'password': 'youtube-dl',
},
- }
+ }, {
+ 'url': 'http://www.nicovideo.jp/watch/nm14296458',
+ 'md5': '8db08e0158457cf852a31519fceea5bc',
+ 'info_dict': {
+ 'id': 'nm14296458',
+ 'ext': 'swf',
+ 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
+ 'description': 'md5:',
+ 'uploader': 'りょうた',
+ 'uploader_id': '18822557',
+ 'upload_date': '20110429',
+ 'duration': 209,
+ },
+ 'params': {
+ 'username': 'ydl.niconico@gmail.com',
+ 'password': 'youtube-dl',
+ },
+ }]
- _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
# Determine whether the downloader used authentication to download video
_AUTHENTICATED = False
@@ -76,8 +93,7 @@ class NiconicoIE(InfoExtractor):
return True
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group(1)
+ video_id = self._match_id(url)
# Get video webpage. We are not actually interested in it, but need
# the cookies in order to be able to download the info webpage
@@ -90,7 +106,7 @@ class NiconicoIE(InfoExtractor):
if self._AUTHENTICATED:
# Get flv info
flv_info_webpage = self._download_webpage(
- 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
+ 'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
video_id, 'Downloading flv info')
else:
# Get external player info
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index 9c01eb0af..5d8448571 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -219,7 +219,8 @@ class NPOLiveIE(NPOBaseIE):
if streams:
for stream in streams:
stream_type = stream.get('type').lower()
- if stream_type == 'ss':
+ # Smooth Streaming is not supported
+ if stream_type in ['ss', 'ms']:
continue
stream_info = self._download_json(
'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp'
@@ -230,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
stream_url = self._download_json(
stream_info['stream'], display_id,
'Downloading %s URL' % stream_type,
- transform_source=strip_jsonp)
+ 'Unable to download %s URL' % stream_type,
+ transform_source=strip_jsonp, fatal=False)
+ if not stream_url:
+ continue
if stream_type == 'hds':
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
# f4m downloader downloads only piece of live stream
@@ -242,6 +246,7 @@ class NPOLiveIE(NPOBaseIE):
else:
formats.append({
'url': stream_url,
+ 'preference': -10,
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 1e4cfa2e7..e91d3a248 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -14,46 +14,48 @@ from ..utils import (
class NRKIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
+ _VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
_TESTS = [
{
- 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
- 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': 'bccd850baebefe23b56d708a113229c2',
'info_dict': {
'id': '150533',
'ext': 'flv',
'title': 'Dompap og andre fugler i Piip-Show',
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 263,
}
},
{
- 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
- 'md5': '3471f2a51718195164e88f46bf427668',
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ 'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
'info_dict': {
'id': '154915',
'ext': 'flv',
'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
}
},
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- page = self._download_webpage(url, video_id)
-
- video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
+ video_id = self._match_id(url)
data = self._download_json(
- 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
+ 'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
+ video_id, 'Downloading media JSON')
if data['usageRights']['isGeoBlocked']:
- raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
+ raise ExtractorError(
+ 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ expected=True)
+
- video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
+ video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
+ duration = parse_duration(data.get('duration'))
images = data.get('images')
if images:
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
'ext': 'flv',
'title': data['title'],
'description': data['description'],
+ 'duration': duration,
'thumbnail': thumbnail,
}
+class NRKPlaylistIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 5,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, 'NRK')
+ for video_id in re.findall(
+ r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
+ webpage)
+ ]
+
+ playlist_title = self._og_search_title(webpage)
+ playlist_description = self._og_search_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
@@ -149,9 +192,6 @@ class NRKTVIE(InfoExtractor):
}
]
- def _seconds2str(self, s):
- return '%02d:%02d:%02d.%03d' % (s / 3600, (s % 3600) / 60, s % 60, (s % 1) * 1000)
-
def _debug_print(self, txt):
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
@@ -168,8 +208,8 @@ class NRKTVIE(InfoExtractor):
for pos, p in enumerate(ps):
begin = parse_duration(p.get('begin'))
duration = parse_duration(p.get('dur'))
- starttime = self._seconds2str(begin)
- endtime = self._seconds2str(begin + duration)
+ starttime = self._subtitles_timecode(begin)
+ endtime = self._subtitles_timecode(begin + duration)
srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
return {lang: [
{'ext': 'ttml', 'url': url},
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 56e1cad3b..03f0a4de6 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -1,15 +1,17 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..utils import parse_iso8601
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+)
class NYTimesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
'md5': '18a525a510f942ada2720db5f31644c0',
'info_dict': {
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
'uploader': 'Brett Weiner',
'duration': 419,
}
- }
+ }, {
+ 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
video_data = self._download_json(
- 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
+ 'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
+ video_id, 'Downloading video JSON')
title = video_data['headline']
- description = video_data['summary']
- duration = video_data['duration'] / 1000.0
+ description = video_data.get('summary')
+ duration = float_or_none(video_data.get('duration'), 1000)
uploader = video_data['byline']
timestamp = parse_iso8601(video_data['publication_date'][:-8])
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
formats = [
{
'url': video['url'],
- 'format_id': video['type'],
- 'vcodec': video['video_codec'],
- 'width': video['width'],
- 'height': video['height'],
- 'filesize': get_file_size(video['fileSize']),
+ 'format_id': video.get('type'),
+ 'vcodec': video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': get_file_size(video.get('fileSize')),
} for video in video_data['renditions']
]
self._sort_formats(formats)
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
thumbnails = [
{
'url': 'http://www.nytimes.com/%s' % image['url'],
- 'resolution': '%dx%d' % (image['width'], image['height']),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
} for image in video_data['images']
]
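
The switch to video_data.get(...) plus float_or_none/int_or_none above makes every optional field None-safe, and float_or_none(value, 1000) doubles as the millisecond-to-second conversion for the duration. The helpers reduce to roughly this (simplified; the real ones in youtube_dl.utils take extra invscale/default arguments):

def float_or_none(v, scale=1):
    return float(v) / scale if v is not None else None

def int_or_none(v):
    return int(v) if v is not None else None

assert float_or_none(None, 1000) is None      # missing duration stays None
assert float_or_none(419000, 1000) == 419.0   # ms -> seconds
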
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 4e293392b..ca1a5bb3c 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -11,6 +11,11 @@ from ..utils import (
HEADRequest,
unified_strdate,
ExtractorError,
+ strip_jsonp,
+ int_or_none,
+ float_or_none,
+ determine_ext,
+ remove_end,
)
@@ -197,3 +202,92 @@ class ORFFM4IE(InfoExtractor):
'description': data['subtitle'],
'entries': entries
}
+
+
+class ORFIPTVIE(InfoExtractor):
+ IE_NAME = 'orf:iptv'
+ IE_DESC = 'iptv.ORF.at'
+ _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://iptv.orf.at/stories/2267952',
+ 'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
+ 'info_dict': {
+ 'id': '339775',
+ 'ext': 'flv',
+ 'title': 'Kreml-Kritiker Nawalny wieder frei',
+ 'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
+ 'duration': 84.729,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20150306',
+ },
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+ video_id = self._search_regex(
+ r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['default']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py
index a20672c0c..46cebc0d7 100644
--- a/youtube_dl/extractor/phoenix.py
+++ b/youtube_dl/extractor/phoenix.py
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
class PhoenixIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.phoenix.de/content/884301',
- 'md5': 'ed249f045256150c92e72dbb70eadec6',
- 'info_dict': {
- 'id': '884301',
- 'ext': 'mp4',
- 'title': 'Michael Krons mit Hans-Werner Sinn',
- 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
- 'upload_date': '20141025',
- 'uploader': 'Im Dialog',
- }
- }
+ _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
+ (?:
+ phoenix/die_sendungen/(?:[^/]+/)?
+ )?
+ (?P<id>[0-9]+)'''
+ _TESTS = [
+ {
+ 'url': 'http://www.phoenix.de/content/884301',
+ 'md5': 'ed249f045256150c92e72dbb70eadec6',
+ 'info_dict': {
+ 'id': '884301',
+ 'ext': 'mp4',
+ 'title': 'Michael Krons mit Hans-Werner Sinn',
+ 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
+ 'upload_date': '20141025',
+ 'uploader': 'Im Dialog',
+ }
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
+ 'only_matching': True,
+ },
+ ]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py
new file mode 100644
index 000000000..abde34b94
--- /dev/null
+++ b/youtube_dl/extractor/pladform.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ xpath_text,
+ qualities,
+)
+
+
+class PladformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ out\.pladform\.ru/player|
+ static\.pladform\.ru/player\.swf
+ )
+ \?.*\bvideoid=|
+ video\.pladform\.ru/catalog/video/videoid/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # http://muz-tv.ru/kinozal/view/7400/
+ 'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
+ 'md5': '61f37b575dd27f1bb2e1854777fe31f4',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_xml(
+ 'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
+ video_id)
+
+ if video.tag == 'error':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, video.text),
+ expected=True)
+
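+ # Quality labels ordered from worst to best so _sort_formats can rank them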
+ quality = qualities(('ld', 'sd', 'hd'))
+
+ formats = [{
+ 'url': src.text,
+ 'format_id': src.get('quality'),
+ 'quality': quality(src.get('quality')),
+ } for src in video.findall('./src')]
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
+ video_id)
+
+ title = self._og_search_title(webpage, fatal=False) or xpath_text(
+ video, './/title', 'title', fatal=True)
+ description = self._search_regex(
+ r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
+ video, './/cover', 'cover')
+
+ duration = int_or_none(xpath_text(video, './/time', 'duration'))
+ age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py
index 9576aed0e..e766ccca3 100644
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dl/extractor/playfm.py
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- float_or_none,
int_or_none,
- str_to_int,
+ parse_iso8601,
)
class PlayFMIE(InfoExtractor):
IE_NAME = 'play.fm'
- _VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
+ _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
_TEST = {
- 'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
+ 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
'md5': 'c505f8307825a245d0c7ad1850001f22',
'info_dict': {
- 'id': '137220',
+ 'id': '71276',
'ext': 'mp3',
- 'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
- 'uploader': 'Sven Tasnadi',
- 'uploader_id': 'sventasnadi',
- 'duration': 5627.428,
- 'upload_date': '20140712',
+ 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
+ 'description': '',
+ 'duration': 5627,
+ 'timestamp': 1406033781,
+ 'upload_date': '20140722',
+ 'uploader': 'Dan Drastic',
+ 'uploader_id': '71170',
'view_count': int,
'comment_count': int,
- 'thumbnail': 're:^https?://.*\.jpg$',
},
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- upload_date = mobj.group('upload_date')
-
- rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
- req = compat_urllib_request.Request(
- 'http://www.play.fm/flexRead/recording', data=rec_data)
- req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- rec_doc = self._download_xml(req, video_id)
+ slug = mobj.group('slug')
- error_node = rec_doc.find('./error')
- if error_node is not None:
- raise ExtractorError('An error occured: %s (code %s)' % (
- error_node.text, rec_doc.find('./status').text))
+ recordings = self._download_json(
+ 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
- recording = rec_doc.find('./recording')
- title = recording.find('./title').text
- view_count = str_to_int(recording.find('./stats/playcount').text)
- comment_count = str_to_int(recording.find('./stats/comments').text)
- duration = float_or_none(recording.find('./duration').text, scale=1000)
- thumbnail = recording.find('./image').text
+ error = recordings.get('error')
+ if isinstance(error, dict):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error.get('message')),
+ expected=True)
- artist = recording.find('./artists/artist')
- uploader = artist.find('./name').text
- uploader_id = artist.find('./slug').text
-
- video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
- 'http:', recording.find('./url').text,
- recording.find('./_class').text, recording.find('./file_id').text,
- rec_doc.find('./uuid').text, video_id,
- rec_doc.find('./jingle/file_id').text,
- 'http%3A%2F%2Fwww.play.fm%2Fplayer',
- )
+ audio_url = recordings['audio']
+ video_id = compat_str(recordings.get('id') or video_id)
+ title = recordings['title']
+ description = recordings.get('description')
+ duration = int_or_none(recordings.get('recordingDuration'))
+ timestamp = parse_iso8601(recordings.get('created_at'))
+ uploader = recordings.get('page', {}).get('title')
+ uploader_id = compat_str(recordings.get('page', {}).get('id'))
+ view_count = int_or_none(recordings.get('playCount'))
+ comment_count = int_or_none(recordings.get('commentCount'))
+ categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
return {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp3',
- 'filesize': int_or_none(recording.find('./size').text),
+ 'url': audio_url,
'title': title,
- 'upload_date': upload_date,
- 'view_count': view_count,
- 'comment_count': comment_count,
+ 'description': description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
'uploader': uploader,
'uploader_id': uploader_id,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
}
diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py
new file mode 100644
index 000000000..bdc71017b
--- /dev/null
+++ b/youtube_dl/extractor/playwire.py
@@ -0,0 +1,78 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ xpath_text,
+ float_or_none,
+ int_or_none,
+)
+
+
+class PlaywireIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
+ 'md5': 'e6398701e3595888125729eaa2329ed9',
+ 'info_dict': {
+ 'id': '3353705',
+ 'ext': 'mp4',
+ 'title': 'S04_RM_UCL_Rus',
+ 'thumbnail': 're:^http://.*\.png$',
+ 'duration': 145.94,
+ },
+ }, {
+ 'url': 'http://cdn.playwire.com/11625/embed/85228.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')
+
+ player = self._download_json(
+ 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id),
+ video_id)
+
+ title = player['settings']['title']
+ duration = float_or_none(player.get('duration'), 1000)
+
+ content = player['content']
+ thumbnail = content.get('poster')
+ src = content['media']['f4m']
+
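+ # The config points at an f4m manifest; read its baseURL and <media> entries to build the format list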
+ f4m = self._download_xml(src, video_id)
+ base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
+ formats = []
+ for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
+ media_url = media.get('url')
+ if not media_url:
+ continue
+ tbr = int_or_none(media.get('bitrate'))
+ width = int_or_none(media.get('width'))
+ height = int_or_none(media.get('height'))
+ f = {
+ 'url': '%s/%s' % (base_url, media.attrib['url']),
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ }
+ if not (tbr or width or height):
+ f['quality'] = 1 if '-hd.' in media_url else 0
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 3a27e3789..0c8b731cf 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
}
def _extract_count(self, pattern, webpage, name):
- count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
- if count:
- count = str_to_int(count)
- return count
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
if thumbnail:
thumbnail = compat_urllib_parse.unquote(thumbnail)
- view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
- like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
- dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(
+ r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(
+ r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count(
- r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
if webpage.find('"encrypted":true') != -1:
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
new file mode 100644
index 000000000..01cc3d9ea
--- /dev/null
+++ b/youtube_dl/extractor/primesharetv.py
@@ -0,0 +1,69 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import ExtractorError
+
+
+class PrimeShareTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
+
+ _TEST = {
+ 'url': 'http://primeshare.tv/download/238790B611',
+ 'md5': 'b92d9bf5461137c36228009f31533fbc',
+ 'info_dict': {
+ 'id': '238790B611',
+ 'ext': 'mp4',
+ 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>File not exist<' in webpage:
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = dict(re.findall(r'''(?x)<input\s+
+ type="hidden"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', webpage))
+
+ headers = {
+ 'Referer': url,
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ }
+
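+ # Wait out the page's cWaitTime countdown (plus one second) before posting the form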
+ wait_time = int(self._search_regex(
+ r'var\s+cWaitTime\s*=\s*(\d+)',
+ webpage, 'wait time', default=7)) + 1
+ self._sleep(wait_time, video_id)
+
+ req = compat_urllib_request.Request(
+ url, compat_urllib_parse.urlencode(fields), headers)
+ video_page = self._download_webpage(
+ req, video_id, 'Downloading video page')
+
+ video_url = self._search_regex(
+ r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
+ video_page, 'video url')
+
+ title = self._html_search_regex(
+ r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
+ video_page, 'title')
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'ext': 'mp4',
+ }
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 846b76c81..d6054d717 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -1,17 +1,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import ExtractorError
class RedTubeIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.redtube.com/66418',
+ 'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
'info_dict': {
'id': '66418',
'ext': 'mp4',
- "title": "Sucked on a toilet",
- "age_limit": 18,
+ 'title': 'Sucked on a toilet',
+ 'age_limit': 18,
}
}
@@ -19,6 +21,9 @@ class RedTubeIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
+ raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
video_title = self._html_search_regex(
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py
index b42442d12..13f071077 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@@ -127,6 +127,47 @@ class RTVEALaCartaIE(InfoExtractor):
for s in subs)
+class RTVEInfantilIE(InfoExtractor):
+ IE_NAME = 'rtve.es:infantil'
+ IE_DESC = 'RTVE infantil'
+ _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+
+ _TESTS = [{
+ 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
+ 'md5': '915319587b33720b8e0357caaa6617e6',
+ 'info_dict': {
+ 'id': '3040283',
+ 'ext': 'mp4',
+ 'title': 'Maneras de vivir',
+ 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+ 'duration': 357.958,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ info = self._download_json(
+ 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
+ video_id)['page']['items'][0]
+
+ webpage = self._download_webpage(url, video_id)
+ vidplayer_id = self._search_regex(
+ r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
+
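+ # The real media URL is encoded inside this PNG; _decrypt_url decodes it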
+ png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
+ png = self._download_webpage(png_url, video_id, 'Downloading url information')
+ video_url = _decrypt_url(png)
+
+ return {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'title': info['title'],
+ 'url': video_url,
+ 'thumbnail': info.get('image'),
+ 'duration': float_or_none(info.get('duration'), scale=1000),
+ }
+
+
class RTVELiveIE(InfoExtractor):
IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams'
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
new file mode 100644
index 000000000..10251f29e
--- /dev/null
+++ b/youtube_dl/extractor/safari.py
@@ -0,0 +1,157 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .brightcove import BrightcoveIE
+
+from ..compat import (
+ compat_urllib_parse,
+ compat_urllib_request,
+)
+from ..utils import (
+ ExtractorError,
+ smuggle_url,
+ std_headers,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+ _SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
+ _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
+ _NETRC_MACHINE = 'safari'
+
+ _API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
+ _API_FORMAT = 'json'
+
+ LOGGED_IN = False
+
+ def _real_initialize(self):
+ # We only need to log in once for courses or individual videos
+ if not self.LOGGED_IN:
+ self._login()
+ SafariBaseIE.LOGGED_IN = True
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ raise ExtractorError(
+ self._ACCOUNT_CREDENTIALS_HINT,
+ expected=True)
+
+ # Work on a copy so we don't mutate the shared std_headers defaults
+ headers = std_headers.copy()
+ if 'Referer' not in headers:
+ headers['Referer'] = self._LOGIN_URL
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ 'Downloading login form')
+
+ csrf = self._html_search_regex(
+ r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
+ login_page, 'csrf token')
+
+ login_form = {
+ 'csrfmiddlewaretoken': csrf,
+ 'email': username,
+ 'password1': password,
+ 'login': 'Sign In',
+ 'next': '',
+ }
+
+ request = compat_urllib_request.Request(
+ self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
+ login_page = self._download_webpage(
+ request, None, 'Logging in as %s' % username)
+
+ if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
+ raise ExtractorError(
+ 'Login failed; make sure your credentials are correct and try again.',
+ expected=True)
+
+ self.to_screen('Login successful')
+
+
+class SafariIE(SafariBaseIE):
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ _VALID_URL = r'''(?x)https?://
+ (?:www\.)?safaribooksonline\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book
+ )/
+ (?P<course_id>\d+)/
+ (?:chapter(?:-content)?/)?
+ (?P<part>part\d+)\.html
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
+ 'info_dict': {
+ 'id': '2842601850001',
+ 'ext': 'mp4',
+ 'title': 'Introduction',
+ },
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ course_id = mobj.group('course_id')
+ part = mobj.group('part')
+
+ webpage = self._download_webpage(
+ '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
+ part)
+
+ bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+ if not bc_url:
+ raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
+
+ return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
+
+
+class SafariCourseIE(SafariBaseIE):
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ entries = [
+ self.url_result(chapter, 'Safari')
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py
index 9f79ff5c1..0b717a1e4 100644
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dl/extractor/slideshare.py
@@ -30,7 +30,7 @@ class SlideshareIE(InfoExtractor):
page_title = mobj.group('title')
webpage = self._download_webpage(url, page_title)
slideshare_obj = self._search_regex(
- r'var\s+slideshare_object\s*=\s*({.*?});\s*var\s+user_info\s*=',
+ r'\$\.extend\(slideshare_object,\s*(\{.*?\})\);',
webpage, 'slideshare object')
info = json.loads(slideshare_obj)
if info['slideshow']['type'] != 'video':
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index c04791997..11edf616a 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -4,22 +4,87 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from .common import compat_str
+from ..compat import (
+ compat_str,
+ compat_urllib_request
+)
+from ..utils import sanitize_url_path_consecutive_slashes
class SohuIE(InfoExtractor):
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
- _TEST = {
+ _TESTS = [{
+ 'note': 'This video is available only in Mainland China',
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
- 'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7',
+ 'md5': '29175c8cadd8b5cc4055001e85d6b372',
'info_dict': {
'id': '382479172',
'ext': 'mp4',
'title': 'MV:Far East Movement《The Illest》',
},
- 'skip': 'Only available from China',
- }
+ 'params': {
+ 'cn_verification_proxy': 'proxy.uku.im:8888'
+ }
+ }, {
+ 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+ 'md5': '699060e75cf58858dd47fb9c03c42cfb',
+ 'info_dict': {
+ 'id': '409385080',
+ 'ext': 'mp4',
+ 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+ }
+ }, {
+ 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+ 'md5': '9bf34be48f2f4dadcb226c74127e203c',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+ 'info_dict': {
+ 'id': '78910339',
+ },
+ 'playlist': [{
+ 'md5': 'bdbfb8f39924725e6589c146bc1883ad',
+ 'info_dict': {
+ 'id': '78910339_part1',
+ 'ext': 'mp4',
+ 'duration': 294,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
+ 'info_dict': {
+ 'id': '78910339_part2',
+ 'ext': 'mp4',
+ 'duration': 300,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'md5': '8407e634175fdac706766481b9443450',
+ 'info_dict': {
+ 'id': '78910339_part3',
+ 'ext': 'mp4',
+ 'duration': 150,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }]
+ }, {
+ 'note': 'Video with title containing dash',
+ 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+ 'info_dict': {
+ 'id': '78932792',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl testing video',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
def _real_extract(self, url):
@@ -29,8 +94,14 @@ class SohuIE(InfoExtractor):
else:
base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+ req = compat_urllib_request.Request(base_data_url + vid_id)
+
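+ # Optionally route the metadata request through a proxy, since some videos are mainland-China-only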
+ cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
+ if cn_verification_proxy:
+ req.add_header('Ytdl-request-proxy', cn_verification_proxy)
+
return self._download_json(
- base_data_url + vid_id, video_id,
+ req, video_id,
'Downloading JSON data for %s' % vid_id)
mobj = re.match(self._VALID_URL, url)
@@ -38,10 +109,8 @@ class SohuIE(InfoExtractor):
mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id)
- raw_title = self._html_search_regex(
- r'(?s)<title>(.+?)</title>',
- webpage, 'video title')
- title = raw_title.partition('-')[0].strip()
+
+ title = self._og_search_title(webpage)
vid = self._html_search_regex(
r'var vid ?= ?["\'](\d+)["\']',
@@ -77,7 +146,9 @@ class SohuIE(InfoExtractor):
% (format_id, i + 1, part_count))
part_info = part_str.split('|')
- video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+ video_url = sanitize_url_path_consecutive_slashes(
+ '%s%s?key=%s' % (part_info[0], su[i], part_info[3]))
formats.append({
'url': video_url,
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index c5284fa67..316b2c90f 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -180,7 +180,7 @@ class SoundcloudIE(InfoExtractor):
'format_id': key,
'url': url,
'play_path': 'mp3:' + path,
- 'ext': ext,
+ 'ext': 'flv',
'vcodec': 'none',
})
@@ -200,8 +200,9 @@ class SoundcloudIE(InfoExtractor):
if f['format_id'].startswith('rtmp'):
f['protocol'] = 'rtmp'
- self._sort_formats(formats)
- result['formats'] = formats
+ self._check_formats(formats, track_id)
+ self._sort_formats(formats)
+ result['formats'] = formats
return result
@@ -241,7 +242,7 @@ class SoundcloudIE(InfoExtractor):
class SoundcloudSetIE(SoundcloudIE):
- _VALID_URL = r'https?://(?:www\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
IE_NAME = 'soundcloud:set'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
@@ -286,7 +287,7 @@ class SoundcloudSetIE(SoundcloudIE):
class SoundcloudUserIE(SoundcloudIE):
- _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$'
IE_NAME = 'soundcloud:user'
_TESTS = [{
'url': 'https://soundcloud.com/the-concept-band',
diff --git a/youtube_dl/extractor/ssa.py b/youtube_dl/extractor/ssa.py
new file mode 100644
index 000000000..13101c714
--- /dev/null
+++ b/youtube_dl/extractor/ssa.py
@@ -0,0 +1,58 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ unescapeHTML,
+ parse_duration,
+)
+
+
+class SSAIE(InfoExtractor):
+ _VALID_URL = r'http://ssa\.nls\.uk/film/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://ssa.nls.uk/film/3561',
+ 'info_dict': {
+ 'id': '3561',
+ 'ext': 'flv',
+ 'title': 'SHETLAND WOOL',
+ 'description': 'md5:c5afca6871ad59b4271e7704fe50ab04',
+ 'duration': 900,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ streamer = self._search_regex(
+ r"'streamer'\s*,\S*'(rtmp[^']+)'", webpage, 'streamer')
+ play_path = self._search_regex(
+ r"'file'\s*,\s*'([^']+)'", webpage, 'file').rpartition('.')[0]
+
+ def search_field(field_name, fatal=False):
+ return self._search_regex(
+ r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name,
+ webpage, field_name, fatal=fatal)
+
+ title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]')
+ description = unescapeHTML(search_field('Description'))
+ duration = parse_duration(search_field('Running time'))
+ thumbnail = self._search_regex(
+ r"'image'\s*,\s*'([^']+)'", webpage, 'thumbnails', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'url': streamer,
+ 'play_path': play_path,
+ 'ext': 'flv',
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 5793dbc10..a46a7ecba 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -53,10 +53,10 @@ class TeamcocoIE(InfoExtractor):
embed = self._download_webpage(
embed_url, video_id, 'Downloading embed page')
- encoded_data = self._search_regex(
- r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data')
+ player_data = self._parse_json(self._search_regex(
+ r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id)
data = self._parse_json(
- base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id)
+ base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id)
formats = []
get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p'])
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index feac666f7..0e3e627f4 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -92,7 +92,7 @@ class ThePlatformIE(InfoExtractor):
error_msg = next(
n.attrib['abstract']
for n in meta.findall(_x('.//smil:ref'))
- if n.attrib.get('title') == 'Geographic Restriction')
+ if n.attrib.get('title') == 'Geographic Restriction' or n.attrib.get('title') == 'Expired')
except StopIteration:
pass
else:
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index 9a53a3c74..e83e31a31 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -16,6 +16,7 @@ class TVPlayIE(InfoExtractor):
_VALID_URL = r'''(?x)http://(?:www\.)?
(?:tvplay\.lv/parraides|
tv3play\.lt/programos|
+ play\.tv3\.lt/programos|
tv3play\.ee/sisu|
tv3play\.se/program|
tv6play\.se/program|
@@ -45,7 +46,7 @@ class TVPlayIE(InfoExtractor):
},
},
{
- 'url': 'http://www.tv3play.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+ 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
'info_dict': {
'id': '409229',
'ext': 'flv',
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
new file mode 100644
index 000000000..d6c0ab184
--- /dev/null
+++ b/youtube_dl/extractor/twentytwotracks.py
@@ -0,0 +1,86 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+# 22Tracks regularly replaces the audio tracks that can be streamed on its
+# site. The tracks usually expire after a month, so we can't add tests.
+
+
+class TwentyTwoTracksIE(InfoExtractor):
+ _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
+ IE_NAME = '22tracks:track'
+
+ _API_BASE = 'http://22tracks.com/api'
+
+ def _extract_info(self, city, genre_name, track_id=None):
+ item_id = track_id if track_id else genre_name
+
+ cities = self._download_json(
+ '%s/cities' % self._API_BASE, item_id,
+ 'Downloading cities info',
+ 'Unable to download cities info')
+ city_id = [x['id'] for x in cities if x['slug'] == city][0]
+
+ genres = self._download_json(
+ '%s/genres/%s' % (self._API_BASE, city_id), item_id,
+ 'Downloading %s genres info' % city,
+ 'Unable to download %s genres info' % city)
+ genre = [x for x in genres if x['slug'] == genre_name][0]
+ genre_id = genre['id']
+
+ tracks = self._download_json(
+ '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
+ 'Downloading %s genre tracks info' % genre_name,
+ 'Unable to download track info')
+
+ return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
+
+ def _get_track_url(self, filename, track_id):
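+ # token.php returns the final filename plus 'st' and 'e' parameters used to build the stream URL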
+ token = self._download_json(
+ 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
+ track_id, 'Downloading token', 'Unable to download token')
+ return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
+
+ def _extract_track_info(self, track_info, track_id):
+ download_url = self._get_track_url(track_info['filename'], track_id)
+ title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
+ return {
+ 'id': track_id,
+ 'url': download_url,
+ 'ext': 'mp3',
+ 'title': title,
+ 'duration': int_or_none(track_info.get('duration')),
+ 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ city = mobj.group('city')
+ genre = mobj.group('genre')
+ track_id = mobj.group('id')
+
+ track_info = self._extract_info(city, genre, track_id)
+ return self._extract_track_info(track_info, track_id)
+
+
+class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
+ _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
+ IE_NAME = '22tracks:genre'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ city = mobj.group('city')
+ genre = mobj.group('genre')
+
+ genre_title, tracks = self._extract_info(city, genre)
+
+ entries = [
+ self._extract_track_info(track_info, track_info['id'])
+ for track_info in tracks]
+
+ return self.playlist_result(entries, genre, genre_title)
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 4b0ce54df..94bd6345d 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -23,6 +23,8 @@ class TwitchBaseIE(InfoExtractor):
_API_BASE = 'https://api.twitch.tv'
_USHER_BASE = 'http://usher.twitch.tv'
_LOGIN_URL = 'https://secure.twitch.tv/user/login'
+ _LOGIN_POST_URL = 'https://secure-login.twitch.tv/login'
+ _NETRC_MACHINE = 'twitch'
def _handle_error(self, response):
if not isinstance(response, dict):
@@ -66,14 +68,14 @@ class TwitchBaseIE(InfoExtractor):
'authenticity_token': authenticity_token,
'redirect_on_login': '',
'embed_form': 'false',
- 'mp_source_action': '',
+ 'mp_source_action': 'login-button',
'follow': '',
- 'user[login]': username,
- 'user[password]': password,
+ 'login': username,
+ 'password': password,
}
request = compat_urllib_request.Request(
- self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
+ self._LOGIN_POST_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
request.add_header('Referer', self._LOGIN_URL)
response = self._download_webpage(
request, None, 'Logging in as %s' % username)
@@ -84,6 +86,14 @@ class TwitchBaseIE(InfoExtractor):
raise ExtractorError(
'Unable to login: %s' % m.group('msg').strip(), expected=True)
+ def _prefer_source(self, formats):
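+ # Prefer the 'Source' stream; the transcoded renditions are limited to 30 fps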
+ try:
+ source = next(f for f in formats if f['format_id'] == 'Source')
+ source['preference'] = 10
+ except StopIteration:
+ pass # No Source stream present
+ self._sort_formats(formats)
+
class TwitchItemBaseIE(TwitchBaseIE):
def _download_info(self, item, item_id):
@@ -139,7 +149,7 @@ class TwitchItemBaseIE(TwitchBaseIE):
class TwitchVideoIE(TwitchItemBaseIE):
IE_NAME = 'twitch:video'
- _VALID_URL = r'%s/[^/]+/b/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'video'
_ITEM_SHORTCUT = 'a'
@@ -155,7 +165,7 @@ class TwitchVideoIE(TwitchItemBaseIE):
class TwitchChapterIE(TwitchItemBaseIE):
IE_NAME = 'twitch:chapter'
- _VALID_URL = r'%s/[^/]+/c/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'chapter'
_ITEM_SHORTCUT = 'c'
@@ -174,7 +184,7 @@ class TwitchChapterIE(TwitchItemBaseIE):
class TwitchVodIE(TwitchItemBaseIE):
IE_NAME = 'twitch:vod'
- _VALID_URL = r'%s/[^/]+/v/(?P<id>[^/]+)' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL = r'%s/[^/]+/v/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
_ITEM_TYPE = 'vod'
_ITEM_SHORTCUT = 'v'
@@ -208,6 +218,7 @@ class TwitchVodIE(TwitchItemBaseIE):
'%s/vod/%s?nauth=%s&nauthsig=%s'
% (self._USHER_BASE, item_id, access_token['token'], access_token['sig']),
item_id, 'mp4')
+ self._prefer_source(formats)
info['formats'] = formats
return info
@@ -348,21 +359,14 @@ class TwitchStreamIE(TwitchBaseIE):
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
'segment_preference': '4',
- 'sig': access_token['sig'],
- 'token': access_token['token'],
+ 'sig': access_token['sig'].encode('utf-8'),
+ 'token': access_token['token'].encode('utf-8'),
}
-
formats = self._extract_m3u8_formats(
'%s/api/channel/hls/%s.m3u8?%s'
- % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')),
+ % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query)),
channel_id, 'mp4')
-
- # prefer the 'source' stream, the others are limited to 30 fps
- def _sort_source(f):
- if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source':
- return 1
- return 0
- formats = sorted(formats, key=_sort_source)
+ self._prefer_source(formats)
view_count = stream.get('viewers')
timestamp = parse_iso8601(stream.get('created_at'))
diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py
new file mode 100644
index 000000000..96c809eaf
--- /dev/null
+++ b/youtube_dl/extractor/ultimedia.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ qualities,
+ unified_strdate,
+ clean_html,
+)
+
+
+class UltimediaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ultimedia\.com/default/index/video[^/]+/id/(?P<id>[\da-z]+)'
+ _TESTS = [{
+ # news
+ 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
+ 'md5': '276a0e49de58c7e85d32b057837952a2',
+ 'info_dict': {
+ 'id': 's8uk0r',
+ 'ext': 'mp4',
+ 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
+ 'description': 'md5:3e5c8fd65791487333dda5db8aed32af',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'upload_date': '20150317',
+ },
+ }, {
+ # music
+ 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
+ 'md5': '2ea3513813cf230605c7e2ffe7eca61c',
+ 'info_dict': {
+ 'id': 'xvpfp8',
+ 'ext': 'mp4',
+ 'title': "Two - C'est la vie (Clip)",
+ 'description': 'Two',
+ 'thumbnail': 're:^https?://.*\.jpg',
+ 'upload_date': '20150224',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ deliver_url = self._search_regex(
+ r'<iframe[^>]+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"',
+ webpage, 'deliver URL')
+
+ deliver_page = self._download_webpage(
+ deliver_url, video_id, 'Downloading iframe page')
+
+ if '>This video is currently not available' in deliver_page:
+ raise ExtractorError(
+ 'Video %s is currently not available' % video_id, expected=True)
+
+ player = self._parse_json(
+ self._search_regex(
+ r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'),
+ video_id)
+
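+ # Prefer the html5 source over the flash one when both are listed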
+ quality = qualities(['flash', 'html5'])
+ formats = []
+ for mode in player['modes']:
+ video_url = mode.get('config', {}).get('file')
+ if not video_url:
+ continue
+ if re.match(r'https?://www\.youtube\.com/.+?', video_url):
+ return self.url_result(video_url, 'Youtube')
+ formats.append({
+ 'url': video_url,
+ 'format_id': mode.get('type'),
+ 'quality': quality(mode.get('type')),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = player.get('image')
+
+ title = clean_html((
+ self._html_search_regex(
+ r'(?s)<div\s+id="catArticle">.+?</div>(.+?)</h1>',
+ webpage, 'title', default=None) or
+ self._search_regex(
+ r"var\s+nameVideo\s*=\s*'([^']+)'",
+ deliver_page, 'title')))
+
+ description = clean_html(self._html_search_regex(
+ r'(?s)<span>Description</span>(.+?)</p>', webpage,
+ 'description', fatal=False))
+
+ upload_date = unified_strdate(self._search_regex(
+ r'Ajouté le\s*<span>([^<]+)', webpage,
+ 'upload date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dl/extractor/varzesh3.py
new file mode 100644
index 000000000..9369abaf8
--- /dev/null
+++ b/youtube_dl/extractor/varzesh3.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class Varzesh3IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?video\.varzesh3\.com/(?:[^/]+/)+(?P<id>[^/]+)/?'
+ _TEST = {
+ 'url': 'http://video.varzesh3.com/germany/bundesliga/5-%D9%88%D8%A7%DA%A9%D9%86%D8%B4-%D8%A8%D8%B1%D8%AA%D8%B1-%D8%AF%D8%B1%D9%88%D8%A7%D8%B2%D9%87%E2%80%8C%D8%A8%D8%A7%D9%86%D8%A7%D9%86%D8%9B%D9%87%D9%81%D8%AA%D9%87-26-%D8%A8%D9%88%D9%86%D8%AF%D8%B3/',
+ 'md5': '2a933874cb7dce4366075281eb49e855',
+ 'info_dict': {
+ 'id': '76337',
+ 'ext': 'mp4',
+ 'title': '۵ واکنش برتر دروازه‌بانان؛هفته ۲۶ بوندسلیگا',
+ 'description': 'فصل ۲۰۱۵-۲۰۱۴',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._search_regex(
+ r'<source[^>]+src="([^"]+)"', webpage, 'video url')
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'(?s)<div class="matn">(.+?)</div>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video_id = self._search_regex(
+ r"<link[^>]+rel='(?:canonical|shortlink)'[^>]+href='/\?p=([^']+)'",
+ webpage, 'video id', default=display_id)
+
+ return {
+ 'url': video_url,
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
new file mode 100644
index 000000000..6215f0642
--- /dev/null
+++ b/youtube_dl/extractor/vessel.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+from ..utils import (
+ ExtractorError,
+ parse_iso8601,
+)
+
+
+class VesselIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vessel\.com/videos/(?P<id>[0-9a-zA-Z]+)'
+ _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
+ _LOGIN_URL = 'https://www.vessel.com/api/account/login'
+ _NETRC_MACHINE = 'vessel'
+ _TEST = {
+ 'url': 'https://www.vessel.com/videos/HDN7G5UMs',
+ 'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
+ 'info_dict': {
+ 'id': 'HDN7G5UMs',
+ 'ext': 'mp4',
+ 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'upload_date': '20150317',
+ 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
+ 'timestamp': int,
+ },
+ }
+
+ @staticmethod
+ def make_json_request(url, data):
+ payload = json.dumps(data).encode('utf-8')
+ req = compat_urllib_request.Request(url, payload)
+ req.add_header('Content-Type', 'application/json; charset=utf-8')
+ return req
+
+ @staticmethod
+ def find_assets(data, asset_type):
+ for asset in data.get('assets', []):
+ if asset.get('type') == asset_type:
+ yield asset
+
+ def _check_access_rights(self, data):
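+ # The '__view' block carries the access verdict for the current account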
+ access_info = data.get('__view', {})
+ if not access_info.get('allow_access', True):
+ err_code = access_info.get('error_code') or ''
+ if err_code == 'ITEM_PAID_ONLY':
+ raise ExtractorError(
+ 'This video requires a subscription.', expected=True)
+ else:
+ raise ExtractorError(
+ 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
+
+ def _login(self):
+ (username, password) = self._get_login_info()
+ if username is None:
+ return
+ self.report_login()
+ data = {
+ 'client_id': 'web',
+ 'type': 'password',
+ 'user_key': username,
+ 'password': password,
+ }
+ login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
+ self._download_webpage(login_request, None, False, 'Wrong login info')
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ data = self._parse_json(self._search_regex(
+ r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
+ asset_id = data['model']['data']['id']
+
+ req = VesselIE.make_json_request(
+ self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
+ data = self._download_json(req, video_id)
+
+ self._check_access_rights(data)
+
+ try:
+ video_asset = next(VesselIE.find_assets(data, 'video'))
+ except StopIteration:
+ raise ExtractorError('No video assets found')
+
+ formats = []
+ for f in video_asset.get('sources', []):
+ if f['name'] == 'hls-index':
+ formats.extend(self._extract_m3u8_formats(
+ f['location'], video_id, ext='mp4', m3u8_id='m3u8'))
+ else:
+ formats.append({
+ 'format_id': f['name'],
+ 'tbr': f.get('bitrate'),
+ 'height': f.get('height'),
+ 'width': f.get('width'),
+ 'url': f['location'],
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for im_asset in VesselIE.find_assets(data, 'image'):
+ thumbnails.append({
+ 'url': im_asset['location'],
+ 'width': im_asset.get('width', 0),
+ 'height': im_asset.get('height', 0),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': data['title'],
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': data.get('short_description'),
+ 'duration': data.get('duration'),
+ 'comment_count': data.get('comment_count'),
+ 'like_count': data.get('like_count'),
+ 'view_count': data.get('view_count'),
+ 'timestamp': parse_iso8601(data.get('released_at')),
+ }
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
index 273030316..eb309a7cd 100644
--- a/youtube_dl/extractor/videomega.py
+++ b/youtube_dl/extractor/videomega.py
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
- remove_start,
-)
+from ..compat import compat_urllib_request
class VideoMegaIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://
(?:www\.)?videomega\.tv/
- (?:iframe\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
+ (?:iframe\.php|cdn\.php)?\?ref=(?P<id>[A-Za-z0-9]+)
'''
_TEST = {
- 'url': 'http://videomega.tv/?ref=QR0HCUHI1661IHUCH0RQ',
+ 'url': 'http://videomega.tv/?ref=4GNA688SU99US886ANG4',
'md5': 'bf5c2f95c4c917536e80936af7bc51e1',
'info_dict': {
- 'id': 'QR0HCUHI1661IHUCH0RQ',
+ 'id': '4GNA688SU99US886ANG4',
'ext': 'mp4',
- 'title': 'Big Buck Bunny',
+ 'title': 'BigBuckBunny_320x180',
'thumbnail': 're:^https?://.*\.jpg$',
}
}
@@ -33,34 +26,24 @@ class VideoMegaIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- iframe_url = 'http://videomega.tv/iframe.php?ref={0:}'.format(video_id)
+ iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
req = compat_urllib_request.Request(iframe_url)
req.add_header('Referer', url)
webpage = self._download_webpage(req, video_id)
- try:
- escaped_data = re.findall(r'unescape\("([^"]+)"\)', webpage)[-1]
- except IndexError:
- raise ExtractorError('Unable to extract escaped data')
-
- playlist = compat_urllib_parse.unquote(escaped_data)
-
+ title = self._html_search_regex(
+ r'<title>(.*?)</title>', webpage, 'title')
+ title = re.sub(
+ r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s?|\s?-\svideomega\.tv$)', '', title)
thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', playlist, 'thumbnail', fatal=False)
- video_url = self._search_regex(r'file:\s*"([^"]+)"', playlist, 'URL')
- title = remove_start(self._html_search_regex(
- r'<title>(.*?)</title>', webpage, 'title'), 'VideoMega.tv - ')
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- }]
- self._sort_formats(formats)
+ r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+ video_url = self._search_regex(
+ r'<source[^>]+?src="([^"]+)"', webpage, 'video URL')
return {
'id': video_id,
'title': title,
- 'formats': formats,
+ 'url': video_url,
'thumbnail': thumbnail,
'http_headers': {
'Referer': iframe_url,
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dl/extractor/vidme.py
index 5c89824c1..bd953fb4c 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dl/extractor/vidme.py
@@ -1,7 +1,5 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -28,12 +26,11 @@ class VidmeIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = self._html_search_regex(r'<source src="([^"]+)"', webpage, 'video URL')
+ video_url = self._html_search_regex(
+ r'<source src="([^"]+)"', webpage, 'video URL')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage, default='')
@@ -44,13 +41,10 @@ class VidmeIE(InfoExtractor):
duration = float_or_none(self._html_search_regex(
r'data-duration="([^"]+)"', webpage, 'duration', fatal=False))
view_count = str_to_int(self._html_search_regex(
- r'<span class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
+ r'<(?:li|span) class="video_views">\s*([\d,\.]+)\s*plays?', webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
r'class="score js-video-vote-score"[^>]+data-score="([\d,\.\s]+)">',
webpage, 'like count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'class="js-comment-count"[^>]+data-count="([\d,\.\s]+)">',
- webpage, 'comment count', fatal=False))
return {
'id': video_id,
@@ -64,5 +58,4 @@ class VidmeIE(InfoExtractor):
'duration': duration,
'view_count': view_count,
'like_count': like_count,
- 'comment_count': comment_count,
}
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
new file mode 100644
index 000000000..1742e66f4
--- /dev/null
+++ b/youtube_dl/extractor/viewster.py
@@ -0,0 +1,129 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_request
+
+
+class ViewsterIE(InfoExtractor):
+ _VALID_URL = r'http://(?:www\.)?viewster\.com/movie/(?P<id>\d+-\d+-\d+)'
+ _TESTS = [{
+ # movielink, paymethod=fre
+ 'url': 'http://www.viewster.com/movie/1293-19341-000/hout-wood/',
+ 'playlist': [{
+ 'md5': '8f9d94b282d80c42b378dffdbb11caf3',
+ 'info_dict': {
+ 'id': '1293-19341-000-movie',
+ 'ext': 'flv',
+ 'title': "'Hout' (Wood) - Movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1293-19341-000',
+ 'title': "'Hout' (Wood)",
+ 'description': 'md5:925733185a9242ef96f436937683f33b',
+ }
+ }, {
+ # movielink, paymethod=adv
+ 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
+ 'playlist': [{
+ 'md5': '77a005453ca7396cbe3d35c9bea30aef',
+ 'info_dict': {
+ 'id': '1140-11855-000-movie',
+ 'ext': 'flv',
+ 'title': "THE LISTENING PROJECT - Movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1140-11855-000',
+ 'title': "THE LISTENING PROJECT",
+ 'description': 'md5:714421ae9957e112e672551094bf3b08',
+ }
+ }, {
+ # direct links, no movielink
+ 'url': 'http://www.viewster.com/movie/1198-56411-000/sinister/',
+ 'playlist': [{
+ 'md5': '0307b7eac6bfb21ab0577a71f6eebd8f',
+ 'info_dict': {
+ 'id': '1198-56411-000-trailer',
+ 'ext': 'mp4',
+ 'title': "Sinister - Trailer",
+ },
+ }, {
+ 'md5': '80b9ee3ad69fb368f104cb5d9732ae95',
+ 'info_dict': {
+ 'id': '1198-56411-000-behind-scenes',
+ 'ext': 'mp4',
+ 'title': "Sinister - Behind Scenes",
+ },
+ }, {
+ 'md5': '3b3ea897ecaa91fca57a8a94ac1b15c5',
+ 'info_dict': {
+ 'id': '1198-56411-000-scene-from-movie',
+ 'ext': 'mp4',
+ 'title': "Sinister - Scene from movie",
+ },
+ }],
+ 'info_dict': {
+ 'id': '1198-56411-000',
+ 'title': "Sinister",
+ 'description': 'md5:014c40b0488848de9683566a42e33372',
+ }
+ }]
+
+ _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ request = compat_urllib_request.Request(
+ 'http://api.live.viewster.com/api/v1/movie/%s' % video_id)
+ request.add_header('Accept', self._ACCEPT_HEADER)
+
+ movie = self._download_json(
+ request, video_id, 'Downloading movie metadata JSON')
+
+ title = movie.get('title') or movie['original_title']
+ description = movie.get('synopsis')
+ thumbnail = movie.get('large_artwork') or movie.get('artwork')
+
+ entries = []
+ for clip in movie['play_list']:
+ entry = None
+
+ # movielink api
+ link_request = clip.get('link_request')
+ if link_request:
+ request = compat_urllib_request.Request(
+ 'http://api.live.viewster.com/api/v1/movielink?movieid=%(movieid)s&action=%(action)s&paymethod=%(paymethod)s&price=%(price)s&currency=%(currency)s&language=%(language)s&subtitlelanguage=%(subtitlelanguage)s&ischromecast=%(ischromecast)s'
+ % link_request)
+ request.add_header('Accept', self._ACCEPT_HEADER)
+
+ movie_link = self._download_json(
+ request, video_id, 'Downloading movie link JSON', fatal=False)
+
+ if movie_link:
+ formats = self._extract_f4m_formats(
+ movie_link['url'] + '&hdcore=3.2.0&plugin=flowplayer-3.2.0.1', video_id)
+ self._sort_formats(formats)
+ entry = {
+ 'formats': formats,
+ }
+
+ # direct link
+ clip_url = clip.get('clip_data', {}).get('url')
+ if clip_url:
+ entry = {
+ 'url': clip_url,
+ 'ext': 'mp4',
+ }
+
+ if entry:
+ entry.update({
+ 'id': '%s-%s' % (video_id, clip['canonical_title']),
+ 'title': '%s - %s' % (title, clip['title']),
+ })
+ entries.append(entry)
+
+ playlist = self.playlist_result(entries, video_id, title, description)
+ playlist['thumbnail'] = thumbnail
+ return playlist
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 8f540f578..28bcc89cd 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import json
import re
import itertools
-import hashlib
from .common import InfoExtractor
from ..compat import (
@@ -20,6 +19,7 @@ from ..utils import (
RegexNotFoundError,
smuggle_url,
std_headers,
+ unified_strdate,
unsmuggle_url,
urlencode_postdata,
)
@@ -38,7 +38,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self.report_login()
login_url = 'https://vimeo.com/log_in'
webpage = self._download_webpage(login_url, None, False)
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
data = urlencode_postdata({
'email': username,
'password': password,
@@ -140,6 +140,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': 'md5:8678b246399b070816b12313e8b4eb5c',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
+ 'upload_date': '20130927',
'duration': 187,
},
},
@@ -176,17 +177,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
- data = compat_urllib_parse.urlencode({
+ token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
+ data = urlencode_postdata({
'password': password,
'token': token,
})
- # I didn't manage to use the password with https
- if url.startswith('https'):
- pass_url = url.replace('https', 'http')
- else:
- pass_url = url
- password_request = compat_urllib_request.Request(pass_url + '/password', data)
+ if url.startswith('http://'):
+ # vimeo only supports https now, but the user can give an http url
+ url = url.replace('http://', 'https://')
+ password_request = compat_urllib_request.Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
password_request.add_header('Cookie', 'xsrft=%s' % token)
return self._download_webpage(
@@ -223,12 +222,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
video_id = mobj.group('id')
orig_url = url
if mobj.group('pro') or mobj.group('player'):
- url = 'http://player.vimeo.com/video/' + video_id
-
- password = self._downloader.params.get('videopassword', None)
- if password:
- headers['Cookie'] = '%s_password=%s' % (
- video_id, hashlib.md5(password.encode('utf-8')).hexdigest())
+ url = 'https://player.vimeo.com/video/' + video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, headers)
@@ -250,6 +244,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
# and later we extract those that are Vimeo specific.
self.report_extraction(video_id)
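+ # Bail out early with a clear message when Vimeo reports a failed
+ # transcode in seed_status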
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*({.+?});', webpage,
+ 'vimeo config', default=None)
+ if vimeo_config:
+ seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
# Extract the config JSON
try:
try:
@@ -323,9 +327,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Extract upload date
video_upload_date = None
- mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
+ mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
if mobj is not None:
- video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
+ video_upload_date = unified_strdate(mobj.group(1))
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
@@ -379,7 +383,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
for tt in text_tracks:
subtitles[tt['lang']] = [{
'ext': 'vtt',
- 'url': 'http://vimeo.com' + tt['url'],
+ 'url': 'https://vimeo.com' + tt['url'],
}]
return {
@@ -402,11 +406,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
class VimeoChannelIE(InfoExtractor):
IE_NAME = 'vimeo:channel'
- _VALID_URL = r'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+ _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
_MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
_TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
_TESTS = [{
- 'url': 'http://vimeo.com/channels/tributes',
+ 'url': 'https://vimeo.com/channels/tributes',
'info_dict': {
'id': 'tributes',
'title': 'Vimeo Tributes',
@@ -435,10 +439,10 @@ class VimeoChannelIE(InfoExtractor):
name="([^"]+)"\s+
value="([^"]*)"
''', login_form))
- token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+ token = self._search_regex(r'xsrft = \'(.*?)\'', webpage, 'login token')
fields['token'] = token
fields['password'] = password
- post = compat_urllib_parse.urlencode(fields)
+ post = urlencode_postdata(fields)
password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path)
@@ -465,7 +469,7 @@ class VimeoChannelIE(InfoExtractor):
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
- entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+ entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
for video_id in video_ids]
return {'_type': 'playlist',
'id': list_id,
@@ -476,15 +480,15 @@ class VimeoChannelIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
channel_id = mobj.group('id')
- return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id)
+ return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user'
- _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
+ _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{
- 'url': 'http://vimeo.com/nkistudio/videos',
+ 'url': 'https://vimeo.com/nkistudio/videos',
'info_dict': {
'title': 'Nki',
'id': 'nkistudio',
@@ -495,15 +499,15 @@ class VimeoUserIE(VimeoChannelIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
- return self._extract_videos(name, 'http://vimeo.com/%s' % name)
+ return self._extract_videos(name, 'https://vimeo.com/%s' % name)
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
- _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)'
+ _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
- 'url': 'http://vimeo.com/album/2632481',
+ 'url': 'https://vimeo.com/album/2632481',
'info_dict': {
'id': '2632481',
'title': 'Staff Favorites: November 2013',
@@ -527,14 +531,14 @@ class VimeoAlbumIE(VimeoChannelIE):
def _real_extract(self, url):
album_id = self._match_id(url)
- return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
+ return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
- _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
_TESTS = [{
- 'url': 'http://vimeo.com/groups/rolexawards',
+ 'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': {
'id': 'rolexawards',
'title': 'Rolex Awards for Enterprise',
@@ -548,13 +552,13 @@ class VimeoGroupsIE(VimeoAlbumIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
name = mobj.group('name')
- return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name)
+ return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
class VimeoReviewIE(InfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
- _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -566,7 +570,7 @@ class VimeoReviewIE(InfoExtractor):
}
}, {
'note': 'video player needs Referer',
- 'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053',
+ 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
'md5': '6295fdab8f4bf6a002d058b2c6dce276',
'info_dict': {
'id': '91613211',
@@ -588,11 +592,11 @@ class VimeoReviewIE(InfoExtractor):
class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
IE_NAME = 'vimeo:watchlater'
IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
- _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater'
+ _VALID_URL = r'https://vimeo\.com/home/watchlater|:vimeowatchlater'
_LOGIN_REQUIRED = True
_TITLE_RE = r'href="/home/watchlater".*?>(.*?)<'
_TESTS = [{
- 'url': 'http://vimeo.com/home/watchlater',
+ 'url': 'https://vimeo.com/home/watchlater',
'only_matching': True,
}]
@@ -612,7 +616,7 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
class VimeoLikesIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
_TEST = {
@@ -640,8 +644,8 @@ class VimeoLikesIE(InfoExtractor):
description = self._html_search_meta('description', webpage)
def _get_page(idx):
- page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
- self.http_scheme(), user_id, idx + 1)
+ page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
+ user_id, idx + 1)
webpage = self._download_webpage(
page_url, user_id,
note='Downloading page %d/%d' % (idx + 1, page_count))
diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py
index 0b58fe0fe..c3187cfeb 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@@ -33,14 +33,13 @@ class VineIE(InfoExtractor):
r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data'))
formats = [{
- 'url': data['videoLowURL'],
- 'ext': 'mp4',
- 'format_id': 'low',
- }, {
- 'url': data['videoUrl'],
- 'ext': 'mp4',
- 'format_id': 'standard',
- }]
+ 'format_id': '%(format)s-%(rate)s' % f,
+ 'vcodec': f['format'],
+ 'quality': f['rate'],
+ 'url': f['videoUrl'],
+ } for f in data['videoUrls'] if f.get('rate')]
+
+ self._sort_formats(formats)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index 7dea8c59d..cc384adbf 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -31,7 +31,7 @@ class VKIE(InfoExtractor):
'id': '162222515',
'ext': 'flv',
'title': 'ProtivoGunz - Хуёвая песня',
- 'uploader': 're:Noize MC.*',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
'duration': 195,
'upload_date': '20120212',
},
@@ -140,7 +140,7 @@ class VKIE(InfoExtractor):
if not video_id:
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
- info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
+ info_url = 'http://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
info_page = self._download_webpage(info_url, video_id)
ERRORS = {
@@ -152,7 +152,10 @@ class VKIE(InfoExtractor):
'use --username and --password options to provide account credentials.',
r'<!>Unknown error':
- 'Video %s does not exist.'
+ 'Video %s does not exist.',
+
+ r'<!>Видео временно недоступно':
+ 'Video %s is temporarily unavailable.',
}
for error_re, error_msg in ERRORS.items():
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dl/extractor/xuite.py
index 4971965f9..81d885fdc 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dl/extractor/xuite.py
@@ -69,18 +69,26 @@ class XuiteIE(InfoExtractor):
'only_matching': True,
}]
+ @staticmethod
+ def base64_decode_utf8(data):
+ return base64.b64decode(data.encode('utf-8')).decode('utf-8')
+
+ @staticmethod
+ def base64_encode_utf8(data):
+ return base64.b64encode(data.encode('utf-8')).decode('utf-8')
+
def _extract_flv_config(self, media_id):
- base64_media_id = base64.b64encode(media_id.encode('utf-8')).decode('utf-8')
+ base64_media_id = self.base64_encode_utf8(media_id)
flv_config = self._download_xml(
'http://vlog.xuite.net/flash/player?media=%s' % base64_media_id,
'flv config')
prop_dict = {}
for prop in flv_config.findall('./property'):
- prop_id = base64.b64decode(prop.attrib['id']).decode('utf-8')
+ prop_id = self.base64_decode_utf8(prop.attrib['id'])
# CDATA may be empty in flv config
if not prop.text:
continue
- encoded_content = base64.b64decode(prop.text).decode('utf-8')
+ encoded_content = self.base64_decode_utf8(prop.text)
prop_dict[prop_id] = compat_urllib_parse_unquote(encoded_content)
return prop_dict
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 97dbac4cc..b777159c5 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -17,6 +17,8 @@ from ..utils import (
int_or_none,
)
+from .nbc import NBCSportsVPlayerIE
+
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
@@ -129,6 +131,15 @@ class YahooIE(InfoExtractor):
}, {
'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ }
}
]
@@ -151,6 +162,10 @@ class YahooIE(InfoExtractor):
items = json.loads(items_json)
video_id = items[0]['id']
return self._get_info(video_id, display_id, webpage)
+ # Look for NBCSports iframes
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
items_json = self._search_regex(
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py
index b294767c5..19f8762ae 100644
--- a/youtube_dl/extractor/yam.py
+++ b/youtube_dl/extractor/yam.py
@@ -8,6 +8,7 @@ from ..compat import compat_urlparse
from ..utils import (
float_or_none,
month_by_abbreviation,
+ ExtractorError,
)
@@ -28,23 +29,45 @@ class YamIE(InfoExtractor):
}
}, {
# An external video hosted on YouTube
- 'url': 'http://mymedia.yam.com/m/3598173',
- 'md5': '0238ceec479c654e8c2f1223755bf3e9',
+ 'url': 'http://mymedia.yam.com/m/3599430',
+ 'md5': '03127cf10d8f35d120a9e8e52e3b17c6',
'info_dict': {
- 'id': 'pJ2Deys283c',
+ 'id': 'CNpEoQlrIgA',
'ext': 'mp4',
- 'upload_date': '20150202',
+ 'upload_date': '20150306',
'uploader': '新莊社大瑜伽社',
- 'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
+ 'description': 'md5:11e2e405311633ace874f2e6226c8b17',
'uploader_id': '2323agoy',
- 'title': '外婆的澎湖灣KTV-潘安邦',
- }
+ 'title': '20090412陽明山二子坪-1',
+ },
+ 'skip': 'Video does not exist',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/3598173',
+ 'info_dict': {
+ 'id': '3598173',
+ 'ext': 'mp4',
+ },
+ 'skip': 'Yam system error',
+ }, {
+ 'url': 'http://mymedia.yam.com/m/3599437',
+ 'info_dict': {
+ 'id': '3599437',
+ 'ext': 'mp4',
+ },
+ 'skip': 'invalid YouTube URL',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
page = self._download_webpage(url, video_id)
+ # Check for errors
+ system_msg = self._html_search_regex(
+ r'系統訊息(?:<br>|\n|\r)*([^<>]+)<br>', page, 'system message',
+ default=None)
+ if system_msg:
+ raise ExtractorError(system_msg, expected=True)
+
# Is it hosted externally on YouTube?
youtube_url = self._html_search_regex(
r'<embed src="(http://www.youtube.com/[^"]+)"',
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
new file mode 100644
index 000000000..f4c0f5702
--- /dev/null
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -0,0 +1,127 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ float_or_none,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+ def _get_track_url(self, storage_dir, track_id):
+ data = self._download_json(
+ 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
+ % storage_dir,
+ track_id, 'Downloading track location JSON')
+
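+ # The final mp3 URL must be signed with an MD5 over a fixed salt,
+ # the returned 'path' (sans its leading character) and the 's' token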
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
+ storage = storage_dir.split('.')
+
+ return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
+ % (data['host'], key, data['ts'] + data['path'], storage[1]))
+
+ def _get_track_info(self, track):
+ return {
+ 'id': track['id'],
+ 'ext': 'mp3',
+ 'url': self._get_track_url(track['storageDir'], track['id']),
+ 'title': '%s - %s' % (track['artists'][0]['name'], track['title']),
+ 'filesize': int_or_none(track.get('fileSize')),
+ 'duration': float_or_none(track.get('durationMs'), 1000),
+ }
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:track'
+ IE_DESC = 'Яндекс.Музыка - Трек'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/album/540508/track/4878838',
+ 'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+ 'info_dict': {
+ 'id': '4878838',
+ 'ext': 'mp3',
+ 'title': 'Carlo Ambrosio - Gypsy Eyes 1',
+ 'filesize': 4628061,
+ 'duration': 193.04,
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id, track_id = mobj.group('album_id'), mobj.group('id')
+
+ track = self._download_json(
+ 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+ track_id, 'Downloading track JSON')['track']
+
+ return self._get_track_info(track)
+
+
+class YandexMusicAlbumIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:album'
+ IE_DESC = 'Яндекс.Музыка - Альбом'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(?:\?|$)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/album/540508',
+ 'info_dict': {
+ 'id': '540508',
+ 'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
+ },
+ 'playlist_count': 50,
+ }
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ album = self._download_json(
+ 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
+ album_id, 'Downloading album JSON')
+
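+ # NB: only the first volume (album['volumes'][0]) is extracted here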
+ entries = [self._get_track_info(track) for track in album['volumes'][0]]
+
+ title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+ year = album.get('year')
+ if year:
+ title += ' (%s)' % year
+
+ return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:playlist'
+ IE_DESC = 'Яндекс.Музыка - Плейлист'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/users/[^/]+/playlists/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'Что слушают Enter Shikari',
+ 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+ },
+ 'playlist_count': 6,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+Mu\s*=\s*({.+?});\s*</script>', webpage, 'player'),
+ playlist_id)['pageData']['playlist']
+
+ entries = [self._get_track_info(track) for track in playlist['tracks']]
+
+ return self.playlist_result(
+ entries, compat_str(playlist_id),
+ playlist['title'], playlist.get('description'))
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index 107c9ac36..6abe72f73 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -47,11 +47,12 @@ class YouPornIE(InfoExtractor):
# Get JSON parameters
json_params = self._search_regex(
- r'var currentVideo = new Video\((.*)\)[,;]',
+ [r'var\s+videoJa?son\s*=\s*({.+?});',
+ r'var\s+currentVideo\s*=\s*new\s+Video\((.+?)\)[,;]'],
webpage, 'JSON parameters')
try:
params = json.loads(json_params)
- except:
+ except ValueError:
raise ExtractorError('Invalid JSON')
self.report_extraction(video_id)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3690f8021..5488101e1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1263,27 +1263,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, title)
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- if 'v' in query_dict:
- video_id = query_dict['v'][0]
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
-
- if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
+ def _extract_playlist(self, playlist_id):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
more_widget_html = content_html = page
@@ -1327,6 +1307,29 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ if 'v' in query_dict:
+ video_id = query_dict['v'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
+ if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ return self._extract_playlist(playlist_id)
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
@@ -1532,7 +1535,7 @@ class YoutubeSearchURLIE(InfoExtractor):
webpage = self._download_webpage(url, query)
result_code = self._search_regex(
- r'(?s)<ol class="item-section"(.*?)</ol>', webpage, 'result HTML')
+ r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML')
part_codes = re.findall(
r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
@@ -1643,21 +1646,26 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:recommended'
IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_PLAYLIST_TITLE = 'Youtube Recommended videos'
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
- _FEED_NAME = 'watch_later'
- _PLAYLIST_TITLE = 'Youtube Watch Later'
- _PERSONAL_FEED = True
+ _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+ _TESTS = [] # override PlaylistIE tests
+
+ def _real_extract(self, url):
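+ # The watch-later feed is simply the special 'WL' playlist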
+ return self._extract_playlist('WL')
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_NAME = 'youtube:history'
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
_FEED_NAME = 'history'
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index a2ffe96bc..35c7e5fb3 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -195,6 +195,12 @@ def parseOpts(overrideArguments=None):
action='store_const', const='::', dest='source_address',
help='Make all connections via IPv6 (experimental)',
)
+ network.add_option(
+ '--cn-verification-proxy',
+ dest='cn_verification_proxy', default=None, metavar='URL',
+ help='Use this proxy to verify the IP address for some Chinese sites. '
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading. (experimental)'
+ )
selection = optparse.OptionGroup(parser, 'Video Selection')
selection.add_option(
@@ -435,8 +441,12 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'--external-downloader',
dest='external_downloader', metavar='COMMAND',
- help='(experimental) Use the specified external downloader. '
+ help='Use the specified external downloader. '
'Currently supports %s' % ','.join(list_external_downloaders()))
+ downloader.add_option(
+ '--external-downloader-args',
+ dest='external_downloader_args', metavar='ARGS',
+ help='Give these arguments to the external downloader.')
workarounds = optparse.OptionGroup(parser, 'Workarounds')
workarounds.add_option(
@@ -553,7 +563,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='verbose', default=False,
help='print various debugging information')
verbosity.add_option(
- '--dump-intermediate-pages',
+ '--dump-pages', '--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems (very verbose)')
verbosity.add_option(
@@ -726,6 +736,15 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='addmetadata', default=False,
help='write metadata to the video file')
postproc.add_option(
+ '--metadata-from-title',
+ metavar='FORMAT', dest='metafromtitle',
+ help='parse additional metadata like song title / artist from the video title. '
+ 'The format syntax is the same as --output, '
+ 'the parsed parameters replace existing values. '
+ 'Additional templates: %(album)s, %(artist)s. '
+ 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+ '"Coldplay - Paradise"')
+ postproc.add_option(
'--xattrs',
action='store_true', dest='xattrs', default=False,
help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
@@ -775,6 +794,11 @@ def parseOpts(overrideArguments=None):
write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
command_line_conf = sys.argv[1:]
+ # Workaround for Python 2.x, where argv is a byte list
+ if sys.version_info < (3,):
+ command_line_conf = [
+ a.decode('utf-8', 'replace') for a in command_line_conf]
+
if '--ignore-config' in command_line_conf:
system_conf = []
user_conf = []
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py
index 708df3dd4..f39acadce 100644
--- a/youtube_dl/postprocessor/__init__.py
+++ b/youtube_dl/postprocessor/__init__.py
@@ -15,6 +15,7 @@ from .ffmpeg import (
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
+from .metadatafromtitle import MetadataFromTitlePP
def get_postprocessor(key):
@@ -34,5 +35,6 @@ __all__ = [
'FFmpegPostProcessor',
'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
+ 'MetadataFromTitlePP',
'XAttrMetadataPP',
]
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 30094c2f3..55adf9685 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import io
import os
import subprocess
-import sys
import time
@@ -269,19 +268,17 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
else:
self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path)
self.run_ffmpeg(path, new_path, acodec, more_opts)
- except:
- etype, e, tb = sys.exc_info()
- if isinstance(e, AudioConversionError):
- msg = 'audio conversion failed: ' + e.msg
- else:
- msg = 'error running ' + self.basename
- raise PostProcessingError(msg)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
try:
os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
- except:
+ except Exception:
self._downloader.report_warning('Cannot update utime of audio file')
information['filepath'] = new_path
@@ -545,7 +542,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
metadata['title'] = info['title']
if info.get('upload_date') is not None:
metadata['date'] = info['upload_date']
- if info.get('uploader') is not None:
+ if info.get('artist') is not None:
+ metadata['artist'] = info['artist']
+ elif info.get('uploader') is not None:
metadata['artist'] = info['uploader']
elif info.get('uploader_id') is not None:
metadata['artist'] = info['uploader_id']
@@ -554,6 +553,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
metadata['comment'] = info['description']
if info.get('webpage_url') is not None:
metadata['purl'] = info['webpage_url']
+ if info.get('album') is not None:
+ metadata['album'] = info['album']
if not metadata:
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py
new file mode 100644
index 000000000..5019433d3
--- /dev/null
+++ b/youtube_dl/postprocessor/metadatafromtitle.py
@@ -0,0 +1,47 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+from ..utils import PostProcessingError
+
+
+class MetadataFromTitlePPError(PostProcessingError):
+ pass
+
+
+class MetadataFromTitlePP(PostProcessor):
+ def __init__(self, downloader, titleformat):
+ super(MetadataFromTitlePP, self).__init__(downloader)
+ self._titleformat = titleformat
+ self._titleregex = self.format_to_regex(titleformat)
+
+ def format_to_regex(self, fmt):
+ """
+ Converts a string like
+ '%(title)s - %(artist)s'
+ to a regex like
+ '(?P<title>.+)\ \-\ (?P<artist>.+)'
+ """
+ lastpos = 0
+ regex = ""
+ # replace %(..)s with regex group and escape other string parts
+ for match in re.finditer(r'%\((\w+)\)s', fmt):
+ regex += re.escape(fmt[lastpos:match.start()])
+ regex += r'(?P<' + match.group(1) + '>.+)'
+ lastpos = match.end()
+ if lastpos < len(fmt):
+ regex += re.escape(fmt[lastpos:len(fmt)])
+ return regex
+
+ def run(self, info):
+ title = info['title']
+ match = re.match(self._titleregex, title)
+ if match is None:
+ raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
+ for attribute, value in match.groupdict().items():
+ info[attribute] = value
+ self._downloader.to_screen('[fromtitle] parsed ' + attribute + ': ' + value)
+
+ return True, info
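+
+# Example (sketch): with --metadata-from-title '%(artist)s - %(title)s',
+# a video titled 'Coldplay - Paradise' yields info['artist'] == 'Coldplay'
+# and info['title'] == 'Paradise'.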
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index d8be4049f..de3169eef 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -65,7 +65,7 @@ def update_self(to_screen, verbose):
# Check if there is a new version
try:
newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t find the current version. Please try again later.')
@@ -78,7 +78,7 @@ def update_self(to_screen, verbose):
try:
versions_info = opener.open(JSON_URL).read().decode('utf-8')
versions_info = json.loads(versions_info)
- except:
+ except Exception:
if verbose:
to_screen(compat_str(traceback.format_exc()))
to_screen('ERROR: can\'t obtain versions info. Please try again later.')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index d4938ec36..90e0ed9ab 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -75,7 +75,7 @@ def preferredencoding():
try:
pref = locale.getpreferredencoding()
'TEST'.encode(pref)
- except:
+ except Exception:
pref = 'UTF-8'
return pref
@@ -127,7 +127,7 @@ def write_json_file(obj, fn):
except OSError:
pass
os.rename(tf.name, fn)
- except:
+ except Exception:
try:
os.remove(tf.name)
except OSError:
@@ -252,15 +252,12 @@ def sanitize_open(filename, open_mode):
raise
# In case of error, try to remove win32 forbidden chars
- alt_filename = os.path.join(
- re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
- for path_part in os.path.split(filename)
- )
+ alt_filename = sanitize_path(filename)
if alt_filename == filename:
raise
else:
# An exception here should be caught in the caller
- stream = open(encodeFilename(filename), open_mode)
+ stream = open(encodeFilename(alt_filename), open_mode)
return (stream, alt_filename)
@@ -305,11 +302,37 @@ def sanitize_filename(s, restricted=False, is_id=False):
result = result[2:]
if result.startswith('-'):
result = '_' + result[len('-'):]
+ result = result.lstrip('.')
if not result:
result = '_'
return result
+def sanitize_path(s):
+ """Sanitizes and normalizes path on Windows"""
+ if sys.platform != 'win32':
+ return s
+ drive, _ = os.path.splitdrive(s)
+ unc, _ = os.path.splitunc(s)
+ unc_or_drive = unc or drive
+ norm_path = os.path.normpath(remove_start(s, unc_or_drive)).split(os.path.sep)
+ if unc_or_drive:
+ norm_path.pop(0)
+ sanitized_path = [
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|\.$)', '#', path_part)
+ for path_part in norm_path]
+ if unc_or_drive:
+ sanitized_path.insert(0, unc_or_drive + os.path.sep)
+ return os.path.join(*sanitized_path)
+
+
+def sanitize_url_path_consecutive_slashes(url):
+ """Collapses consecutive slashes in URLs' path"""
+ parsed_url = list(compat_urlparse.urlparse(url))
+ parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+ return compat_urlparse.urlunparse(parsed_url)
+
+
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """
res = []
@@ -325,7 +348,7 @@ def _htmlentity_transform(entity):
if entity in compat_html_entities.name2codepoint:
return compat_chr(compat_html_entities.name2codepoint[entity])
- mobj = re.match(r'#(x?[0-9]+)', entity)
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
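+ # (accepts both decimal references like '&#65;' and hexadecimal ones like '&#x41;')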
if mobj is not None:
numstr = mobj.group(1)
if numstr.startswith('x'):
@@ -1767,3 +1790,24 @@ def match_filter_func(filter_str):
video_title = info_dict.get('title', info_dict.get('id', 'video'))
return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
return _match_func
+
+
+class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+ def __init__(self, proxies=None):
+ # Set default handlers
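+ # (ensures http/https openers exist even when no proxy is configured,
+ # so the per-request override below is always consulted)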
+ for type in ('http', 'https'):
+ setattr(self, '%s_open' % type,
+ lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+ meth(r, proxy, type))
+ return compat_urllib_request.ProxyHandler.__init__(self, proxies)
+
+ def proxy_open(self, req, proxy, type):
+ req_proxy = req.headers.get('Ytdl-request-proxy')
+ if req_proxy is not None:
+ proxy = req_proxy
+ del req.headers['Ytdl-request-proxy']
+
+ if proxy == '__noproxy__':
+ return None # No Proxy
+ return compat_urllib_request.ProxyHandler.proxy_open(
+ self, req, proxy, type)
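+
+# Usage sketch (illustrative only): install the handler and set the
+# internal per-request header to override the proxy for a single request:
+# opener = compat_urllib_request.build_opener(PerRequestProxyHandler())
+# req = compat_urllib_request.Request('http://example.com')
+# req.add_header('Ytdl-request-proxy', 'http://127.0.0.1:8118')
+# opener.open(req)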
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 5582348ba..dd93e295a 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2015.02.28'
+__version__ = '2015.03.28'