about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com>, 2019-01-01 18:12:44 +0700
committer: Sergey M․ <dstftw@gmail.com>, 2019-01-01 18:12:44 +0700
commit: 6b688b8942a0bd18b2a3835e69a903f1eeffee2a (patch)
tree: 4be798a22cd173dfb697b92f04c5d88d58fbf829
parent: 9d9daed4647c0a0798adee5a580e9fd95478cf6a (diff)
[bitchute] Fix extraction (closes #18567)
-rw-r--r-- youtube_dl/extractor/bitchute.py 16
1 files changed, 12 insertions, 4 deletions
diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py
index 43b4732aa..aa034355a 100644
--- a/youtube_dl/extractor/bitchute.py
+++ b/youtube_dl/extractor/bitchute.py
@@ -5,7 +5,10 @@ import itertools
import re
from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import (
+ orderedSet,
+ urlencode_postdata,
+)
class BitChuteIE(InfoExtractor):
@@ -43,10 +46,15 @@ class BitChuteIE(InfoExtractor):
'description', webpage, 'title',
default=None) or self._og_search_description(webpage)
+ format_urls = []
+ for mobj in re.finditer(
+ r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+ format_urls.append(mobj.group('url'))
+ format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
formats = [
- {'url': mobj.group('url')}
- for mobj in re.finditer(
- r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
+ {'url': format_url}
+ for format_url in orderedSet(format_urls)]
self._sort_formats(formats)
description = self._html_search_regex(