aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
authorAndrewMBL <62922222+AndrewMBL@users.noreply.github.com>2020-03-31 15:25:04 +1100
committerSergey M․ <dstftw@gmail.com>2020-04-11 20:07:32 +0700
commit533f3e3557af85e28afd72d291cb51a769c7dd7a (patch)
treee8bd4678408c665d42d4f482cea52b8c450e5b8d /youtube_dl
parent75294a5ed03f4443970478f3f4eac572239cec45 (diff)
[thisoldhouse] Fix video id extraction (closes #24548)
Added support for iframe URLs with or without "www." and with either ".chorus.build" or ".com" as the domain suffix. The pattern now validates correctly on older URLs ``` <iframe src="https://thisoldhouse.chorus.build/videos/zype/5e33baec27d2e50001d5f52f ``` and newer ones ``` <iframe src="https://www.thisoldhouse.com/videos/zype/5e2b70e95216cc0001615120 ```
Diffstat (limited to 'youtube_dl')
-rw-r--r--youtube_dl/extractor/thisoldhouse.py16
1 files changed, 15 insertions, 1 deletions
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
index 387f955ee..33269705f 100644
--- a/youtube_dl/extractor/thisoldhouse.py
+++ b/youtube_dl/extractor/thisoldhouse.py
@@ -20,6 +20,20 @@ class ThisOldHouseIE(InfoExtractor):
'skip_download': True,
},
}, {
+ 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+ 'note': 'test for updated video URL',
+ 'info_dict': {
+ 'id': '5e2b70e95216cc0001615120',
+ 'ext': 'mp4',
+ 'title': 'E12 | The Westerly Project | Seaside Transformation',
+ 'description': 'Kevin and Tommy take the tour with the homeowners and Jeff. Norm presents his pine coffee table. Jenn gives Tommy the garden tour. Everyone meets at the flagpole to raise the flags.',
+ 'timestamp': 1579755600,
+ 'upload_date': '20200123',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
}, {
@@ -38,6 +52,6 @@ class ThisOldHouseIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
- r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})',
+ r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.|)thisoldhouse(?:\.chorus\.build|\.com)/videos/zype/([0-9a-f]{24})',
webpage, 'video id')
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)