[pornhub] Improve title extraction (closes #24184)

author: Sergey M․ <dstftw@gmail.com> 2020-03-03 06:23:39 +0700
committer: Sergey M․ <dstftw@gmail.com> 2020-03-03 06:23:39 +0700
commit: 46cc54ca8f13c7b823c1a12446cdd76d060c74b6 (patch)
tree: 4405aa44fd49cf2fbdf1e518a7682c29a3a0b146
parent: 1e1c1960aa154a6e257e83e94e86ee6dc8b0b362 (diff)
download: youtube-dl-46cc54ca8f13c7b823c1a12446cdd76d060c74b6.tar.xz
1 file changed, 4 insertions, 4 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index b3251ccd9..b8f65af7c 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -189,10 +189,10 @@ class PornHubIE(PornHubBaseIE):
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
         title = self._html_search_meta(
-            'twitter:title', webpage, default=None) or self._search_regex(
-            (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
-             r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
-             r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+            'twitter:title', webpage, default=None) or self._html_search_regex(
+            (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+             r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
             webpage, 'title', group='title')
 
         video_urls = []
author	Sergey M․ <dstftw@gmail.com>	2020-03-03 06:23:39 +0700
committer	Sergey M․ <dstftw@gmail.com>	2020-03-03 06:23:39 +0700
commit	46cc54ca8f13c7b823c1a12446cdd76d060c74b6 (patch)
tree	4405aa44fd49cf2fbdf1e518a7682c29a3a0b146
parent	1e1c1960aa154a6e257e83e94e86ee6dc8b0b362 (diff)
download	youtube-dl-46cc54ca8f13c7b823c1a12446cdd76d060c74b6.tar.xz