aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsrc-tinkerer <149616646+src-tinkerer@users.noreply.github.com>2024-03-22 14:31:01 +0000
committerGitHub <noreply@github.com>2024-03-22 14:31:01 +0000
commitbc2b8c0596fd6b75af24822c4f0f1da6783d71f7 (patch)
treeb0a8067ed8992c74bd44972923a65151e48c9ba1
parentaa7e9ae4f48276bd5d0173966c77db9484f65a0a (diff)
[ie/fathom] Add extractor (#9495)
Closes #8541 Authored by: src-tinkerer
-rw-r--r--yt_dlp/extractor/_extractors.py1
-rw-r--r--yt_dlp/extractor/fathom.py54
2 files changed, 55 insertions, 0 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index ec84ec925..36d0853a0 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -590,6 +590,7 @@ from .facebook import (
FacebookReelIE,
FacebookAdsIE,
)
+from .fathom import FathomIE
from .fancode import (
FancodeVodIE,
FancodeLiveIE
diff --git a/yt_dlp/extractor/fathom.py b/yt_dlp/extractor/fathom.py
new file mode 100644
index 000000000..1df7d96fe
--- /dev/null
+++ b/yt_dlp/extractor/fathom.py
@@ -0,0 +1,54 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ float_or_none,
+ get_element_html_by_id,
+ parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+
+class FathomIE(InfoExtractor):
+ # Extractor for fathom.video share links of the form
+ # https://fathom.video/share/<token>.
+ #
+ # Accepts http or https, an optional "www." prefix, and captures the share
+ # token after /share/ as group "id" (any run of characters other than
+ # '/', '?', '#' and '&').
+ _VALID_URL = r'https?://(?:www\.)?fathom\.video/share/(?P<id>[^/?#&]+)'
+ # Two sample share URLs with their expected metadata. Note that the expected
+ # 'id' is the numeric call id read from the page data, not the share token
+ # that appears in the URL.
+ # NOTE(review): 'Inucbator' in the first title looks like a typo but
+ # presumably mirrors the title served by the site - confirm before changing.
+ _TESTS = [{
+ 'url': 'https://fathom.video/share/G9mkjkspnohVVZ_L5nrsoPycyWcB8y7s',
+ 'md5': '0decd5343b8f30ae268625e79a02b60f',
+ 'info_dict': {
+ 'id': '47200596',
+ 'ext': 'mp4',
+ 'title': 'eCom Inucbator - Coaching Session',
+ 'duration': 8125.380507,
+ 'timestamp': 1699048914,
+ 'upload_date': '20231103',
+ },
+ }, {
+ 'url': 'https://fathom.video/share/mEws3bybftHL2QLymxYEDeE21vtLxGVm',
+ 'md5': '4f5cb382126c22d1aba8a939f9c49690',
+ 'info_dict': {
+ 'id': '46812957',
+ 'ext': 'mp4',
+ 'title': 'Jon, Lawrence, Neman chat about practice',
+ 'duration': 3571.517847,
+ 'timestamp': 1698933600,
+ 'upload_date': '20231102',
+ },
+ }]
+
+ # Download the share page and build the info dict from the JSON embedded in
+ # it. Returns a dict with 'id' and 'formats', plus 'title', 'duration' and
+ # 'timestamp' taken from the page data via traverse_obj.
+ def _real_extract(self, url):
+ # The share token from the URL is only used as the display id while
+ # fetching the page; the final video id comes from the page data below.
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ # Page state is read from the element with id "app": its "data-page"
+ # attribute is decoded with json.loads and the "props" key is selected.
+ # NOTE(review): if the element, the attribute or the JSON is missing,
+ # props is presumably None and the subscripts below would fail -
+ # confirm traverse_obj's failure behaviour and whether that is acceptable.
+ props = traverse_obj(
+ get_element_html_by_id('app', webpage), ({extract_attributes}, 'data-page', {json.loads}, 'props'))
+ # 'call' -> 'id' is accessed directly (not via traverse_obj) and
+ # converted to a string to serve as the video id.
+ video_id = str(props['call']['id'])
+
+ return {
+ 'id': video_id,
+ # 'call' -> 'video_url' is passed to the m3u8 format extractor; the
+ # third argument 'mp4' is presumably the container extension - verify
+ # against _extract_m3u8_formats' signature.
+ 'formats': self._extract_m3u8_formats(props['call']['video_url'], video_id, 'mp4'),
+ # Remaining fields: title from head.title (filtered by {str}),
+ # duration from the top-level 'duration' key of props (through
+ # float_or_none), timestamp from call.started_at (through
+ # parse_iso8601).
+ **traverse_obj(props, {
+ 'title': ('head', 'title', {str}),
+ 'duration': ('duration', {float_or_none}),
+ 'timestamp': ('call', 'started_at', {parse_iso8601}),
+ }),
+ }