From 302efc19ea970c3737f088d21b69f440d7f440e3 Mon Sep 17 00:00:00 2001 From: dongmao zhang Date: Thu, 9 Aug 2012 02:04:02 +0800 Subject: add youku support --- youtube_dl/InfoExtractors.py | 118 +++++++++++++++++++++++++++++++++++++++++++ youtube_dl/__init__.py | 1 + 2 files changed, 119 insertions(+) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ddb4aa16b..e1fc93b6b 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -13,6 +13,8 @@ import urllib import urllib2 import email.utils import xml.etree.ElementTree +import random +import math from urlparse import parse_qs try: @@ -2955,3 +2957,119 @@ class MTVIE(InfoExtractor): } return [info] + + + +class YoukuIE(InfoExtractor): + + _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P[A-Za-z0-9]+)\.html' + IE_NAME = u'Youku' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[Youku] %s: Downloading webpage' % file_id) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[Youku] %s: Extracting information' % file_id) + + def _gen_sid(self): + nowTime = int(time.time() * 1000) + random1 = random.randint(1000,1998) + random2 = random.randint(1000,9999) + + return "%d%d%d" %(nowTime,random1,random2) + + def _get_file_ID_mix_string(self, seed): + mixed = [] + source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890") + seed = float(seed) + for i in range(len(source)): + seed = (seed * 211 + 30031 ) % 65536 + index = math.floor(seed / 65536 * len(source) ) + mixed.append(source[int(index)]) + source.remove(source[int(index)]) + #return ''.join(mixed) + return mixed + + + def _get_file_id(self, fileId, seed): + mixed = self._get_file_ID_mix_string(seed) + ids = fileId.split('*') + realId = [] + for ch in ids: + if ch is not '': + realId.append(mixed[int(ch)]) + return ''.join(realId) + + def _gen_key(self, key1, key2): + pass + + + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group('ID') + + info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id + print info_url + + request = urllib2.Request(info_url, None, std_headers) + try: + self.report_download_webpage(video_id) + jsondata = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error) as err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + try: + config = json.loads(jsondata) + + video_title = config['data'][0]['title'] + seed = config['data'][0]['seed'] + #choose format flv first + format = 'flv' + + fileid = config['data'][0]['streamfileids'][format] + seg_number = len(config['data'][0]['segs'][format]) + keys=[] + for i in xrange(seg_number): + keys.append(config['data'][0]['segs'][format][i]['k']) + + #TODO check error + #youku only could be viewed from mainland china + except: + self._downloader.trouble(u'ERROR: unable to extract info section') + return + + files_info=[] + sid = self._gen_sid() + fileid = self._get_file_id(fileid, seed) + + #column 8,9 of fileid represent the segment number + #fileid[7:9] should be changed + for index, key in enumerate(keys): + + temp_fileid = '%s%02x%s' % (fileid[0:8], index, fileid[10:]) + download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02x/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) + print download_url + info = { + 'id': '%s_part%02x' % (video_id, index), + 'url': download_url, + 'uploader': None, + 'title': '%s_part%02x' % (video_title, index), + 'ext': u'flv', + 'format': u'NA' + } + files_info.append(info) + + return files_info +# vim: tabstop=4 shiftwidth=4 softtabstop=4 noexpandtab diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 86951840d..84b972d51 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -351,6 +351,7 @@ def gen_extractors(): MixcloudIE(), StanfordOpenClassroomIE(), MTVIE(), + YoukuIE(), GenericIE() ] -- cgit v1.2.3 From 0a98b09bc2d0553f0f5bd44c26b468e55a163d07 Mon Sep 17 00:00:00 2001 From: dongmao zhang Date: Thu, 9 Aug 2012 02:53:21 +0800 Subject: youku default to download hd2 video --- youtube-dl | Bin 40663 -> 42046 bytes youtube_dl/InfoExtractors.py | 24 ++++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/youtube-dl b/youtube-dl index f2268af22..09e205cc1 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index e1fc93b6b..00953a254 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3035,11 +3035,27 @@ class YoukuIE(InfoExtractor): video_title = config['data'][0]['title'] seed = config['data'][0]['seed'] - #choose format flv first - format = 'flv' + + format = self._downloader.params.get('format', None) + supported_format = config['data'][0]['streamfileids'].keys() + + if format is None or format == 'best': + if 'hd2' in supported_format: + format = 'hd2' + else: + format = 'flv' + ext = u'flv' + elif format == 'worst': + format = 'mp4' + ext = u'mp4' + else: + format = 'flv' + ext = u'flv' + fileid = config['data'][0]['streamfileids'][format] seg_number = len(config['data'][0]['segs'][format]) + keys=[] for i in xrange(seg_number): keys.append(config['data'][0]['segs'][format][i]['k']) @@ -3065,8 +3081,8 @@ class YoukuIE(InfoExtractor): 'id': '%s_part%02x' % (video_id, index), 'url': download_url, 'uploader': None, - 'title': '%s_part%02x' % (video_title, index), - 'ext': u'flv', + 'title': video_title + 'ext': ext, 'format': u'NA' } files_info.append(info) -- cgit v1.2.3 From 7733d455c8b5444395fa3ca0b198bbbb10ac5058 Mon Sep 17 00:00:00 2001 From: dongmao zhang Date: Thu, 9 Aug 2012 03:14:02 +0800 Subject: fix 0a->0A bug --- youtube_dl/InfoExtractors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 00953a254..bc26f2f30 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3074,11 +3074,11 @@ class YoukuIE(InfoExtractor): #fileid[7:9] should be changed for index, key in enumerate(keys): - temp_fileid = '%s%02x%s' % (fileid[0:8], index, fileid[10:]) - download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02x/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) + temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) + download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) print download_url info = { - 'id': '%s_part%02x' % (video_id, index), + 'id': '%s_part%d' % (video_id, index), 'url': download_url, 'uploader': None, 'title': video_title -- cgit v1.2.3 From b5809a68bf94e5aa1172e5415be8d7fb91877de8 Mon Sep 17 00:00:00 2001 From: dongmao zhang Date: Thu, 9 Aug 2012 12:26:26 +0800 Subject: merge --- a.c | 254 +++++++++++++++++++++++++++++++++++++++++++ youtube-dl | Bin 42046 -> 42040 bytes youtube_dl/InfoExtractors.py | 4 +- 3 files changed, 256 insertions(+), 2 deletions(-) create mode 100644 a.c diff --git a/a.c b/a.c new file mode 100644 index 000000000..50dc1cc91 --- /dev/null +++ b/a.c @@ -0,0 +1,254 @@ +#include +#include +#include +#include + +#define FLVF_HEADER 1 +#define FLVF_SCRIPT 2 + +struct flvhdr +{ + char fh_magic[3]; + char fh_version; + char fh_flags; + char fh_hlen[4]; + char fh_pads[4]; +}__attribute__((packed)); + +struct taghdr +{ + uint8_t th_type; + uint8_t th_dlen[3]; + uint8_t th_tstamp[3]; + uint8_t th_xstamp; + uint8_t th_streamid[3]; +}__attribute__((packed)); + +struct flvcombine +{ + FILE * fc_file; + uint32_t fc_flags; + uint32_t fc_timestamp; + uint32_t fc_filesize; + double fc_duration; + int fc_filesize_offset; + int fc_duration_offset; +}; + +/* duration, filesize */ + +uint32_t buftoint(const void *buf, size_t len) +{ + uint32_t bufint = 0; + const uint8_t *pval = (const uint8_t *)buf; + while (len-- > 0) + bufint = (bufint << 8) + *pval++; + return bufint; +} + +int dd_copy(FILE * dst_fp, FILE * src_fp, size_t dlen) +{ + size_t len; + char buf[64 * 1024]; + while (dlen > 0 && !feof(src_fp)) { + len = fread(buf, 1, dlen < sizeof(buf)? dlen: sizeof(buf), src_fp); + if (fwrite(buf, 1, len, dst_fp) != len) + break; + dlen -= len; + } + return dlen; +} + +void adjtimestamp(struct taghdr *header, uint32_t stampbase) +{ + uint32_t netval = 0; + uint32_t adjtime = stampbase; + adjtime += buftoint(&header->th_tstamp, sizeof(header->th_tstamp)); + adjtime += (header->th_xstamp << 24); + header->th_xstamp = (adjtime >> 24); + header->th_tstamp[0] = (adjtime >> 16); + header->th_tstamp[1] = (adjtime >> 8); + header->th_tstamp[2] = (adjtime >> 0); +} + +void update_metainfo(struct flvcombine *combine, FILE *fp, size_t dlen) +{ + int i; + size_t len; + char *pmem = NULL; + char buf[256 * 1024]; + double duration = 0.0; + uint8_t duration_bytes[8]; + printf("dlen: %d\n", dlen); + assert (dlen < (256 * 1024)); + + len = fread(buf, 1, dlen < sizeof(buf)? dlen: sizeof(buf), fp); + if (len == 0) + return; + pmem = (char *)memmem(buf, len, "duration", 8); + if (pmem == NULL || pmem + 17l - buf > len) + return; + memcpy(&duration_bytes, pmem + 9, 8); + for (i = 0; i < 4; i ++) { + uint8_t tmp = duration_bytes[i]; + duration_bytes[i] = duration_bytes[7 - i]; + duration_bytes[7 - i] = tmp; + } + memcpy(&duration, &duration_bytes, 8); + combine->fc_duration += duration; + if (combine->fc_flags & FLVF_SCRIPT) + return; + combine->fc_duration_offset = + combine->fc_filesize + (pmem + 9l - buf) + sizeof(struct taghdr); + pmem = (char *)memmem(buf, len, "filesize", 8); + if (pmem == NULL || pmem + 17l - buf > len) + return; + combine->fc_filesize_offset = + combine->fc_filesize + (pmem + 9l - buf) + sizeof(struct taghdr); +} + +int addflv(struct flvcombine *combine, const char *path) +{ + int error = 0; + FILE *fp, *fout; + char magic[4]; + long savepos; + size_t len, dlen, flags; + struct flvhdr header; + struct taghdr *last; + struct taghdr tagvideo; + struct taghdr tagaudio; + struct taghdr tagheader; + + fp = fopen(path, "rb"); + fout = combine->fc_file; + if (fp == NULL || fout == NULL) + return 0; + + last = NULL; + memset(magic, 0, sizeof(magic)); + memset(&tagvideo, 0, sizeof(tagvideo)); + memset(&tagaudio, 0, sizeof(tagaudio)); + + if ( !fread(&header, sizeof(header), 1, fp) ) + goto fail; + + memcpy(magic, header.fh_magic, 3); + if ( strcmp("FLV", magic) ) + goto fail; + + if ((combine->fc_flags & FLVF_HEADER) == 0) { + fwrite(&header, sizeof(header), 1, fout); + combine->fc_filesize += sizeof(header); + combine->fc_flags |= FLVF_HEADER; + } + + printf("magic: %s\n", magic); + printf("flags: 0x%02x\n", header.fh_flags); + printf("version: 0x%02x\n", header.fh_version); + printf("header len: %d\n", buftoint(header.fh_hlen, sizeof(header.fh_hlen))); + + while (feof(fp) == 0) { + if ( !fread(&tagheader, sizeof(tagheader), 1, fp) ) + goto fail; + + dlen = buftoint(tagheader.th_dlen, sizeof(tagheader.th_dlen)); + + switch (tagheader.th_type) + { + case 0x09: + adjtimestamp(&tagheader, combine->fc_timestamp); + tagvideo = tagheader; + last = &tagvideo; + break; + case 0x08: + adjtimestamp(&tagheader, combine->fc_timestamp); + tagaudio = tagheader; + last = &tagaudio; + break; + default: + flags = combine->fc_flags; + savepos = ftell(fp); + if (savepos == -1) + goto fail; + savepos = (flags & FLVF_SCRIPT)? (savepos + dlen + 4): savepos; + update_metainfo(combine, fp, dlen); + combine->fc_flags |= FLVF_SCRIPT; + if ( fseek(fp, savepos, SEEK_SET) ) + goto fail; + if (flags & FLVF_SCRIPT) + continue; + break; + } + fwrite(&tagheader, sizeof(tagheader), 1, fout); + combine->fc_filesize += sizeof(tagheader); + combine->fc_filesize += (dlen + 4); + if ( dd_copy(fout, fp, dlen + 4)) { + error = -__LINE__; + break; + } + } + +fail: + fclose(fp); + if (last == &tagvideo || last == &tagaudio) { + combine->fc_timestamp = buftoint(last->th_tstamp, sizeof(last->th_tstamp)); + combine->fc_timestamp |= (last->th_xstamp << 24); + printf("time stamp: %d\n", combine->fc_timestamp); + } + return 0; +} + +void fixedflv(struct flvcombine *context) +{ + int i; + double dblval = 0.0; + uint8_t dblbytes[8]; + FILE *fout = context->fc_file; + + if (context->fc_filesize_offset > 0) { + if ( fseek(fout, context->fc_filesize_offset, SEEK_SET) ) + return; + dblval = context->fc_filesize; + memcpy(dblbytes, &dblval, 8); + + for (i = 0; i < 4; i ++) { + uint8_t tmp = dblbytes[i]; + dblbytes[i] = dblbytes[7 - i]; + dblbytes[7 - i] = tmp; + } + fwrite(dblbytes, 8, 1, fout); + } + + if (context->fc_duration_offset > 0) { + if ( fseek(fout, context->fc_duration_offset, SEEK_SET) ) + return; + dblval = context->fc_duration; + memcpy(dblbytes, &dblval, 8); + + for (i = 0; i < 4; i ++) { + uint8_t tmp = dblbytes[i]; + dblbytes[i] = dblbytes[7 - i]; + dblbytes[7 - i] = tmp; + } + fwrite(dblbytes, 8, 1, fout); + } +} + +int main(int argc, char *argv[]) +{ + int i; + struct flvcombine context; + memset(&context, 0, sizeof(context)); + context.fc_file = fopen("out.flv", "wb"); + if (context.fc_file == NULL) + return -1; + context.fc_duration = 0; + for (i = 1; i < argc; i++) + addflv(&context, argv[i]); + fixedflv(&context); + fclose(context.fc_file); + + printf("seconds: %d\n", context.fc_timestamp); + return 0; +} diff --git a/youtube-dl b/youtube-dl index 09e205cc1..237181209 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index bc26f2f30..da2294a6b 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3078,10 +3078,10 @@ class YoukuIE(InfoExtractor): download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) print download_url info = { - 'id': '%s_part%d' % (video_id, index), + 'id': '%s_part%02d' % (video_id, index), 'url': download_url, 'uploader': None, - 'title': video_title + 'title': video_title, 'ext': ext, 'format': u'NA' } -- cgit v1.2.3 From 51661d86005ffbdd4debd051cd9683cf5f5c2fe9 Mon Sep 17 00:00:00 2001 From: dongmao zhang Date: Thu, 9 Aug 2012 02:04:02 +0800 Subject: add www.youku.com support --- youtube-dl | Bin 40663 -> 42041 bytes youtube_dl/InfoExtractors.py | 132 +++++++++++++++++++++++++++++++++++++++++++ youtube_dl/__init__.py | 1 + 3 files changed, 133 insertions(+) diff --git a/youtube-dl b/youtube-dl index f2268af22..635b1356e 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ddb4aa16b..869f5202c 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -13,6 +13,8 @@ import urllib import urllib2 import email.utils import xml.etree.ElementTree +import random +import math from urlparse import parse_qs try: @@ -2955,3 +2957,133 @@ class MTVIE(InfoExtractor): } return [info] + + + +class YoukuIE(InfoExtractor): + + _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P[A-Za-z0-9]+)\.html' + IE_NAME = u'Youku' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + def report_download_webpage(self, file_id): + """Report webpage download.""" + self._downloader.to_screen(u'[Youku] %s: Downloading webpage' % file_id) + + def report_extraction(self, file_id): + """Report information extraction.""" + self._downloader.to_screen(u'[Youku] %s: Extracting information' % file_id) + + def _gen_sid(self): + nowTime = int(time.time() * 1000) + random1 = random.randint(1000,1998) + random2 = random.randint(1000,9999) + + return "%d%d%d" %(nowTime,random1,random2) + + def _get_file_ID_mix_string(self, seed): + mixed = [] + source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890") + seed = float(seed) + for i in range(len(source)): + seed = (seed * 211 + 30031 ) % 65536 + index = math.floor(seed / 65536 * len(source) ) + mixed.append(source[int(index)]) + source.remove(source[int(index)]) + #return ''.join(mixed) + return mixed + + + def _get_file_id(self, fileId, seed): + mixed = self._get_file_ID_mix_string(seed) + ids = fileId.split('*') + realId = [] + for ch in ids: + if ch is not '': + realId.append(mixed[int(ch)]) + return ''.join(realId) + + def _gen_key(self, key1, key2): + pass + + + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + video_id = mobj.group('ID') + + info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id + + request = urllib2.Request(info_url, None, std_headers) + try: + self.report_download_webpage(video_id) + jsondata = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error) as err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + try: + config = json.loads(jsondata) + + video_title = config['data'][0]['title'] + seed = config['data'][0]['seed'] + + format = self._downloader.params.get('format', None) + supported_format = config['data'][0]['streamfileids'].keys() + + if format is None or format == 'best': + if 'hd2' in supported_format: + format = 'hd2' + else: + format = 'flv' + ext = u'flv' + elif format == 'worst': + format = 'mp4' + ext = u'mp4' + else: + format = 'flv' + ext = u'flv' + + + fileid = config['data'][0]['streamfileids'][format] + seg_number = len(config['data'][0]['segs'][format]) + + keys=[] + for i in xrange(seg_number): + keys.append(config['data'][0]['segs'][format][i]['k']) + + #TODO check error + #youku only could be viewed from mainland china + except: + self._downloader.trouble(u'ERROR: unable to extract info section') + return + + files_info=[] + sid = self._gen_sid() + fileid = self._get_file_id(fileid, seed) + + #column 8,9 of fileid represent the segment number + #fileid[7:9] should be changed + for index, key in enumerate(keys): + + temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) + download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) + print download_url + info = { + 'id': '%s_part%02d' % (video_id, index), + 'url': download_url, + 'uploader': None, + 'title': video_title, + 'ext': ext, + 'format': u'NA' + } + files_info.append(info) + + return files_info diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 86951840d..84b972d51 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -351,6 +351,7 @@ def gen_extractors(): MixcloudIE(), StanfordOpenClassroomIE(), MTVIE(), + YoukuIE(), GenericIE() ] -- cgit v1.2.3 From d5c4c4c10ed9a92122a8808d0df77cf553f3352e Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Sun, 19 Aug 2012 17:44:34 +0200 Subject: bugfix and standarize the youku.com support --- a.c | 254 ------------------------------------------- youtube-dl | Bin 40997 -> 41989 bytes youtube-dl.exe | Bin 3989787 -> 3991827 bytes youtube_dl/InfoExtractors.py | 15 +-- 4 files changed, 2 insertions(+), 267 deletions(-) delete mode 100644 a.c diff --git a/a.c b/a.c deleted file mode 100644 index 50dc1cc91..000000000 --- a/a.c +++ /dev/null @@ -1,254 +0,0 @@ -#include -#include -#include -#include - -#define FLVF_HEADER 1 -#define FLVF_SCRIPT 2 - -struct flvhdr -{ - char fh_magic[3]; - char fh_version; - char fh_flags; - char fh_hlen[4]; - char fh_pads[4]; -}__attribute__((packed)); - -struct taghdr -{ - uint8_t th_type; - uint8_t th_dlen[3]; - uint8_t th_tstamp[3]; - uint8_t th_xstamp; - uint8_t th_streamid[3]; -}__attribute__((packed)); - -struct flvcombine -{ - FILE * fc_file; - uint32_t fc_flags; - uint32_t fc_timestamp; - uint32_t fc_filesize; - double fc_duration; - int fc_filesize_offset; - int fc_duration_offset; -}; - -/* duration, filesize */ - -uint32_t buftoint(const void *buf, size_t len) -{ - uint32_t bufint = 0; - const uint8_t *pval = (const uint8_t *)buf; - while (len-- > 0) - bufint = (bufint << 8) + *pval++; - return bufint; -} - -int dd_copy(FILE * dst_fp, FILE * src_fp, size_t dlen) -{ - size_t len; - char buf[64 * 1024]; - while (dlen > 0 && !feof(src_fp)) { - len = fread(buf, 1, dlen < sizeof(buf)? dlen: sizeof(buf), src_fp); - if (fwrite(buf, 1, len, dst_fp) != len) - break; - dlen -= len; - } - return dlen; -} - -void adjtimestamp(struct taghdr *header, uint32_t stampbase) -{ - uint32_t netval = 0; - uint32_t adjtime = stampbase; - adjtime += buftoint(&header->th_tstamp, sizeof(header->th_tstamp)); - adjtime += (header->th_xstamp << 24); - header->th_xstamp = (adjtime >> 24); - header->th_tstamp[0] = (adjtime >> 16); - header->th_tstamp[1] = (adjtime >> 8); - header->th_tstamp[2] = (adjtime >> 0); -} - -void update_metainfo(struct flvcombine *combine, FILE *fp, size_t dlen) -{ - int i; - size_t len; - char *pmem = NULL; - char buf[256 * 1024]; - double duration = 0.0; - uint8_t duration_bytes[8]; - printf("dlen: %d\n", dlen); - assert (dlen < (256 * 1024)); - - len = fread(buf, 1, dlen < sizeof(buf)? dlen: sizeof(buf), fp); - if (len == 0) - return; - pmem = (char *)memmem(buf, len, "duration", 8); - if (pmem == NULL || pmem + 17l - buf > len) - return; - memcpy(&duration_bytes, pmem + 9, 8); - for (i = 0; i < 4; i ++) { - uint8_t tmp = duration_bytes[i]; - duration_bytes[i] = duration_bytes[7 - i]; - duration_bytes[7 - i] = tmp; - } - memcpy(&duration, &duration_bytes, 8); - combine->fc_duration += duration; - if (combine->fc_flags & FLVF_SCRIPT) - return; - combine->fc_duration_offset = - combine->fc_filesize + (pmem + 9l - buf) + sizeof(struct taghdr); - pmem = (char *)memmem(buf, len, "filesize", 8); - if (pmem == NULL || pmem + 17l - buf > len) - return; - combine->fc_filesize_offset = - combine->fc_filesize + (pmem + 9l - buf) + sizeof(struct taghdr); -} - -int addflv(struct flvcombine *combine, const char *path) -{ - int error = 0; - FILE *fp, *fout; - char magic[4]; - long savepos; - size_t len, dlen, flags; - struct flvhdr header; - struct taghdr *last; - struct taghdr tagvideo; - struct taghdr tagaudio; - struct taghdr tagheader; - - fp = fopen(path, "rb"); - fout = combine->fc_file; - if (fp == NULL || fout == NULL) - return 0; - - last = NULL; - memset(magic, 0, sizeof(magic)); - memset(&tagvideo, 0, sizeof(tagvideo)); - memset(&tagaudio, 0, sizeof(tagaudio)); - - if ( !fread(&header, sizeof(header), 1, fp) ) - goto fail; - - memcpy(magic, header.fh_magic, 3); - if ( strcmp("FLV", magic) ) - goto fail; - - if ((combine->fc_flags & FLVF_HEADER) == 0) { - fwrite(&header, sizeof(header), 1, fout); - combine->fc_filesize += sizeof(header); - combine->fc_flags |= FLVF_HEADER; - } - - printf("magic: %s\n", magic); - printf("flags: 0x%02x\n", header.fh_flags); - printf("version: 0x%02x\n", header.fh_version); - printf("header len: %d\n", buftoint(header.fh_hlen, sizeof(header.fh_hlen))); - - while (feof(fp) == 0) { - if ( !fread(&tagheader, sizeof(tagheader), 1, fp) ) - goto fail; - - dlen = buftoint(tagheader.th_dlen, sizeof(tagheader.th_dlen)); - - switch (tagheader.th_type) - { - case 0x09: - adjtimestamp(&tagheader, combine->fc_timestamp); - tagvideo = tagheader; - last = &tagvideo; - break; - case 0x08: - adjtimestamp(&tagheader, combine->fc_timestamp); - tagaudio = tagheader; - last = &tagaudio; - break; - default: - flags = combine->fc_flags; - savepos = ftell(fp); - if (savepos == -1) - goto fail; - savepos = (flags & FLVF_SCRIPT)? (savepos + dlen + 4): savepos; - update_metainfo(combine, fp, dlen); - combine->fc_flags |= FLVF_SCRIPT; - if ( fseek(fp, savepos, SEEK_SET) ) - goto fail; - if (flags & FLVF_SCRIPT) - continue; - break; - } - fwrite(&tagheader, sizeof(tagheader), 1, fout); - combine->fc_filesize += sizeof(tagheader); - combine->fc_filesize += (dlen + 4); - if ( dd_copy(fout, fp, dlen + 4)) { - error = -__LINE__; - break; - } - } - -fail: - fclose(fp); - if (last == &tagvideo || last == &tagaudio) { - combine->fc_timestamp = buftoint(last->th_tstamp, sizeof(last->th_tstamp)); - combine->fc_timestamp |= (last->th_xstamp << 24); - printf("time stamp: %d\n", combine->fc_timestamp); - } - return 0; -} - -void fixedflv(struct flvcombine *context) -{ - int i; - double dblval = 0.0; - uint8_t dblbytes[8]; - FILE *fout = context->fc_file; - - if (context->fc_filesize_offset > 0) { - if ( fseek(fout, context->fc_filesize_offset, SEEK_SET) ) - return; - dblval = context->fc_filesize; - memcpy(dblbytes, &dblval, 8); - - for (i = 0; i < 4; i ++) { - uint8_t tmp = dblbytes[i]; - dblbytes[i] = dblbytes[7 - i]; - dblbytes[7 - i] = tmp; - } - fwrite(dblbytes, 8, 1, fout); - } - - if (context->fc_duration_offset > 0) { - if ( fseek(fout, context->fc_duration_offset, SEEK_SET) ) - return; - dblval = context->fc_duration; - memcpy(dblbytes, &dblval, 8); - - for (i = 0; i < 4; i ++) { - uint8_t tmp = dblbytes[i]; - dblbytes[i] = dblbytes[7 - i]; - dblbytes[7 - i] = tmp; - } - fwrite(dblbytes, 8, 1, fout); - } -} - -int main(int argc, char *argv[]) -{ - int i; - struct flvcombine context; - memset(&context, 0, sizeof(context)); - context.fc_file = fopen("out.flv", "wb"); - if (context.fc_file == NULL) - return -1; - context.fc_duration = 0; - for (i = 1; i < argc; i++) - addflv(&context, argv[i]); - fixedflv(&context); - fclose(context.fc_file); - - printf("seconds: %d\n", context.fc_timestamp); - return 0; -} diff --git a/youtube-dl b/youtube-dl index 789cb58f5..448b2cd26 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.exe b/youtube-dl.exe index 2aa052ccf..b7123fc16 100755 Binary files a/youtube-dl.exe and b/youtube-dl.exe differ diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 3b24f593d..d6748981b 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2995,22 +2995,15 @@ class YoukuIE(InfoExtractor): #return ''.join(mixed) return mixed - def _get_file_id(self, fileId, seed): mixed = self._get_file_ID_mix_string(seed) ids = fileId.split('*') realId = [] for ch in ids: - if ch is not '': + if ch: realId.append(mixed[int(ch)]) return ''.join(realId) - def _gen_key(self, key1, key2): - pass - - - - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3019,10 +3012,6 @@ class YoukuIE(InfoExtractor): video_id = mobj.group('ID') info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id -<<<<<<< HEAD - print info_url -======= ->>>>>>> 51661d86005ffbdd4debd051cd9683cf5f5c2fe9 request = urllib2.Request(info_url, None, std_headers) try: @@ -3079,7 +3068,7 @@ class YoukuIE(InfoExtractor): temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:]) download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key) - print download_url + info = { 'id': '%s_part%02d' % (video_id, index), 'url': download_url, -- cgit v1.2.3 From 795cc5059a6f349f861e246269d6cced39b3a753 Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Sun, 19 Aug 2012 18:46:23 +0200 Subject: Re-engineered XNXXIE to actually exit on ERRORs even with -i --- youtube-dl | Bin 41989 -> 42211 bytes youtube-dl.exe | Bin 3991827 -> 3992216 bytes youtube_dl/InfoExtractors.py | 48 +++++++++++++++++++------------------------ 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/youtube-dl b/youtube-dl index 448b2cd26..a7c5ee6f9 100755 Binary files a/youtube-dl and b/youtube-dl differ diff --git a/youtube-dl.exe b/youtube-dl.exe index b7123fc16..fbed2841a 100755 Binary files a/youtube-dl.exe and b/youtube-dl.exe differ diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 2b313966f..6c4a1b1f1 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3098,30 +3098,6 @@ class XNXXIE(InfoExtractor): """Report information extraction""" self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) - def extract_video_url(self, webpage): - "Extract the url for the video from the webpage" - - result = re.search(self.VIDEO_URL_RE, webpage) - if result is None: - self._downloader.trouble(u'ERROR: unable to extract video url') - return urllib.unquote(result.group(1).decode('utf-8')) - - def extract_video_title(self, webpage): - "Extract the title for the video from the webpage" - - result = re.search(self.VIDEO_TITLE_RE, webpage) - if result is None: - self._downloader.trouble(u'ERROR: unable to extract video title') - return result.group(1).decode('utf-8') - - def extract_video_thumbnail(self, webpage): - "Extract the thumbnail for the video from the webpage" - - result = re.search(self.VIDEO_THUMB_RE, webpage) - if result is None: - self._downloader.trouble(u'ERROR: unable to extract video thumbnail') - return result.group(1).decode('utf-8') - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -3138,14 +3114,32 @@ class XNXXIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err) return + result = re.search(self.VIDEO_URL_RE, webpage) + if result is None: + self._downloader.trouble(u'ERROR: unable to extract video url') + return + video_url = urllib.unquote(result.group(1).decode('utf-8')) + + result = re.search(self.VIDEO_TITLE_RE, webpage) + if result is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = result.group(1).decode('utf-8') + + result = re.search(self.VIDEO_THUMB_RE, webpage) + if result is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = result.group(1).decode('utf-8') + info = {'id': video_id, - 'url': self.extract_video_url(webpage), + 'url': video_url, 'uploader': None, 'upload_date': None, - 'title': self.extract_video_title(webpage), + 'title': video_title, 'ext': 'flv', 'format': 'flv', - 'thumbnail': self.extract_video_thumbnail(webpage), + 'thumbnail': video_thumbnail, 'description': None, 'player_url': None} -- cgit v1.2.3