From c1c9a79c49e8656f3244744e6f4e336e47a03206 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 04:27:09 +0200 Subject: Add basic --download-archive option Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the meantime. When --download-archive FILE is given, the extractor and ID of every download are recorded in the specified file. If it is already present, the video in question is skipped. --- youtube_dl/YoutubeDL.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'youtube_dl/YoutubeDL.py') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2503fd09b..1f5f75e30 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -3,6 +3,7 @@ from __future__ import absolute_import +import errno import io import os import re @@ -84,6 +85,9 @@ class YoutubeDL(object): cachedir: Location of the cache files in the filesystem. None to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. + downloadarchive: File name of a file where all downloads are recorded. + Videos already present in the file are not downloaded + again. 
The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -309,6 +313,9 @@ class YoutubeDL(object): dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + if self.in_download_archive(info_dict): + return (u'%(title)s) has already been recorded in archive' + % info_dict) return None def extract_info(self, url, download=True, ie_key=None, extra_info={}): @@ -578,6 +585,8 @@ class YoutubeDL(object): self.report_error(u'postprocessing: %s' % str(err)) return + self.record_download_archive(info_dict) + def download(self, url_list): """Download a given list of URLs.""" if len(url_list) > 1 and self.fixed_template(): @@ -617,3 +626,26 @@ class YoutubeDL(object): os.remove(encodeFilename(filename)) except (IOError, OSError): self.report_warning(u'Unable to remove downloaded video file') + + def in_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return False + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + if line.strip() == vid_id: + return True + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + + def record_download_archive(self, info_dict): + fn = self.params.get('download_archive') + if fn is None: + return + vid_id = info_dict['extractor'] + u' ' + info_dict['id'] + with locked_file(fn, 'a', encoding='utf-8') as archive_file: + archive_file.write(vid_id + u'\n') -- cgit v1.2.3 From ee6c9f95e1e5cf118b0bdf6abc8376bd95bc7dcf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Oct 2013 16:28:36 +0200 Subject: Remove superfluous parenthesis --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/YoutubeDL.py') diff --git a/youtube_dl/YoutubeDL.py 
b/youtube_dl/YoutubeDL.py index 1f5f75e30..856e9ac92 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -314,7 +314,7 @@ class YoutubeDL(object): if date not in dateRange: return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) if self.in_download_archive(info_dict): - return (u'%(title)s) has already been recorded in archive' + return (u'%(title)s has already been recorded in archive' % info_dict) return None -- cgit v1.2.3