1 files changed, 206 insertions, 0 deletions
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
new file mode 100644
index 00000000..4931bda2
--- /dev/null
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -0,0 +1,206 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import requests, codecs
+import csv
+from urlparse import urlparse
+
+from mediagoblin.gmg_commands import util as commands_util
+from mediagoblin.submit.lib import (
+    submit_media, get_upload_file_limits,
+    FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
+from mediagoblin.tools.metadata import compact_and_validate
+from mediagoblin.tools.translate import pass_to_ugettext as _
+from jsonschema.exceptions import ValidationError
+
+
+def parser_setup(subparser):
+    """Describe the batchaddmedia command and declare its arguments."""
+    subparser.description = (
+        "This command allows the administrator to upload many media files "
+        "at once.")
+    subparser.epilog = _(u"""For more information about how to properly run this
+script (and how to format the metadata csv file), read the MediaGoblin
+documentation page on command line uploading
+<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
+    add_argument = subparser.add_argument
+    add_argument(
+        'username', help=_(u"Name of user these media entries belong to"))
+    add_argument(
+        'metadata_path',
+        help=_(u"""Path to the csv file containing metadata information."""))
+    add_argument(
+        '--celery', action='store_true',
+        help=_(u"Don't process eagerly, pass off to celery"))
+
+
+def batchaddmedia(args):
+    """
+    Submit every media file listed in a metadata csv file for one user.
+
+    args is the parsed command line: username, metadata_path and celery.
+    A row whose metadata is invalid or whose file cannot be opened is
+    reported and skipped; a tally of submitted files is printed at the end.
+    """
+    # Run eagerly unless explicitly set not to
+    if not args.celery:
+        os.environ['CELERY_ALWAYS_EAGER'] = 'true'
+
+    app = commands_util.setup_app(args)
+    files_uploaded, files_attempted = 0, 0
+
+    # get the user
+    user = app.db.User.query.filter_by(username=args.username.lower()).first()
+    if user is None:
+        # Translate the template, then fill it in: _() must see the msgid.
+        print _(u"Sorry, no user by username '{username}' exists").format(
+            username=args.username)
+        return
+
+    if not os.path.isfile(args.metadata_path):
+        print _(u'File at {path} not found, use -h flag for help').format(
+            path=args.metadata_path)
+        return
+
+    abs_metadata_filename = os.path.abspath(args.metadata_path)
+    abs_metadata_dir = os.path.dirname(abs_metadata_filename)
+    upload_limit, max_file_size = get_upload_file_limits(user)
+
+    def maybe_unicodeify(some_string):
+        # Leave None alone; everything else is coerced to unicode.
+        return None if some_string is None else unicode(some_string)
+
+    with codecs.open(
+            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
+        media_metadata = parse_csv_file(all_metadata.read())
+
+    for media_id, file_metadata in media_metadata.iteritems():
+        files_attempted += 1
+
+        ### Pull the important media information for mediagoblin from the
+        ### metadata, if it is provided.
+        title = file_metadata.get('title') or file_metadata.get('dc:title')
+        description = (file_metadata.get('description') or
+            file_metadata.get('dc:description'))
+        license = file_metadata.get('license')
+        original_location = file_metadata.get('location') or u''
+
+        try:
+            json_ld_metadata = compact_and_validate(file_metadata)
+        except ValidationError as exc:
+            print _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
+Metadata was not uploaded.""").format(
+                media_id=media_id,
+                # The path is empty when the error is not tied to one key.
+                error_path=exc.path[0] if exc.path else u'',
+                error_msg=exc.message)
+            continue
+
+        url = urlparse(original_location)
+        # Name the upload after the last path segment; split() without an
+        # argument cuts on whitespace, not on "/".
+        filename = os.path.basename(url.path)
+        local_file = None
+
+        if url.scheme in ('http', 'https'):
+            res = requests.get(url.geturl(), stream=True)
+            media_file = res.raw
+        elif url.scheme == '' and url.path:
+            path = url.path
+            if os.path.isabs(path):
+                file_abs_path = os.path.abspath(path)
+            else:
+                file_path = os.path.join(abs_metadata_dir, path)
+                file_abs_path = os.path.abspath(file_path)
+            try:
+                # Media is binary data, so it must not be read in text mode.
+                media_file = local_file = open(file_abs_path, 'rb')
+            except IOError:
+                print _(u"""\
+FAIL: Local file {filename} could not be accessed.
+{filename} will not be uploaded.""").format(filename=filename)
+                continue
+        else:
+            # Missing location or unknown scheme: never reuse a stale file.
+            print _(u"FAIL: Cannot handle location '{location}'.").format(
+                location=original_location)
+            continue
+
+        try:
+            submit_media(
+                mg_app=app, user=user,
+                submitted_file=media_file, filename=filename,
+                title=maybe_unicodeify(title),
+                description=maybe_unicodeify(description),
+                license=maybe_unicodeify(license),
+                metadata=json_ld_metadata, tags_string=u"",
+                upload_limit=upload_limit, max_file_size=max_file_size)
+            print _(u"""Successfully submitted {filename}!
+Be sure to look at the Media Processing Panel on your website to be sure it
+uploaded successfully.""").format(filename=filename)
+            files_uploaded += 1
+        except FileUploadLimit:
+            print _(
+u"FAIL: This file is larger than the upload limits for this site.")
+        except UserUploadLimit:
+            print _(
+"FAIL: This file will put this user past their upload limits.")
+        except UserPastUploadLimit:
+            print _("FAIL: This user is already past their upload limits.")
+        finally:
+            if local_file is not None:
+                local_file.close()
+    print _(
+"{files_uploaded} out of {files_attempted} files successfully submitted").format(
+        files_uploaded=files_uploaded,
+        files_attempted=files_attempted)
+
+
+def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
+    """Yield each csv row of unicode_csv_data as a list of unicode cells."""
+    # The csv module cannot read unicode here, so it is fed UTF-8 bytes ...
+    encoded_lines = utf_8_encoder(unicode_csv_data)
+    for encoded_row in csv.reader(encoded_lines, dialect=dialect, **kwargs):
+        # ... and every parsed cell is decoded back to unicode afterwards.
+        yield [unicode(cell, 'utf-8') for cell in encoded_row]
+
+def utf_8_encoder(unicode_csv_data):
+    """Lazily yield every line of unicode_csv_data encoded as UTF-8."""
+    return (text.encode('utf-8') for text in unicode_csv_data)
+
+def parse_csv_file(file_contents):
+    """
+    Convert the text of a metadata csv file into a dictionary of rows.
+
+    The first row names the columns. Every later non-blank row becomes a
+    dictionary of column name to cell, stored under the row's 'id' cell or,
+    failing that, under its zero-based row position after the header.
+    """
+    # Normalise line endings so "\r\n" files leave no "\r" on the last column
+    # name; keeping "\n" on each line lets quoted cells span several lines.
+    text = file_contents.replace('\r\n', '\n').replace('\r', '\n')
+    rows = unicode_csv_reader(line + '\n' for line in text.split('\n'))
+    key = next(rows, [])
+    objects_dict = {}
+    for index, values in enumerate(rows):
+        if len(values) <= 1 and not u''.join(values).strip():
+            continue
+        # zip() ignores surplus cells where key[i] used to raise IndexError.
+        line_dict = dict(zip(key, values))
+        objects_dict[line_dict.get('id') or index] = line_dict
+    return objects_dict
+