Diffstat (limited to 'mediagoblin/gmg_commands/batchaddmedia.py')
-rw-r--r-- mediagoblin/gmg_commands/batchaddmedia.py | 136
1 file changed, 60 insertions, 76 deletions
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
index 55ed865b..88fa3e5a 100644
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -14,19 +14,18 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-from __future__ import print_function
+from __future__ import print_function, unicode_literals
-import codecs
import csv
import os
-import sys
+import shutil
+import tempfile
import requests
import six
-
from six.moves.urllib.parse import urlparse
-from mediagoblin.db.models import LocalUser
+from mediagoblin.db.models import LocalUser, MediaEntry
from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
submit_media, FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
@@ -38,21 +37,21 @@ from jsonschema.exceptions import ValidationError
def parser_setup(subparser):
subparser.description = """\
This command allows the administrator to upload many media files at once."""
- subparser.epilog = _(u"""For more information about how to properly run this
+ subparser.epilog = _("""For more information about how to properly run this
script (and how to format the metadata csv file), read the MediaGoblin
documentation page on command line uploading
<http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""")
subparser.add_argument(
'username',
- help=_(u"Name of user these media entries belong to"))
+ help=_("Name of user these media entries belong to"))
subparser.add_argument(
'metadata_path',
help=_(
-u"""Path to the csv file containing metadata information."""))
+"""Path to the csv file containing metadata information."""))
subparser.add_argument(
'--celery',
action='store_true',
- help=_(u"Don't process eagerly, pass off to celery"))
+ help=_("Don't process eagerly, pass off to celery"))
def batchaddmedia(args):
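For reference, with the arguments registered above the command is invoked roughly like this (the username and path are hypothetical; `gmg` is MediaGoblin's command-line entry point):

    gmg batchaddmedia alice /path/to/metadata.csv --celery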
@@ -69,7 +68,7 @@ def batchaddmedia(args):
LocalUser.username==args.username.lower()
).first()
if user is None:
- print(_(u"Sorry, no user by username '{username}' exists".format(
+ print(_("Sorry, no user by username '{username}' exists".format(
username=args.username)))
return
@@ -77,7 +76,7 @@ def batchaddmedia(args):
metadata_path = args.metadata_path
else:
- error = _(u'File at {path} not found, use -h flag for help'.format(
+ error = _('File at {path} not found, use -h flag for help'.format(
path=args.metadata_path))
print(error)
return
@@ -85,19 +84,12 @@ def batchaddmedia(args):
abs_metadata_filename = os.path.abspath(metadata_path)
abs_metadata_dir = os.path.dirname(abs_metadata_filename)
- def maybe_unicodeify(some_string):
- # this is kinda terrible
- if some_string is None:
- return None
- else:
- return six.text_type(some_string)
-
- with codecs.open(
- abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
- contents = all_metadata.read()
- media_metadata = parse_csv_file(contents)
+ all_metadata = open(abs_metadata_filename, 'r')
+ media_metadata = csv.DictReader(all_metadata)
+ for index, file_metadata in enumerate(media_metadata):
+ if six.PY2:
+ file_metadata = {k.decode('utf-8'): v.decode('utf-8') for k, v in file_metadata.items()}
- for media_id, file_metadata in media_metadata.items():
files_attempted += 1
# In case the metadata was not uploaded, initialize an empty dictionary.
json_ld_metadata = compact_and_validate({})
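A minimal standalone sketch of the reading strategy adopted above: csv.DictReader yields one dict per row, and on Python 2 its bytestring keys and values are decoded to text (the file name and column name here are hypothetical):

    import csv

    import six

    with open('metadata.csv', 'r') as csvfile:
        for index, row in enumerate(csv.DictReader(csvfile)):
            # Python 2's csv module yields bytestrings; decode so the rest
            # of the code can assume text, as the hunk above does.
            if six.PY2:
                row = {k.decode('utf-8'): v.decode('utf-8')
                       for k, v in row.items()}
            print(index, row.get('title'))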
@@ -108,6 +100,7 @@ def batchaddmedia(args):
### Pull the important media information for mediagoblin from the
### metadata, if it is provided.
+ slug = file_metadata.get('slug')
title = file_metadata.get('title') or file_metadata.get('dc:title')
description = (file_metadata.get('description') or
file_metadata.get('dc:description'))
@@ -117,7 +110,8 @@ def batchaddmedia(args):
try:
json_ld_metadata = compact_and_validate(file_metadata)
except ValidationError as exc:
- error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg}
+ media_id = file_metadata.get('id') or index
+ error = _("""Error with media '{media_id}' value '{error_path}': {error_msg}
Metadata was not uploaded.""".format(
media_id=media_id,
error_path=exc.path[0],
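The error reporting above leans on jsonschema's ValidationError, whose .path holds the location of the failing value and .message the human-readable cause. A small illustration (the schema and data are made up):

    from jsonschema import validate
    from jsonschema.exceptions import ValidationError

    schema = {'properties': {'dc:title': {'type': 'string'}}}
    try:
        validate({'dc:title': 123}, schema)
    except ValidationError as exc:
        # exc.path -> deque(['dc:title']); exc.message explains the failure.
        print(exc.path[0], exc.message)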
@@ -125,12 +119,36 @@ Metadata was not uploaded.""".format(
print(error)
continue
+ if slug and MediaEntry.query.filter_by(actor=user.id, slug=slug).count():
+ # Avoid re-importing media from a previous batch run. Note that this
+ # check isn't quite robust enough, since it requires that a slug is
+ # specified. Probably needs to be based on "location" since this is
+ # the only required field.
+ error = '{}: {}'.format(
+ slug, _('An entry with that slug already exists for this user.'))
+ print(error)
+ continue
+
url = urlparse(original_location)
filename = url.path.split()[-1]
- if url.scheme == 'http':
+ if url.scheme.startswith('http'):
res = requests.get(url.geturl(), stream=True)
- media_file = res.raw
+ if res.headers.get('content-encoding'):
+ # The requests library's "raw" attribute does not undo content
+ # encoding. An alternative would be to use iter_content() and
+ # write the decoded chunks to the temporary file.
+ raise NotImplementedError('URL-based media with content-encoding (e.g. gzip) are not currently supported.')
+
+ # To avoid loading the media into memory all at once, we write it to
+ # a file before importing. This currently requires free space up to
+ # twice the size of the media file. Memory use can be tested by
+ # running something like `ulimit -Sv 200000` before running
+ # `batchaddmedia` to upload a file larger than 200MB.
+ media_file = tempfile.TemporaryFile()
+ shutil.copyfileobj(res.raw, media_file)
+ if six.PY2:
+ media_file.seek(0)
elif url.scheme == '':
path = url.path
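In isolation, the download-to-temp-file pattern introduced above looks like this (the URL is hypothetical; only requests and the standard library are involved):

    import shutil
    import tempfile

    import requests

    res = requests.get('https://example.org/pics/cat.jpg', stream=True)
    if res.headers.get('content-encoding'):
        # res.raw bypasses requests' transparent decoding, so a compressed
        # body would be written to disk still encoded.
        raise NotImplementedError('content-encoding not supported')
    media_file = tempfile.TemporaryFile()
    # Stream in chunks rather than reading res.content, keeping memory
    # use bounded regardless of the media file's size.
    shutil.copyfileobj(res.raw, media_file)
    media_file.seek(0)  # rewind before handing the file to a consumer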
@@ -142,76 +160,42 @@ Metadata was not uploaded.""".format(
try:
media_file = open(file_abs_path, 'rb')
except IOError:
- print(_(u"""\
+ print(_("""\
FAIL: Local file {filename} could not be accessed.
{filename} will not be uploaded.""".format(filename=filename)))
continue
try:
- submit_media(
+ entry = submit_media(
mg_app=app,
user=user,
submitted_file=media_file,
filename=filename,
- title=maybe_unicodeify(title),
- description=maybe_unicodeify(description),
- collection_slug=maybe_unicodeify(collection_slug),
- license=maybe_unicodeify(license),
+ title=title,
+ description=description,
+ collection_slug=collection_slug,
+ license=license,
metadata=json_ld_metadata,
- tags_string=u"")
- print(_(u"""Successfully submitted {filename}!
+ tags_string="")
+ if slug:
+ # Slug is automatically set by submit_media, so overwrite it
+ # with the desired slug.
+ entry.slug = slug
+ entry.save()
+ print(_("""Successfully submitted {filename}!
Be sure to look at the Media Processing Panel on your website to be sure it
uploaded successfully.""".format(filename=filename)))
files_uploaded += 1
except FileUploadLimit:
print(_(
-u"FAIL: This file is larger than the upload limits for this site."))
+"FAIL: This file is larger than the upload limits for this site."))
except UserUploadLimit:
print(_(
"FAIL: This file will put this user past their upload limits."))
except UserPastUploadLimit:
print(_("FAIL: This user is already past their upload limits."))
+ finally:
+ media_file.close()
print(_(
"{files_uploaded} out of {files_attempted} files successfully submitted".format(
files_uploaded=files_uploaded,
files_attempted=files_attempted)))
-
-
-def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
- # csv.py doesn't do Unicode; encode temporarily as UTF-8:
- # TODO: this probably won't be necessary in Python 3
- csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
- dialect=dialect, **kwargs)
- for row in csv_reader:
- # decode UTF-8 back to Unicode, cell by cell:
- yield [six.text_type(cell, 'utf-8') for cell in row]
-
-def utf_8_encoder(unicode_csv_data):
- for line in unicode_csv_data:
- yield line.encode('utf-8')
-
-def parse_csv_file(file_contents):
- """
- The helper function which converts the csv file into a dictionary where each
- item's key is the provided value 'id' and each item's value is another
- dictionary.
- """
- list_of_contents = file_contents.split('\n')
- key, lines = (list_of_contents[0].split(','),
- list_of_contents[1:])
- objects_dict = {}
-
- # Build a dictionary
- for index, line in enumerate(lines):
- if line.isspace() or line == u'': continue
- if (sys.version_info[0] == 3):
- # Python 3's csv.py supports Unicode out of the box.
- reader = csv.reader([line])
- else:
- reader = unicode_csv_reader([line])
- values = next(reader)
- line_dict = dict([(key[i], val)
- for i, val in enumerate(values)])
- media_id = line_dict.get('id') or index
- objects_dict[media_id] = (line_dict)
-
- return objects_dict
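For context, the metadata file this command now consumes is plain CSV with one row per media file. A plausible minimal example, using the columns the code reads ('location' is the only required field per the comment above; the exact column set is documented on the command-line-upload page linked in the epilog, and these values are made up):

    id,location,slug,title,description,license
    1,https://example.org/pics/cat.jpg,my-cat,My Cat,A photo of my cat,http://creativecommons.org/licenses/by-sa/4.0/
    2,images/dog.png,my-dog,My Dog,A local file next to the CSV,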