diff options
Diffstat (limited to 'mediagoblin')
5 files changed, 74 insertions, 81 deletions
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 55ed865b..88fa3e5a 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -14,19 +14,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. -from __future__ import print_function +from __future__ import print_function, unicode_literals -import codecs import csv import os -import sys +import shutil +import tempfile import requests import six - from six.moves.urllib.parse import urlparse -from mediagoblin.db.models import LocalUser +from mediagoblin.db.models import LocalUser, MediaEntry from mediagoblin.gmg_commands import util as commands_util from mediagoblin.submit.lib import ( submit_media, FileUploadLimit, UserUploadLimit, UserPastUploadLimit) @@ -38,21 +37,21 @@ from jsonschema.exceptions import ValidationError def parser_setup(subparser): subparser.description = """\ This command allows the administrator to upload many media files at once.""" - subparser.epilog = _(u"""For more information about how to properly run this + subparser.epilog = _("""For more information about how to properly run this script (and how to format the metadata csv file), read the MediaGoblin documentation page on command line uploading <http://docs.mediagoblin.org/siteadmin/commandline-upload.html>""") subparser.add_argument( 'username', - help=_(u"Name of user these media entries belong to")) + help=_("Name of user these media entries belong to")) subparser.add_argument( 'metadata_path', help=_( -u"""Path to the csv file containing metadata information.""")) +"""Path to the csv file containing metadata information.""")) subparser.add_argument( '--celery', action='store_true', - help=_(u"Don't process eagerly, pass off to celery")) + help=_("Don't process eagerly, pass off to celery")) def batchaddmedia(args): @@ -69,7 +68,7 @@ def batchaddmedia(args): LocalUser.username==args.username.lower() ).first() if user is None: - print(_(u"Sorry, no user by username '{username}' exists".format( + print(_("Sorry, no user by username '{username}' exists".format( username=args.username))) return @@ -77,7 +76,7 @@ def batchaddmedia(args): metadata_path = args.metadata_path else: - error = _(u'File at {path} not found, use -h flag for help'.format( + error = _('File at {path} not found, use -h flag for help'.format( path=args.metadata_path)) print(error) return @@ -85,19 +84,12 @@ def batchaddmedia(args): abs_metadata_filename = os.path.abspath(metadata_path) abs_metadata_dir = os.path.dirname(abs_metadata_filename) - def maybe_unicodeify(some_string): - # this is kinda terrible - if some_string is None: - return None - else: - return six.text_type(some_string) - - with codecs.open( - abs_metadata_filename, 'r', encoding='utf-8') as all_metadata: - contents = all_metadata.read() - media_metadata = parse_csv_file(contents) + all_metadata = open(abs_metadata_filename, 'r') + media_metadata = csv.DictReader(all_metadata) + for index, file_metadata in enumerate(media_metadata): + if six.PY2: + file_metadata = {k.decode('utf-8'): v.decode('utf-8') for k, v in file_metadata.items()} - for media_id, file_metadata in media_metadata.items(): files_attempted += 1 # In case the metadata was not uploaded initialize an empty dictionary. json_ld_metadata = compact_and_validate({}) @@ -108,6 +100,7 @@ def batchaddmedia(args): ### Pull the important media information for mediagoblin from the ### metadata, if it is provided. + slug = file_metadata.get('slug') title = file_metadata.get('title') or file_metadata.get('dc:title') description = (file_metadata.get('description') or file_metadata.get('dc:description')) @@ -117,7 +110,8 @@ def batchaddmedia(args): try: json_ld_metadata = compact_and_validate(file_metadata) except ValidationError as exc: - error = _(u"""Error with media '{media_id}' value '{error_path}': {error_msg} + media_id = file_metadata.get('id') or index + error = _("""Error with media '{media_id}' value '{error_path}': {error_msg} Metadata was not uploaded.""".format( media_id=media_id, error_path=exc.path[0], @@ -125,12 +119,36 @@ Metadata was not uploaded.""".format( print(error) continue + if slug and MediaEntry.query.filter_by(actor=user.id, slug=slug).count(): + # Avoid re-importing media from a previous batch run. Note that this + # check isn't quite robust enough, since it requires that a slug is + # specified. Probably needs to be based on "location" since this is + # the only required field. + error = '{}: {}'.format( + slug, _('An entry with that slug already exists for this user.')) + print(error) + continue + url = urlparse(original_location) filename = url.path.split()[-1] - if url.scheme == 'http': + if url.scheme.startswith('http'): res = requests.get(url.geturl(), stream=True) - media_file = res.raw + if res.headers.get('content-encoding'): + # The requests library's "raw" method does not deal with content + # encoding. Alternative could be to use iter_content(), and + # write chunks to the temporary file. + raise NotImplementedError('URL-based media with content-encoding (eg. gzip) are not currently supported.') + + # To avoid loading the media into memory all at once, we write it to + # a file before importing. This currently requires free space up to + # twice the size of the media file. Memory use can be tested by + # running something like `ulimit -Sv 200000` before running + # `batchaddmedia` to upload a file larger than 200MB. + media_file = tempfile.TemporaryFile() + shutil.copyfileobj(res.raw, media_file) + if six.PY2: + media_file.seek(0) elif url.scheme == '': path = url.path @@ -142,76 +160,42 @@ Metadata was not uploaded.""".format( try: media_file = open(file_abs_path, 'rb') except IOError: - print(_(u"""\ + print(_("""\ FAIL: Local file {filename} could not be accessed. {filename} will not be uploaded.""".format(filename=filename))) continue try: - submit_media( + entry = submit_media( mg_app=app, user=user, submitted_file=media_file, filename=filename, - title=maybe_unicodeify(title), - description=maybe_unicodeify(description), - collection_slug=maybe_unicodeify(collection_slug), - license=maybe_unicodeify(license), + title=title, + description=description, + collection_slug=collection_slug, + license=license, metadata=json_ld_metadata, - tags_string=u"") - print(_(u"""Successfully submitted {filename}! + tags_string="") + if slug: + # Slug is automatically set by submit_media, so overwrite it + # with the desired slug. + entry.slug = slug + entry.save() + print(_("""Successfully submitted {filename}! Be sure to look at the Media Processing Panel on your website to be sure it uploaded successfully.""".format(filename=filename))) files_uploaded += 1 except FileUploadLimit: print(_( -u"FAIL: This file is larger than the upload limits for this site.")) +"FAIL: This file is larger than the upload limits for this site.")) except UserUploadLimit: print(_( "FAIL: This file will put this user past their upload limits.")) except UserPastUploadLimit: print(_("FAIL: This user is already past their upload limits.")) + finally: + media_file.close() print(_( "{files_uploaded} out of {files_attempted} files successfully submitted".format( files_uploaded=files_uploaded, files_attempted=files_attempted))) - - -def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): - # csv.py doesn't do Unicode; encode temporarily as UTF-8: - # TODO: this probably won't be necessary in Python 3 - csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), - dialect=dialect, **kwargs) - for row in csv_reader: - # decode UTF-8 back to Unicode, cell by cell: - yield [six.text_type(cell, 'utf-8') for cell in row] - -def utf_8_encoder(unicode_csv_data): - for line in unicode_csv_data: - yield line.encode('utf-8') - -def parse_csv_file(file_contents): - """ - The helper function which converts the csv file into a dictionary where each - item's key is the provided value 'id' and each item's value is another - dictionary. - """ - list_of_contents = file_contents.split('\n') - key, lines = (list_of_contents[0].split(','), - list_of_contents[1:]) - objects_dict = {} - - # Build a dictionary - for index, line in enumerate(lines): - if line.isspace() or line == u'': continue - if (sys.version_info[0] == 3): - # Python 3's csv.py supports Unicode out of the box. - reader = csv.reader([line]) - else: - reader = unicode_csv_reader([line]) - values = next(reader) - line_dict = dict([(key[i], val) - for i, val in enumerate(values)]) - media_id = line_dict.get('id') or index - objects_dict[media_id] = (line_dict) - - return objects_dict diff --git a/mediagoblin/init/config.py b/mediagoblin/init/config.py index fe469156..2e22083a 100644 --- a/mediagoblin/init/config.py +++ b/mediagoblin/init/config.py @@ -84,6 +84,15 @@ def read_mediagoblin_config(config_path, config_spec_path=CONFIG_SPEC_PATH): config_spec_path, encoding="UTF8", list_values=False, _inspec=True) + # HACK to get MediaGoblin running under Docker/Python 3. Without this line, + # `./bin/gmg dbupdate` fails as the configuration under 'DEFAULT' in + # config_spec still had %(here)s markers in it, when these should have been + # replaced with actual paths, resulting in + # "configobj.MissingInterpolationOption: missing option "here" in + # interpolation". This issue doesn't seem to appear when running on Guix, + # but adding this line also doesn't appear to cause problems on Guix. + _setup_defaults(config_spec, config_path) + # Set up extra defaults that will be pushed into the rest of the # configs. This is a combined extrapolation of defaults based on mainconfig_defaults = copy.copy(config_spec.get("DEFAULT", {})) diff --git a/mediagoblin/plugins/metadata_display/static/css/metadata_display.css b/mediagoblin/plugins/metadata_display/static/css/metadata_display.css index e4612b02..dd787e94 100644 --- a/mediagoblin/plugins/metadata_display/static/css/metadata_display.css +++ b/mediagoblin/plugins/metadata_display/static/css/metadata_display.css @@ -1,6 +1,6 @@ table.metadata_info { font-size:85%; - margin-left:10px; + margin: 8px 0 16px 8px; } table.metadata_info th { @@ -8,7 +8,7 @@ table.metadata_info th { border-spacing: 10px; text-align: left; } + table.metadata_info td { padding: 4px 8px; } - diff --git a/mediagoblin/plugins/metadata_display/templates/mediagoblin/plugins/metadata_display/metadata_table.html b/mediagoblin/plugins/metadata_display/templates/mediagoblin/plugins/metadata_display/metadata_table.html index 15ea1536..6fc46212 100644 --- a/mediagoblin/plugins/metadata_display/templates/mediagoblin/plugins/metadata_display/metadata_table.html +++ b/mediagoblin/plugins/metadata_display/templates/mediagoblin/plugins/metadata_display/metadata_table.html @@ -23,7 +23,7 @@ {#- NOTE: In some smart future where the context is more extensible, we will need to add to the prefix here-#} <table class="metadata_info"> - {%- for key, value in metadata.iteritems() if not key=='@context' %} + {%- for key, value in metadata.items() if key != '@context' %} {% if value -%} <tr> <th>{{ rdfa_to_readable(key) }}</th> diff --git a/mediagoblin/static/css/base.css b/mediagoblin/static/css/base.css index 6da19f94..11558fe5 100644 --- a/mediagoblin/static/css/base.css +++ b/mediagoblin/static/css/base.css @@ -142,7 +142,7 @@ header { .header_right { width: 47%; - margin: 8px 8px 4px 0; + margin: 8px 8px 8px 0; display: inline-block; float: right; text-align: right; @@ -195,7 +195,7 @@ a.logo { .logo img { vertical-align: middle; - margin: 6px 8px 6px 0; + margin: 8px 8px 6px 0; } .welcomeimage { |