author     Ben Sturmfels <ben@sturm.com.au>  2019-09-12 19:39:54 +1000
committer  Ben Sturmfels <ben@sturm.com.au>  2019-09-12 19:39:54 +1000
commit     8f18381bbc7d12e9a988d5ce332491ba487fc73d (patch)
tree       858210a28a659c06f8a8e4c919543abaee3789e8 /mediagoblin/gmg_commands
parent     eb365433641c0cfc4abb63e8e9952e3d8cc9df81 (diff)
batchaddmedia: Simplify the Python 2 & 3 unicode handling.
To avoid issues with quotes, I've replaced the manual CSV handling with csv.DictReader and simplified the unicode handling down to a single line. I don't believe any special encoding is required when writing to the database. This has been tested by importing a CSV with braille characters on Python 3 and Python 2.
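
For context, the new code path reduces to the following sketch (illustrative only; 'metadata.csv' and the 'id' column are stand-ins for whatever the metadata file actually provides):

    import csv

    import six

    with open('metadata.csv', 'r') as all_metadata:
        for index, file_metadata in enumerate(csv.DictReader(all_metadata)):
            # Python 2's csv module yields UTF-8 byte strings; decode them
            # once here so the rest of the code only sees text.
            if six.PY2:
                file_metadata = {k.decode('utf-8'): v.decode('utf-8')
                                 for k, v in file_metadata.items()}
            # Fall back to the row number when no 'id' column is present.
            media_id = file_metadata.get('id') or index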
Diffstat (limited to 'mediagoblin/gmg_commands')
-rw-r--r--  mediagoblin/gmg_commands/batchaddmedia.py | 71
1 file changed, 12 insertions(+), 59 deletions(-)
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
index 60c22729..dc80f88b 100644
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -16,16 +16,13 @@
 from __future__ import print_function, unicode_literals
 
-import codecs
 import csv
 import os
 import shutil
-import sys
 import tempfile
 
 import requests
 import six
-
 from six.moves.urllib.parse import urlparse
 
 from mediagoblin.db.models import LocalUser
@@ -87,19 +84,13 @@ def batchaddmedia(args):
     abs_metadata_filename = os.path.abspath(metadata_path)
     abs_metadata_dir = os.path.dirname(abs_metadata_filename)
 
-    def maybe_unicodeify(some_string):
-        # this is kinda terrible
-        if some_string is None:
-            return None
-        else:
-            return six.text_type(some_string)
+    all_metadata = open(abs_metadata_filename, 'r')
+    media_metadata = csv.DictReader(all_metadata)
 
-    with codecs.open(
-            abs_metadata_filename, 'r', encoding='utf-8') as all_metadata:
-        contents = all_metadata.read()
-        media_metadata = parse_csv_file(contents)
-
-    for media_id, file_metadata in media_metadata.items():
+    for index, file_metadata in enumerate(media_metadata):
+        if six.PY2:
+            file_metadata = {k.decode('utf-8'): v.decode('utf-8') for k, v in file_metadata.items()}
         files_attempted += 1
 
         # In case the metadata was not uploaded initialize an empty dictionary.
         json_ld_metadata = compact_and_validate({})
@@ -119,6 +110,7 @@
         try:
             json_ld_metadata = compact_and_validate(file_metadata)
         except ValidationError as exc:
+            media_id = file_metadata.get('id') or index
             error = _("""Error with media '{media_id}' value '{error_path}': {error_msg}
 Metadata was not uploaded.""".format(
                 media_id=media_id,
@@ -145,6 +137,8 @@ Metadata was not uploaded.""".format(
             # `batchaddmedia` to upload a file larger than 200MB.
             media_file = tempfile.TemporaryFile()
             shutil.copyfileobj(res.raw, media_file)
+            if six.PY2:
+                media_file.seek(0)
 
         elif url.scheme == '':
             path = url.path
@@ -166,10 +160,10 @@ FAIL: Local file {filename} could not be accessed.
             user=user,
             submitted_file=media_file,
             filename=filename,
-            title=maybe_unicodeify(title),
-            description=maybe_unicodeify(description),
-            collection_slug=maybe_unicodeify(collection_slug),
-            license=maybe_unicodeify(license),
+            title=title,
+            description=description,
+            collection_slug=collection_slug,
+            license=license,
             metadata=json_ld_metadata,
             tags_string="")
         print(_("""Successfully submitted {filename}!
@@ -190,44 +184,3 @@ uploaded successfully.""".format(filename=filename)))
         "{files_uploaded} out of {files_attempted} files successfully submitted".format(
             files_uploaded=files_uploaded,
             files_attempted=files_attempted)))
-
-
-def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
-    # csv.py doesn't do Unicode; encode temporarily as UTF-8:
-    # TODO: this probably won't be necessary in Python 3
-    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
-                            dialect=dialect, **kwargs)
-    for row in csv_reader:
-        # decode UTF-8 back to Unicode, cell by cell:
-        yield [six.text_type(cell, 'utf-8') for cell in row]
-
-def utf_8_encoder(unicode_csv_data):
-    for line in unicode_csv_data:
-        yield line.encode('utf-8')
-
-def parse_csv_file(file_contents):
-    """
-    The helper function which converts the csv file into a dictionary where each
-    item's key is the provided value 'id' and each item's value is another
-    dictionary.
-    """
-    list_of_contents = file_contents.split('\n')
-    key, lines = (list_of_contents[0].split(','),
-                  list_of_contents[1:])
-    objects_dict = {}
-
-    # Build a dictionary
-    for index, line in enumerate(lines):
-        if line.isspace() or line == '': continue
-        if (sys.version_info[0] == 3):
-            # Python 3's csv.py supports Unicode out of the box.
-            reader = csv.reader([line])
-        else:
-            reader = unicode_csv_reader([line])
-        values = next(reader)
-        line_dict = dict([(key[i], val)
-                          for i, val in enumerate(values)])
-        media_id = line_dict.get('id') or index
-        objects_dict[media_id] = (line_dict)
-
-    return objects_dict