aboutsummaryrefslogtreecommitdiffstats
path: root/mediagoblin/gmg_commands/batchaddmedia.py
diff options
context:
space:
mode:
Diffstat (limited to 'mediagoblin/gmg_commands/batchaddmedia.py')
-rw-r--r--mediagoblin/gmg_commands/batchaddmedia.py108
1 file changed, 13 insertions, 95 deletions
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
index 43c24f6d..e540e88c 100644
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -24,12 +24,11 @@ from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
submit_media, get_upload_file_limits,
FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
-from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
+from mediagoblin.tools.metadata import compact_and_validate
-from mediagoblin import mg_globals
-from jsonschema import validate
from jsonschema.exceptions import ValidationError
+
def parser_setup(subparser):
subparser.description = """\
This command allows the administrator to upload many media files at once."""
@@ -49,11 +48,6 @@ Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and
have provided an example of these files at <url to be added>
"""))
subparser.add_argument(
- "-l", "--license",
- help=(
- "License these media entry will be released under, if all the same. "
- "Should be a URL."))
- subparser.add_argument(
'--celery',
action='store_true',
help="Don't process eagerly, pass off to celery")
@@ -102,14 +96,12 @@ zip files and directories"
metadata_file_path = os.path.join(dir_path, "metadata.csv")
# check for the location file, if it exists...
- location_filename = os.path.split(location_file_path)[-1]
abs_location_filename = os.path.abspath(location_file_path)
if not os.path.exists(abs_location_filename):
print "Can't find a file with filename '%s'" % location_file_path
return
# check for the metadata file, if it exists...
- metadata_filename = os.path.split(metadata_file_path)[-1]
abs_metadata_filename = os.path.abspath(metadata_file_path)
if not os.path.exists(abs_metadata_filename):
print "Can't find a file with filename '%s'" % metadata_file_path
@@ -132,24 +124,24 @@ zip files and directories"
contents = all_metadata.read()
media_metadata = parse_csv_file(contents)
- metadata_context = { 'dcterms':'http://purl.org/dc/terms/',
- 'xsd': 'http://www.w3.org/2001/XMLSchema#'}
-
for media_id in media_locations.keys():
files_attempted += 1
- file_metadata = media_metadata[media_id]
- sanitized_metadata = check_metadata_format(file_metadata)
- if sanitized_metadata == {}: continue
+ file_metadata = media_metadata[media_id]
+ try:
+ json_ld_metadata = compact_and_validate(file_metadata)
+ except ValidationError, exc:
+ print "Error with '%s' value '%s': %s" % (
+ media_id, exc.path[0], exc.message)
+ continue
- json_ld_metadata = jsonld.compact(build_json_ld_metadata(file_metadata),
- metadata_context)
original_location = media_locations[media_id]['media:original']
url = urlparse(original_location)
- title = sanitized_metadata.get('dcterms:title')
- description = sanitized_metadata.get('dcterms:description')
- license = sanitized_metadata.get('dcterms:rights')
+ title = json_ld_metadata.get('dcterms:title')
+ description = json_ld_metadata.get('dcterms:description')
+
+ license = json_ld_metadata.get('license')
filename = url.path.split()[-1]
if url.scheme == 'http':
@@ -219,77 +211,3 @@ def parse_csv_file(file_contents):
def teardown(temp_files):
for temp_file in temp_files:
subprocess.call(['rm','-r',temp_file])
-
-def build_json_ld_metadata(metadata_dict):
- output_dict = {}
- for p in metadata_dict.keys():
- if p in ["dcterms:rights", "dcterms:relation"]:
- m_type = "xsd:uri"
- elif p in ["dcterms:date", "dcterms:created"]:
- m_type = "xsd:date"
- else:
- m_type = "xsd:string"
- description = {"@value": metadata_dict[p],
- "@type" : m_type}
- output_dict[p] = description
- return output_dict
-
-def check_metadata_format(metadata_dict):
- schema = {
- "$schema":"http://json-schema.org/schema#",
- "properties":{
- "media:id":{},
- "dcterms:contributor":{},
- "dcterms:coverage":{},
- "dcterms:created":{},
- "dcterms:creator":{},
- "dcterms:date":{},
- "dcterms:description":{},
- "dcterms:format":{},
- "dcterms:identifier":{},
- "dcterms:language":{},
- "dcterms:publisher":{},
- "dcterms:relation":{},
- "dcterms:rights" : {
- "format":"uri",
- "type":"string"
- },
- "dcterms:source":{},
- "dcterms:subject":{},
- "dcterms:title":{},
- "dcterms:type":{}
- },
- "additionalProperties": False,
- "required":["dcterms:title","media:id"]
-}
- try:
- validate(metadata_dict, schema)
- output_dict = metadata_dict
- # "media:id" is only for internal use, so we delete it for the output
- del output_dict['media:id']
-
- except ValidationError, exc:
- title = (metadata_dict.get('dcterms:title') or
- metadata_dict.get('media:id') or _(u'UNKNOWN FILE'))
-
- if exc.validator == "additionalProperties":
- message = _(u'Invalid metadata provided for file "{title}". This \
-script only accepts the Dublin Core metadata terms.'.format(title=title))
-
- elif exc.validator == "required":
- message = _(
-u'All necessary metadata was not provided for file "{title}", you must include \
-a "dcterms:title" column for each media file'.format(title=title))
-
- else:
- message = _(u'Could not find appropriate metadata for file \
-"{title}".'.format(title=title))
-
- print _(u"""WARN: {message} \nSkipping File...\n""".format(
- message=message))
-
- output_dict = {}
- except:
- raise
-
- return output_dict