diff options
Diffstat (limited to 'mediagoblin/gmg_commands/batchaddmedia.py')
-rw-r--r-- | mediagoblin/gmg_commands/batchaddmedia.py | 108 |
1 files changed, 13 insertions, 95 deletions
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py index 43c24f6d..e540e88c 100644 --- a/mediagoblin/gmg_commands/batchaddmedia.py +++ b/mediagoblin/gmg_commands/batchaddmedia.py @@ -24,12 +24,11 @@ from mediagoblin.gmg_commands import util as commands_util from mediagoblin.submit.lib import ( submit_media, get_upload_file_limits, FileUploadLimit, UserUploadLimit, UserPastUploadLimit) -from mediagoblin.tools.translate import lazy_pass_to_ugettext as _ +from mediagoblin.tools.metadata import compact_and_validate -from mediagoblin import mg_globals -from jsonschema import validate from jsonschema.exceptions import ValidationError + def parser_setup(subparser): subparser.description = """\ This command allows the administrator to upload many media files at once.""" @@ -49,11 +48,6 @@ Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and have provided an example of these files at <url to be added> """)) subparser.add_argument( - "-l", "--license", - help=( - "License these media entry will be released under, if all the same. " - "Should be a URL.")) - subparser.add_argument( '--celery', action='store_true', help="Don't process eagerly, pass off to celery") @@ -102,14 +96,12 @@ zip files and directories" metadata_file_path = os.path.join(dir_path, "metadata.csv") # check for the location file, if it exists... - location_filename = os.path.split(location_file_path)[-1] abs_location_filename = os.path.abspath(location_file_path) if not os.path.exists(abs_location_filename): print "Can't find a file with filename '%s'" % location_file_path return # check for the metadata file, if it exists... - metadata_filename = os.path.split(metadata_file_path)[-1] abs_metadata_filename = os.path.abspath(metadata_file_path) if not os.path.exists(abs_metadata_filename): print "Can't find a file with filename '%s'" % metadata_file_path @@ -132,24 +124,24 @@ zip files and directories" contents = all_metadata.read() media_metadata = parse_csv_file(contents) - metadata_context = { 'dcterms':'http://purl.org/dc/terms/', - 'xsd': 'http://www.w3.org/2001/XMLSchema#'} - for media_id in media_locations.keys(): files_attempted += 1 - file_metadata = media_metadata[media_id] - sanitized_metadata = check_metadata_format(file_metadata) - if sanitized_metadata == {}: continue + file_metadata = media_metadata[media_id] + try: + json_ld_metadata = compact_and_validate(file_metadata) + except ValidationError, exc: + print "Error with '%s' value '%s': %s" % ( + media_id, exc.path[0], exc.message) + continue - json_ld_metadata = jsonld.compact(build_json_ld_metadata(file_metadata), - metadata_context) original_location = media_locations[media_id]['media:original'] url = urlparse(original_location) - title = sanitized_metadata.get('dcterms:title') - description = sanitized_metadata.get('dcterms:description') - license = sanitized_metadata.get('dcterms:rights') + title = json_ld_metadata.get('dcterms:title') + description = json_ld_metadata.get('dcterms:description') + + license = json_ld_metadata.get('license') filename = url.path.split()[-1] if url.scheme == 'http': @@ -219,77 +211,3 @@ def parse_csv_file(file_contents): def teardown(temp_files): for temp_file in temp_files: subprocess.call(['rm','-r',temp_file]) - -def build_json_ld_metadata(metadata_dict): - output_dict = {} - for p in metadata_dict.keys(): - if p in ["dcterms:rights", "dcterms:relation"]: - m_type = "xsd:uri" - elif p in ["dcterms:date", "dcterms:created"]: - m_type = "xsd:date" - else: - m_type = "xsd:string" - description = {"@value": metadata_dict[p], - "@type" : m_type} - output_dict[p] = description - return output_dict - -def check_metadata_format(metadata_dict): - schema = { - "$schema":"http://json-schema.org/schema#", - "properties":{ - "media:id":{}, - "dcterms:contributor":{}, - "dcterms:coverage":{}, - "dcterms:created":{}, - "dcterms:creator":{}, - "dcterms:date":{}, - "dcterms:description":{}, - "dcterms:format":{}, - "dcterms:identifier":{}, - "dcterms:language":{}, - "dcterms:publisher":{}, - "dcterms:relation":{}, - "dcterms:rights" : { - "format":"uri", - "type":"string" - }, - "dcterms:source":{}, - "dcterms:subject":{}, - "dcterms:title":{}, - "dcterms:type":{} - }, - "additionalProperties": False, - "required":["dcterms:title","media:id"] -} - try: - validate(metadata_dict, schema) - output_dict = metadata_dict - # "media:id" is only for internal use, so we delete it for the output - del output_dict['media:id'] - - except ValidationError, exc: - title = (metadata_dict.get('dcterms:title') or - metadata_dict.get('media:id') or _(u'UNKNOWN FILE')) - - if exc.validator == "additionalProperties": - message = _(u'Invalid metadata provided for file "{title}". This \ -script only accepts the Dublin Core metadata terms.'.format(title=title)) - - elif exc.validator == "required": - message = _( -u'All necessary metadata was not provided for file "{title}", you must include \ -a "dcterms:title" column for each media file'.format(title=title)) - - else: - message = _(u'Could not find appropriate metadata for file \ -"{title}".'.format(title=title)) - - print _(u"""WARN: {message} \nSkipping File...\n""".format( - message=message)) - - output_dict = {} - except: - raise - - return output_dict |