about summary refs log tree commit diff stats
path: root/mediagoblin/gmg_commands/batchaddmedia.py
diff options
context:
space:
mode:
author Christopher Allan Webber <cwebber@dustycloud.org> 2014-05-07 13:36:52 -0500
committer Christopher Allan Webber <cwebber@dustycloud.org> 2014-05-07 13:39:03 -0500
commit af3a9107a9aef453b62f8fd83e03e9a1bbe416b8 (patch)
tree 98cf912de39f0ae0041b5dd46c770e1035311bfb /mediagoblin/gmg_commands/batchaddmedia.py
parent 9f5d388ec01c195ffbacc4a1fd876fb507a6f62d (diff)
download mediagoblin-af3a9107a9aef453b62f8fd83e03e9a1bbe416b8.tar.lz
mediagoblin-af3a9107a9aef453b62f8fd83e03e9a1bbe416b8.tar.xz
mediagoblin-af3a9107a9aef453b62f8fd83e03e9a1bbe416b8.zip
The URL format checker now works correctly
...though it isn't checking the right thing
Diffstat (limited to 'mediagoblin/gmg_commands/batchaddmedia.py')
-rw-r--r-- mediagoblin/gmg_commands/batchaddmedia.py 77
1 files changed, 44 insertions, 33 deletions
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
index 43c24f6d..41fb86c9 100644
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -15,7 +15,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
-import tempfile, tarfile, zipfile, subprocess, requests
+import copy, tempfile, tarfile, zipfile, subprocess, re, requests
from csv import reader as csv_reader
from urlparse import urlparse
from pyld import jsonld
@@ -27,8 +27,10 @@ from mediagoblin.submit.lib import (
from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
from mediagoblin import mg_globals
-from jsonschema import validate
+from jsonschema import validate, FormatChecker, draft4_format_checker
from jsonschema.exceptions import ValidationError
+from jsonschema.compat import str_types
+
def parser_setup(subparser):
subparser.description = """\
@@ -49,11 +51,6 @@ Core properties (http://dublincore.org/documents/dces/). Both "location.csv" and
have provided an example of these files at <url to be added>
"""))
subparser.add_argument(
- "-l", "--license",
- help=(
- "License these media entry will be released under, if all the same. "
- "Should be a URL."))
- subparser.add_argument(
'--celery',
action='store_true',
help="Don't process eagerly, pass off to celery")
@@ -149,6 +146,8 @@ zip files and directories"
title = sanitized_metadata.get('dcterms:title')
description = sanitized_metadata.get('dcterms:description')
+
+ # TODO: this isn't the same thing
license = sanitized_metadata.get('dcterms:rights')
filename = url.path.split()[-1]
@@ -234,36 +233,48 @@ def build_json_ld_metadata(metadata_dict):
output_dict[p] = description
return output_dict
+
+## Set up the MediaGoblin checker
+#
+
+URL_REGEX = re.compile(
+ r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
+ re.IGNORECASE)
+
+def is_uri(instance):
+ if not isinstance(instance, str_types):
+ return True
+
+ return URL_REGEX.match(instance)
+
+
+class DefaultChecker(FormatChecker):
+ checkers = copy.deepcopy(draft4_format_checker.checkers)
+
+DefaultChecker.checkers[u"uri"] = (is_uri, ())
+
+DEFAULT_CHECKER = DefaultChecker()
+
def check_metadata_format(metadata_dict):
schema = {
- "$schema":"http://json-schema.org/schema#",
- "properties":{
- "media:id":{},
- "dcterms:contributor":{},
- "dcterms:coverage":{},
- "dcterms:created":{},
- "dcterms:creator":{},
- "dcterms:date":{},
- "dcterms:description":{},
- "dcterms:format":{},
- "dcterms:identifier":{},
- "dcterms:language":{},
- "dcterms:publisher":{},
- "dcterms:relation":{},
- "dcterms:rights" : {
- "format":"uri",
- "type":"string"
+ "$schema": "http://json-schema.org/schema#",
+
+ "type": "object",
+ "properties": {
+ "dcterms:rights": {
+ "format": "uri",
+ "type": "string",
+ },
+ "dcterms:created": {
+
+ }
},
- "dcterms:source":{},
- "dcterms:subject":{},
- "dcterms:title":{},
- "dcterms:type":{}
- },
- "additionalProperties": False,
- "required":["dcterms:title","media:id"]
-}
+ # "required": ["dcterms:title", "media:id"],
+ }
+
try:
- validate(metadata_dict, schema)
+ validate(metadata_dict, schema,
+ format_checker=DEFAULT_CHECKER)
output_dict = metadata_dict
# "media:id" is only for internal use, so we delete it for the output
del output_dict['media:id']