diff options
| author | Christopher Allan Webber <cwebber@dustycloud.org> | 2014-05-07 18:41:34 -0500 | 
|---|---|---|
| committer | Christopher Allan Webber <cwebber@dustycloud.org> | 2014-05-07 18:41:34 -0500 | 
| commit | e5e2cc2f16f47fb28f5aef256652e2f2e20eb45d (patch) | |
| tree | ed3303fdd69d9786819c52d9f6c44d556a9b9473 | |
| parent | a4486286363cca8d0ef9d1026883b13e7f84d8e0 (diff) | |
| download | mediagoblin-e5e2cc2f16f47fb28f5aef256652e2f2e20eb45d.tar.lz mediagoblin-e5e2cc2f16f47fb28f5aef256652e2f2e20eb45d.tar.xz mediagoblin-e5e2cc2f16f47fb28f5aef256652e2f2e20eb45d.zip | |
Starting to add metadata tools, as well as mediagoblin's schema
| -rw-r--r-- | mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld | 47 | ||||
| -rw-r--r-- | mediagoblin/tools/metadata.py | 106 | 
2 files changed, 153 insertions, 0 deletions
```diff
diff --git a/mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld b/mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld
new file mode 100644
index 00000000..20a71b53
--- /dev/null
+++ b/mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld
@@ -0,0 +1,47 @@
+{
+  "@context": {
+    "qb": "http://purl.org/linked-data/cube#",
+    "grddl": "http://www.w3.org/2003/g/data-view#",
+    "ma": "http://www.w3.org/ns/ma-ont#",
+    "owl": "http://www.w3.org/2002/07/owl#",
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "rdfa": "http://www.w3.org/ns/rdfa#",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "rif": "http://www.w3.org/2007/rif#",
+    "rr": "http://www.w3.org/ns/r2rml#",
+    "skos": "http://www.w3.org/2004/02/skos/core#",
+    "skosxl": "http://www.w3.org/2008/05/skos-xl#",
+    "wdr": "http://www.w3.org/2007/05/powder#",
+    "void": "http://rdfs.org/ns/void#",
+    "wdrs": "http://www.w3.org/2007/05/powder-s#",
+    "xhv": "http://www.w3.org/1999/xhtml/vocab#",
+    "xml": "http://www.w3.org/XML/1998/namespace",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "prov": "http://www.w3.org/ns/prov#",
+    "sd": "http://www.w3.org/ns/sparql-service-description#",
+    "org": "http://www.w3.org/ns/org#",
+    "gldp": "http://www.w3.org/ns/people#",
+    "cnt": "http://www.w3.org/2008/content#",
+    "dcat": "http://www.w3.org/ns/dcat#",
+    "earl": "http://www.w3.org/ns/earl#",
+    "ht": "http://www.w3.org/2006/http#",
+    "ptr": "http://www.w3.org/2009/pointers#",
+    "cc": "http://creativecommons.org/ns#",
+    "ctag": "http://commontag.org/ns#",
+    "dc": "http://purl.org/dc/terms/",
+    "dc11": "http://purl.org/dc/elements/1.1/",
+    "dcterms": "http://purl.org/dc/terms/",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "gr": "http://purl.org/goodrelations/v1#",
+    "ical": "http://www.w3.org/2002/12/cal/icaltzd#",
+    "og": "http://ogp.me/ns#",
+    "rev": "http://purl.org/stuff/rev#",
+    "sioc": "http://rdfs.org/sioc/ns#",
+    "v": "http://rdf.data-vocabulary.org/#",
+    "vcard": "http://www.w3.org/2006/vcard/ns#",
+    "schema": "http://schema.org/",
+    "describedby": "http://www.w3.org/2007/05/powder-s#describedby",
+    "license": "http://www.w3.org/1999/xhtml/vocab#license",
+    "role": "http://www.w3.org/1999/xhtml/vocab#role"
+  }
+}
diff --git a/mediagoblin/tools/metadata.py b/mediagoblin/tools/metadata.py
new file mode 100644
index 00000000..428e425c
--- /dev/null
+++ b/mediagoblin/tools/metadata.py
@@ -0,0 +1,106 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import copy
+import json
+import re
+from pkg_resources import resource_filename
+
+import dateutil.parser
+from pyld import jsonld
+from jsonschema import validate, FormatChecker, draft4_format_checker
+from jsonschema.compat import str_types
+
+
+MEDIAGOBLIN_CONTEXT_PATH = resource_filename(
+    "mediagoblin",
+    os.path.sep.join(["static", "metadata", "mediagoblin-0.1.dev.jsonld"]))
+MEDIAGOBLIN_CONTEXT = json.loads(file(MEDIAGOBLIN_CONTEXT_PATH).read())
+
+
+########################################################
+## Set up the MediaGoblin format checker for json-schema
+########################################################
+
+URL_REGEX = re.compile(
+    r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
+    re.IGNORECASE)
+
+def is_uri(instance):
+    """
+    jsonschema uri validator
+    """
+    if not isinstance(instance, str_types):
+        return True
+
+    return URL_REGEX.match(instance)
+
+def is_datetime(instance):
+    """
+    Is a date or datetime readable string.
+    """
+    if not isinstance(instance, str_types):
+        return True
+
+    return dateutil.parser.parse(instance)
+
+
+class DefaultChecker(FormatChecker):
+    """
+    Default MediaGoblin format checker... extended to include a few extra things
+    """
+    checkers = copy.deepcopy(draft4_format_checker.checkers)
+
+
+DefaultChecker.checkers[u"uri"] = (is_uri, ())
+DefaultChecker.checkers[u"date-time"] = (is_datetime, (ValueError, TypeError))
+DEFAULT_CHECKER = DefaultChecker()
+
+# Crappy default schema, checks for things we deem important
+
+DEFAULT_SCHEMA = {
+    "$schema": "http://json-schema.org/schema#",
+
+    "type": "object",
+    "properties": {
+        "dcterms:rights": {
+            "format": "uri",
+            "type": "string",
+        },
+        "dcterms:created": {
+            "format": "date-time",
+            "type": "string",
+        }
+    },
+}
+
+
+def compact_and_validate(metadata, context=MEDIAGOBLIN_CONTEXT,
+                         schema=DEFAULT_SCHEMA):
+    """
+    compact json with supplied context, check against schema for errors
+
+    raises an exception (jsonschema.exceptions.ValidationError) if
+    there's an error.
+
+    You may wish to do this validation yourself... this is just for convenience.
+    """
+    compacted = jsonld.compact(metadata, context)
+    validate(metadata, schema, format_checker=DEFAULT_CHECKER)
+
+    return compacted
```
