author     tilly-Q <nattilypigeonfowl@gmail.com>  2014-02-12 14:37:00 -0500
committer  tilly-Q <nattilypigeonfowl@gmail.com>  2014-03-23 16:40:50 -0400
commit     714c4cb7d7a1918d3b4cf5cbe9145078cd330b5b (patch)
tree       3a2741cf65fc5014315232a623f7e26c392a3d13
parent     3214aeb2387cd1356685372f9abaebe35ea7f006 (diff)
The script now officially works! It works in many different situations, whether the media to be uploaded is stored locally or on the web. I still have to clean up the code and look for errors. I may also refactor some of this into a function to be used with a GUI frontend in another project. Lastly, I need to merge this with the metadata branch I've been working on, and convert the metadata.csv information into the proper format for the new metadata column.
-rw-r--r--  mediagoblin/gmg_commands/batchaddmedia.py  130
1 file changed, 111 insertions, 19 deletions
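
For reference, the reworked command expects the target directory (or the extracted archive) to contain a location.csv and a metadata.csv file. Below is a minimal sketch of what those two files might look like: the column names media:id, media:original, dcterms:title, dcterms:description, and dcterms:license are the ones the code in this diff reads, while the sample rows and values are purely illustrative. Relative paths in location.csv are resolved against the target directory, and http URLs are downloaded to a temporary file before submission.

location.csv:
    media:id,media:original
    1,images/goblin.jpg
    2,http://example.org/pics/gremlin.png

metadata.csv:
    media:id,dcterms:title,dcterms:description,dcterms:license
    1,A Goblin,Photo of a goblin,http://creativecommons.org/licenses/by-sa/3.0/
    2,A Gremlin,Photo of a gremlin,http://creativecommons.org/licenses/by-sa/3.0/
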
diff --git a/mediagoblin/gmg_commands/batchaddmedia.py b/mediagoblin/gmg_commands/batchaddmedia.py
index 1c0f6784..7d7a2d4f 100644
--- a/mediagoblin/gmg_commands/batchaddmedia.py
+++ b/mediagoblin/gmg_commands/batchaddmedia.py
@@ -15,6 +15,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
+import json, tempfile, urllib, tarfile, subprocess
+from csv import reader as csv_reader
+from urlparse import urlparse
+from pyld import jsonld
from mediagoblin.gmg_commands import util as commands_util
from mediagoblin.submit.lib import (
@@ -22,20 +26,26 @@ from mediagoblin.submit.lib import (
FileUploadLimit, UserUploadLimit, UserPastUploadLimit)
from mediagoblin import mg_globals
-import json, csv
def parser_setup(subparser):
subparser.add_argument(
'username',
help="Name of user this media entry belongs to")
- subparser.add_argument(
- 'locationfile',
+ target_type = subparser.add_mutually_exclusive_group()
+ target_type.add_argument('-d',
+ '--directory', action='store_const',
+ const='directory', dest='target_type',
+ default='directory', help=(
+"Target is a directory"))
+ target_type.add_argument('-a',
+ '--archive', action='store_const',
+ const='archive', dest='target_type',
help=(
-"Local file on filesystem with the address of all the files to be uploaded"))
+"Target is an archive."))
subparser.add_argument(
- 'metadatafile',
+ 'target_path',
help=(
-"Local file on filesystem with metadata of all the files to be uploaded"))
+"Path to a local archive or directory containing a location.csv and metadata.csv file"))
subparser.add_argument(
"-l", "--license",
help=(
@@ -59,19 +69,36 @@ def batchaddmedia(args):
if user is None:
print "Sorry, no user by username '%s'" % args.username
return
+
+ upload_limit, max_file_size = get_upload_file_limits(user)
+ temp_files = []
+
+ if args.target_type == 'archive':
+ dir_path = tempfile.mkdtemp()
+ temp_files.append(dir_path)
+ tar = tarfile.open(args.target_path)
+ tar.extractall(path=dir_path)
+
+ elif args.target_type == 'directory':
+ dir_path = args.target_path
+
+ location_file_path = "{dir_path}/location.csv".format(
+ dir_path=dir_path)
+ metadata_file_path = "{dir_path}/metadata.csv".format(
+ dir_path=dir_path)
# check for the location file, if it exists...
- location_filename = os.path.split(args.locationfile)[-1]
- abs_location_filename = os.path.abspath(args.locationfile)
+ location_filename = os.path.split(location_file_path)[-1]
+ abs_location_filename = os.path.abspath(location_file_path)
if not os.path.exists(abs_location_filename):
- print "Can't find a file with filename '%s'" % args.locationfile
+ print "Can't find a file with filename '%s'" % location_file_path
return
- # check for the location file, if it exists...
- metadata_filename = os.path.split(args.metadatafile)[-1]
- abs_metadata_filename = os.path.abspath(args.metadatafile)
+ # check for the metadata file, if it exists...
+ metadata_filename = os.path.split(metadata_file_path)[-1]
+ abs_metadata_filename = os.path.abspath(metadata_file_path)
if not os.path.exists(abs_metadata_filename):
- print "Can't find a file with filename '%s'" % args.metadatafile
+ print "Can't find a file with filename '%s'" % metadata_file_path
return
upload_limit, max_file_size = get_upload_file_limits(user)
@@ -91,20 +118,85 @@ def batchaddmedia(args):
contents = all_metadata.read()
media_metadata = parse_csv_file(contents)
+ dcterms_context = { 'dcterms':'http://purl.org/dc/terms/' }
+
+ for media_id in media_locations.keys():
+ file_metadata = media_metadata[media_id]
+ json_ld_metadata = jsonld.compact(file_metadata, dcterms_context)
+ original_location = media_locations[media_id]['media:original']
+ url = urlparse(original_location)
+
+ title = file_metadata.get('dcterms:title')
+ description = file_metadata.get('dcterms:description')
+ license = file_metadata.get('dcterms:license')
+ filename = url.path.split()[-1]
+ print "Working with {filename}".format(filename=filename)
+
+ if url.scheme == 'http':
+ print "Downloading {filename}...".format(
+ filename=filename)
+ media_file = tempfile.TemporaryFile()
+ res = urllib.urlopen(url.geturl())
+ media_file.write(res.read())
+ media_file.seek(0)
+
+ elif url.scheme == '':
+ path = url.path
+ if os.path.isabs(path):
+ file_abs_path = os.path.abspath(path)
+ else:
+ file_path = "{dir_path}/{local_path}".format(
+ dir_path=dir_path,
+ local_path=path)
+ file_abs_path = os.path.abspath(file_path)
+ try:
+ media_file = file(file_abs_path, 'r')
+ except IOError:
+ print "Local file {filename} could not be accessed.".format(
+ filename=filename)
+ print "Skipping it."
+ continue
+ print "Submitting {filename}...".format(filename=filename)
+ try:
+ submit_media(
+ mg_app=app,
+ user=user,
+ submitted_file=media_file,
+ filename=filename,
+ title=maybe_unicodeify(title),
+ description=maybe_unicodeify(description),
+ license=maybe_unicodeify(license),
+ tags_string=u"",
+ upload_limit=upload_limit, max_file_size=max_file_size)
+ print "Successfully uploading {filename}!".format(filename=filename)
+ print ""
+ except FileUploadLimit:
+ print "This file is larger than the upload limits for this site."
+ except UserUploadLimit:
+ print "This file will put this user past their upload limits."
+ except UserPastUploadLimit:
+ print "This user is already past their upload limits."
+ teardown(temp_files)
+
+
+
def parse_csv_file(file_contents):
list_of_contents = file_contents.split('\n')
key, lines = (list_of_contents[0].split(','),
list_of_contents[1:])
- list_of_objects = []
+ objects_dict = {}
# Build a dictionary
for line in lines:
if line.isspace() or line == '': continue
- values = csv.reader([line]).next()
- new_dict = dict([(key[i], val)
+ values = csv_reader([line]).next()
+ line_dict = dict([(key[i], val)
for i, val in enumerate(values)])
- list_of_objects.append(new_dict)
+ media_id = line_dict['media:id']
+ objects_dict[media_id] = (line_dict)
- return list_of_objects
+ return objects_dict
-
+def teardown(temp_files):
+ for temp_file in temp_files:
+ subprocess.call(['rm','-r',temp_file])
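
As a quick illustration (not part of the commit), the rewritten parse_csv_file() logic can be exercised on its own. The snippet below mirrors that logic in Python 2, which this module targets; the sample CSV string is hypothetical and only meant to show how each row ends up in a dict keyed by its 'media:id' value.

from csv import reader as csv_reader

# Hypothetical metadata.csv contents, mirroring the layout sketched above.
sample = (
    "media:id,dcterms:title,dcterms:license\n"
    "1,A Goblin,http://creativecommons.org/licenses/by-sa/3.0/\n")

lines = sample.split('\n')
keys, rows = lines[0].split(','), lines[1:]

objects_dict = {}
for line in rows:
    if line.isspace() or line == '':
        continue
    # csv_reader yields one parsed row per input line; .next() is Python 2.
    values = csv_reader([line]).next()
    line_dict = dict((keys[i], val) for i, val in enumerate(values))
    objects_dict[line_dict['media:id']] = line_dict

# objects_dict now maps '1' to a dict of that row's columns, which is the
# per-media_id lookup batchaddmedia() performs.
print objects_dict['1']['dcterms:title']   # -> A Goblin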