aboutsummaryrefslogtreecommitdiffstats
path: root/mediagoblin/submit/lib.py
blob: 9ec96923a08804ea0b6e89c713cbacb73e8535f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import logging
import uuid
from os.path import splitext

import six

from celery import chord

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.response import json_response
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.tools.federation import create_activity, create_generator
from mediagoblin.db.models import Collection, MediaEntry, ProcessingMetaData
from mediagoblin.processing import mark_entry_failed, get_entry_and_processing_manager
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media
from mediagoblin.user_pages.lib import add_media_to_collection


_log = logging.getLogger(__name__)


def check_file_field(request, field_name):
    """Verify that *field_name* in the request holds a usable uploaded file.

    The field must be present in ``request.files``, be a werkzeug
    ``FileStorage`` instance, and expose a non-empty stream.  Returns a
    truthy value when all three conditions hold.
    """
    uploaded = request.files.get(field_name)
    well_formed = (uploaded is not None
                   and isinstance(uploaded, FileStorage)
                   and uploaded.stream)
    if not well_formed:
        _log.debug("Form did not contain proper file field %s", field_name)
    return well_formed


def new_upload_entry(user):
    """Build a fresh, unsaved MediaEntry owned by *user*.

    Pre-populates the entry with the user's id as actor and the user's
    preferred license; the caller is responsible for saving it.
    """
    media_entry = MediaEntry()
    media_entry.actor = user.id
    media_entry.license = user.license_preference
    return media_entry


def get_upload_file_limits(user):
    """Return the ``(upload_limit, max_file_size)`` pair applying to *user*.

    A non-negative per-user ``upload_limit`` overrides the site-wide
    config value; otherwise the app config setting (possibly ``None``)
    is used.  ``max_file_size`` always comes from the app config.
    """
    per_user_limit = user.upload_limit
    # TODO: debug this (note kept from original -- the override
    # condition may need review)
    if per_user_limit is not None and per_user_limit >= 0:
        upload_limit = per_user_limit
    else:
        upload_limit = mg_globals.app_config.get('upload_limit', None)

    return upload_limit, mg_globals.app_config.get('max_file_size', None)


class UploadLimitError(Exception):
    """Base class for all upload-quota violations."""
    pass


class FileUploadLimit(UploadLimitError):
    """Raised when a single file exceeds the site-wide size limit."""
    pass


class UserUploadLimit(UploadLimitError):
    """Raised when this upload would push the user over their quota."""
    pass


class UserPastUploadLimit(UploadLimitError):
    """Raised when the user is *already* past their upload quota."""
    pass



def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None, collection_slug=None,
                 license=None, metadata=None, tags_string=u"",
                 callback_url=None, urlgen=None,):
    """
    Create, queue and dispatch processing for a new media upload.

    Args:
     - mg_app: The MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this.  Not necessarily the
       one on disk being referenced by submitted_file.
     - title: title for this media entry
     - description: description for this media entry
     - collection_slug: collection for this media entry
     - license: license for this media entry
     - metadata: media metadata dict stored on the entry
     - tags_string: comma separated string of tags to be associated
       with this entry
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update and assign a public
               ID used in the API (very important).

    Returns:
        The saved MediaEntry, already handed off to processing.

    Raises:
        UserPastUploadLimit: the user is already over their quota.
        FileUploadLimit: this file exceeds the site max_file_size.
        UserUploadLimit: this file would push the user over their quota.
    """
    upload_limit, max_file_size = get_upload_file_limits(user)
    # Reject before doing any work if the user is already over quota
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non ascii generate a unique name
    if not all(ord(c) < 128 for c in filename):
        filename = six.text_type(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, media_manager = sniff_media(submitted_file, filename)

    # create entry and save in database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    # Fall back to the filename (sans extension) when no title was given
    entry.title = (title or six.text_type(splitext(filename)[0]))

    entry.description = description or u""

    # NOTE(review): this overwrites the license_preference that
    # new_upload_entry() already set on the entry when no explicit
    # license is passed -- confirm that is intended.
    entry.license = license or None

    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    queue_file = prepare_queue_task(mg_app, entry, filename)

    with queue_file:
        queue_file.write(submitted_file)

    # Get file size in MiB and round to 2 decimal places
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float('{0:.2f}'.format(file_size))

    # Check if file size is over the limit
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Check if user is over upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    # Charge this upload against the user's quota
    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()

    # Various "submit to stuff" things, callbackurl and this silly urlgen
    # thing
    if callback_url:
        # NOTE(review): rebinds the `metadata` parameter to a
        # ProcessingMetaData row; entry.media_metadata was already set
        # above, so this is harmless but confusing.
        metadata = ProcessingMetaData()
        metadata.media_entry = entry
        metadata.callback_url = callback_url
        metadata.save()

    if urlgen:
        # Generate the public_id, this is very importent, especially relating
        # to deletion, it allows the shell to be accessable post-delete!
        entry.get_public_id(urlgen)

        # Generate the feed URL
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    add_comment_subscription(user, entry)

    # Create activity
    create_activity("post", entry, entry.actor)
    entry.save()

    # add to collection
    if collection_slug:
        collection = Collection.query.filter_by(slug=collection_slug,
                                                actor=user.id).first()
        if collection:
            add_media_to_collection(collection, entry)

    # Pass off to processing
    #
    # (... don't change entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    return entry


def prepare_queue_task(app, entry, filename):
    """
    Reserve a processing-queue slot for *entry* and return a writable
    queue file.

    Records the generated task id and queued file path on the entry so
    they are known before the task is dispatched.
    """
    # Generate the task id ourselves so we know it for retrieval later.
    # (Relying on the task's auto-generated id would risk a race
    # condition when saving after the task has been sent off.)
    task_id = six.text_type(uuid.uuid4())
    entry.queued_task_id = task_id

    # Build a unique queueing-related path for this upload
    queue_filepath = app.queue_store.get_unique_filepath(
        ['media_entries', task_id, secure_filename(filename)])

    # Remember where the queued file lives on the entry itself
    entry.queued_media_file = queue_filepath

    # Hand back a writable file in the queue store
    return app.queue_store.get_file(queue_filepath, 'wb')


def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Process the media asynchronously

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of. This will be sth like: `request.urlgen(
            'mediagoblin.user_pages.atom_feed',qualified=True,
            user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type"""

    entry, manager = get_entry_and_processing_manager(entry.id)

    try:
        workflow = manager.workflow(
            entry, feed_url, reprocess_action, reprocess_info)
        if workflow is not None:
            # Manager supplied a (header, body) pair for a celery chord
            chord(workflow[0])(workflow[1])
        else:
            # No custom workflow: dispatch a plain ProcessMedia task,
            # reusing the task id stored on the entry.
            ProcessMedia().apply_async(
                [entry.id, feed_url, reprocess_action, reprocess_info], {},
                task_id=entry.queued_task_id)
    except BaseException as exc:
        # When celery runs in "lazy" / always-eager-with-exceptions-
        # propagated mode, the failure handling never happens on the
        # Celery end, so we must mark the entry failed here as well.
        #
        # (Not quite the diaper pattern: the exception is re-raised.)
        mark_entry_failed(entry.id, exc)
        raise


def api_upload_request(request, file_data, entry):
    """ Handle an API media upload request, queueing the raw body. """
    # Same approach as mediagoblin/submit/views:submit_start
    entry.title = file_data.filename

    # Slug is filled in later; we don't have enough information yet.
    entry.slug = None

    # A public_id is a MUST for API access.
    entry.get_public_id(request.urlgen)

    # Queue the raw request body for processing
    with prepare_queue_task(request.app, entry, file_data.filename) as queue_file:
        queue_file.write(request.data)

    entry.save()
    return json_response(entry.serialize(request))

def api_add_to_feed(request, entry):
    """ Publish *entry* to the user's feed and kick off processing. """
    user = request.user
    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True, user=user.username)

    add_comment_subscription(user, entry)

    # Announce the new media with a federation "post" activity
    activity = create_activity(
        verb="post",
        obj=entry,
        actor=entry.actor,
        generator=create_generator(request))

    entry.save()
    run_process_media(entry, feed_url)

    return activity