diff --git a/app/controllers/turk_hit.py b/app/controllers/turk_hit.py index fbd830cf3abf625bd8c711e88acfbfda3508df04..79463aa0b0e2806efb5cace7cb6f0a126e9b4326 100644 --- a/app/controllers/turk_hit.py +++ b/app/controllers/turk_hit.py @@ -1,56 +1,254 @@ """Controllers for posting or manipulating a turk_hit.""" -from ..models.turk_hit import TurkHit -from ..forms.turk_hit import TurkHitForm -from .base import RestController +import base64 +import json +import requests +import mimetypes + +from flask import current_app from werkzeug.datastructures import MultiDict -from werkzeug.exceptions import BadRequest +from werkzeug.exceptions import NotFound, BadRequest + +from .base import RestController +from ..lib.database import proc +from ..lib.mech_turk import create_hit, approve_or_reject_hit,\ + get_hit_status +from ..lib.service import services +from ..models.turk_hit import TurkHit +from ..forms.turk_hit import TurkHitPostForm, TurkHitPutForm + + class TurkHitController(RestController): """A turk_hit controller.""" Model = TurkHit - def get_form(self, filter_data): + STATUS_DICT_ID = { + 1: 'Assignable', + 2: 'Unassignable', + 3: 'Reviewable', + 4: 'Accepted', + 5: 'Rejected', + 6: 'Disposed', + 7: 'Expired' + } + + # The above dictionary with the keys as values and the values as keys + STATUS_DICT_TEXT = {v: k for k, v in STATUS_DICT_ID.items()} + + def get_post_form(self, filter_data): + """Return the turk_hit form.""" + return TurkHitPostForm + + def get_put_form(self, filter_data): """Return the turk_hit form.""" - return TurkHitForm + return TurkHitPutForm def get(self, id_, filter_data): """ - Retrieves the status of a turk_hit given the building_id + Retrieve a list of TurkHit objects + + Args: + id_: The building_id + filter_data (ImmutableMultiDict): + address (string): The address of the building for saving to box + Returns: + list: + dict: TurkHit object + """ + address = filter_data.get('address') + if not address: + raise BadRequest("Please ensure there 
is an address in the url params") + try: + hit_list = proc(self.Model, self.Model.PROCS['READ'], **{'building_id': id_}) + + except Exception as err: + raise ( + err if current_app.config['DEBUG'] else + BadRequest('Error while executing db call') + ) + + if not hit_list: + return [] + + cur_hit = hit_list[0] + + # No need to check in with amazon if we've already reached one of those states + status_string = self.STATUS_DICT_ID[cur_hit.status_id] + if status_string in ["Accepted", "Rejected", "Disposed", "Expired"]: + return hit_list - :param id_: building_id of building - :param filter_data: no usage currently - :return: Turk HIT status and URL used to download completed HIT + # Get the new hit status from AWS + result = get_hit_status(cur_hit.amazon_hit_id) + new_hit_status = result['hit_status'] + + # No changes if the status is the same + if new_hit_status == status_string: + return hit_list + # Download file here + if result['file_url'] and not cur_hit.csv_document_key: + response = requests.get(result['file_url']) + uploaded_fname = response.headers['Content-Disposition'].split('"')[1] + content_type = mimetypes.guess_type(uploaded_fname)[0] + # Make sure the content_type is in .xlsx format + if content_type != 'application/vnd.openxmlformats-' \ + 'officedocument.spreadsheetml.sheet': + # Automatically Reject the HIT if incorrect file format + response_message = 'This hit was automatically rejected because '\ + 'the uploaded file was not in the correct file '\ + 'format. The file must be of type .xlsx.' 
+ put_body = {**cur_hit.get_dictionary(), + **{'approve': 0, 'response_message': response_message}} + self.put(cur_hit.db_id, put_body, None) + cur_hit.response_message = response_message + new_hit_status = "Rejected" + + else: + file_name = "BuildingDimensions--{}--{}.xlsx".format(cur_hit.building_id, + cur_hit.amazon_hit_id) + folder_path = "/Buildings/{}_{}/Building_Dimensions".format(cur_hit.building_id, + address) + # Download the file + document = self.download_file(cur_hit.building_id, folder_path, + file_name, response.content) + # Get the key to update the model + cur_hit.csv_document_key = document['key'] + + # Update the entry in the database + new_hit_status_id = self.STATUS_DICT_TEXT[new_hit_status] + cur_hit.status_id = new_hit_status_id + try: + proc(self.Model, + self.Model.PROCS['UPDATE'], + **cur_hit.get_dictionary()) + except Exception as err: + raise ( + err if current_app.config['DEBUG'] else + BadRequest('Error while executing db call') + ) + return hit_list + + def download_file(self, building_id, folder_path, file_name, byte_data): + """ + A function to download a file to box """ + encoded_byte_data = base64.b64encode(byte_data) + encoded_string_data = encoded_byte_data.decode('utf-8') + + post_data = { + "path": folder_path, + "data": "data:csv/plain;charset=utf-8;base64,{}".format(encoded_string_data), + "building_id": str(building_id), + "tags": '', + "name": file_name + } + # Call the generic method in the base class + response = services.document.post('', '/document/', data=json.dumps(post_data)) + if response.status_code != 201: + raise ( + BadRequest(vars(response)) if current_app.config['DEBUG'] else + BadRequest("Unable to download document to document service") + ) + + document = response.json()['data'] + return document - return self.Model.get_hit_status(id_) def post(self, data, filter_data): """ - Validate post data and call function in model + Create a TurkHit object - :param data: Args from Body - :param filter_data: Args from 
POST URL - :return: Dict containing all POST data + Args: + data (ImmutableMultiDict): Args for stored proc and mech turk + building_id (int): Building id for this hit + requester_name (string: Name of the user who requested this hit + The rest are inputs for the mech turk hit + address (string) + max_file_bytes (int) + min_file_bytes (int) + instructions_text (string) + instructions_url (string) + worksheet_url (string) + max_assignments (int) + title (string) + description (string) + keywords (string) + duration (int) + reward (string) + Returns: + dict: TurkHit object """ - form = self.get_form(filter_data)(formdata=MultiDict(data)) + form = self.get_post_form(filter_data)(formdata=MultiDict(data)) if not form.validate(): raise BadRequest(form.errors) - return TurkHit(**data).send_hit() + requester_name = data.pop('requester_name') + building_id = data.pop('building_id') + + # Create the hit in amazon + amazon_hit_id = create_hit(**data) + + # Store the hit data in the database + try: + hit_list = proc(self.Model, self.Model.PROCS['WRITE'], + **{'building_id': building_id, + 'amazon_hit_id': amazon_hit_id, + 'requester_name': requester_name, + 'status_id': self.STATUS_DICT_TEXT['Assignable']}) + except Exception as err: + raise ( + err if current_app.config['DEBUG'] else + BadRequest('Error while executing db call') + ) + + if not hit_list: + raise NotFound("Hit was not found in the database") + + return hit_list[0] + + def put(self, id_, data, filter_data): """ - Validate put data and call corresponding function in model + Update a TurkHit object - :param data: Args from Body - :param filter_data: Args from POST URL - :return: List of assignment IDs approved or rejected + Args: + id_ (str): db_id of a hit + data (ImmutableMultiDict): Args for stored proc + Should be the same as the TurkHitAcceptForm + Returns: + dict: TurkHit object """ - # TODO add form validation + form = self.get_put_form(filter_data)(formdata=MultiDict(data)) + if not form.validate(): + 
raise BadRequest(form.errors) + if int(id_) != data["db_id"]: + raise BadRequest("The URL id and the body db_id are not the same") + + approve = data.pop("approve") + + # Approve or reject the hit on the amazon mech turk side + if approve_or_reject_hit(data["amazon_hit_id"], + approve, + data["response_message"]): + # Update the hit data in the database + try: + approve_id = self.STATUS_DICT_TEXT['Accepted'] + reject_id = self.STATUS_DICT_TEXT['Rejected'] + status_id = approve_id if approve else reject_id + + # The data + data['status_id'] = status_id - if data["accept"]: - return self.Model.accept_hit(id_) + hit_list = proc(self.Model, self.Model.PROCS['UPDATE'], + **data) - return self.Model.reject_hit(id_) + except Exception as err: + raise ( + err if current_app.config['DEBUG'] else + BadRequest('Error while executing db call') + ) + return hit_list[0] + else: + raise BadRequest("There were no completed assignments to approve or reject") diff --git a/app/forms/turk_hit.py b/app/forms/turk_hit.py index c2548cf897f26ad8839929a1a36d2be028ad1972..b16d9b3a972572a3691b85c783eab8d9b980bec5 100644 --- a/app/forms/turk_hit.py +++ b/app/forms/turk_hit.py @@ -1,11 +1,13 @@ import wtforms as wtf -class TurkHitForm(wtf.Form): +class TurkHitPostForm(wtf.Form): """ A form for validating turk hits.""" building_id = wtf.IntegerField( validators=[wtf.validators.Required()]) + requester_name = wtf.StringField( + validators=[wtf.validators.Required()]) min_file_bytes = wtf.IntegerField( validators=[wtf.validators.Required()]) @@ -33,3 +35,37 @@ class TurkHitForm(wtf.Form): validators=[wtf.validators.Required()]) reward = wtf.FloatField( validators=[wtf.validators.Required()]) + +class TurkHitPutForm(wtf.Form): + """ A form for validating turk hits.""" + db_id = wtf.IntegerField( + validators=[wtf.validators.Required()]) + building_id = wtf.IntegerField( + validators=[wtf.validators.Required()]) + amazon_hit_id = wtf.StringField( + validators=[wtf.validators.Required()]) + 
status_id = wtf.IntegerField( + validators=[wtf.validators.Required()]) + hit_date = wtf.StringField( + validators=[wtf.validators.Optional()]) + requester_name = wtf.StringField( + validators=[wtf.validators.Optional()]) + csv_document_key = wtf.StringField( + validators=[wtf.validators.Optional()]) + shapefile_document_key = wtf.StringField( + validators=[wtf.validators.Optional()]) + response_message = wtf.StringField( + validators=[wtf.validators.Optional()]) + + approve = wtf.IntegerField( + validators=[wtf.validators.AnyOf([0, 1])]) + + def validate(self): + if not wtf.Form.validate(self): + return False + # If the hit will be rejected, ensure there is a message + if not self.approve.data and not self.response_message.data: + self.response_message.errors.append('If accept is 0 explanation must be nonempty') + return False + return True + diff --git a/app/lib/database.py b/app/lib/database.py index 23bea8bd4efd9f432b758b4570b53efa4ac0f761..808c41cb0fddaa47f953825bae22de30a928980f 100644 --- a/app/lib/database.py +++ b/app/lib/database.py @@ -64,7 +64,10 @@ def proc(model, method, limit=None, offset=None, **kwargs): params = "" cols = ','.join(str(i) for i in model.__table__.get_columns()) for key, value in kwargs.items(): - params += "in_{} := '{}', ".format(key, value) + if value is not None: + params += "in_{} := '{}', ".format(key, value) + else: + params += "in_{} := null, ".format(key) params = params[:-2] # remove last comma and space query = "select {} from {}.{}({})".format( diff --git a/app/lib/mech_turk.py b/app/lib/mech_turk.py new file mode 100644 index 0000000000000000000000000000000000000000..83a3341c2793164e433c36e3b0857691aebee5d1 --- /dev/null +++ b/app/lib/mech_turk.py @@ -0,0 +1,118 @@ +""" +A conveniance instance for interaction with the mech turk API. 
All access to +mech turk should be through this file +""" +from flask import current_app +from boto.mturk.connection import MTurkConnection +from boto.mturk.question import QuestionContent, Question, QuestionForm,\ + Overview, AnswerSpecification, FileUploadAnswer + +def get_mturk_connection(): + AWS_ACCESS_KEY = current_app.config.get('AWS_ACCESS_KEY') + AWS_SECRET_ACCESS_KEY = current_app.config.get('AWS_SECRET_ACCESS_KEY') + HOST = current_app.config.get('MECH_TURK_HOST') + if not AWS_ACCESS_KEY or not AWS_SECRET_ACCESS_KEY or not HOST: + raise ValueError("AWS keys or HOST are empty in the config file") + + return MTurkConnection( + aws_access_key_id=AWS_ACCESS_KEY, + aws_secret_access_key=AWS_SECRET_ACCESS_KEY, + host=HOST + ) + +def create_hit(min_file_bytes, max_file_bytes, + address, max_assignments, instructions_url, + instructions_text, worksheet_url, title, + description, keywords, duration, reward): + """ + Send a mechanical turk hit + + Returns: + amazon_hit_id + """ + mturk_connection = get_mturk_connection() + + # Overview + overview = Overview() + overview.append_field('Title', address) + + # Instructions + qc1 = QuestionContent() + qc1.append_field('Title', 'Instructions') + qc1.append_field('Text', instructions_text) + qc1.append_field('Text', instructions_url) + qc1.append_field('Title', 'Worksheet') + qc1.append_field('Text', worksheet_url) + + file_upload = FileUploadAnswer( + min_file_bytes, max_file_bytes) + + question = Question(identifier='measure_building', + content=qc1, + answer_spec=AnswerSpecification(file_upload), + is_required=True) + + # Question Form + question_form = QuestionForm() + question_form.append(overview) + question_form.append(question) + + # Creates a new HIT + result_set = mturk_connection.create_hit( + questions=question_form, + max_assignments=max_assignments, + title=title, + description=description, + keywords=keywords, + duration=duration, + reward=reward) + + amazon_hit_id = getattr(result_set[0], 'HITId') + 
return amazon_hit_id + +def get_hit_status(amazon_hit_id): + + mturk_connection = get_mturk_connection() + + hit = mturk_connection.get_hit(amazon_hit_id) + hit_status = getattr(hit[0], 'HITStatus') + + file_url = None + # Only call mturk to grab file_url if hit status is in a state + # where there possibly could be a file_url + if hit_status in ["Reviewable", "Accepted", "Rejected"]: + completed_assignments = mturk_connection.get_assignments(amazon_hit_id) + if completed_assignments: + assignment = completed_assignments[0] + file_url_object = mturk_connection.get_file_upload_url( + assignment.AssignmentId, "measure_building") + file_url = file_url_object[0].FileUploadURL + return {'hit_status': hit_status, 'file_url': file_url} + + +def approve_or_reject_hit(amazon_hit_id, approve, response_message): + """ + Given an amazon_hit_id, accept or reject the completed assignment + + Args + amazon_hit_id (string): The hit id for this amazon hit + approve (int): 0 or 1 depending on reject or approve + response message (string): The string to be used on rejection + Returns + True if succesful, false if not + Throws + A MTurkRequestError if AWS credentials are wrong, + the hit does not exist, or if the assignment is not reviewable + """ + mturk_connection = get_mturk_connection() + + assignment_id = "" + completed_assignments = mturk_connection.get_assignments(amazon_hit_id) + if completed_assignments: + assignment = completed_assignments[0] + if approve: + mturk_connection.approve_assignment(assignment.AssignmentId) + else: + mturk_connection.reject_assignment(assignment.AssignmentId, feedback=response_message) + return True + return False diff --git a/app/models/base.py b/app/models/base.py index 7b5db710f9090d32dbd9986928bc8b830b733e70..886367d7a7f76c004f84e6ab592485519d0cfd8d 100644 --- a/app/models/base.py +++ b/app/models/base.py @@ -63,89 +63,6 @@ class Model(BaseModel): id = db.Column(db.Integer, primary_key=True) - @staticmethod - def run_proc(method, schema, 
columns=None, limit=None, offset=None, **kwargs): - """ - Run stored proc - Args: - method (str) - Method name for stored procedure - columns (list) - DB Columns - kwargs - containing arguments for stored proc - Returns: - the results of the query, to be handled outside this function - """ - cols = '*' if not columns else ','.join(str(i) for i in columns) - - params = "" - for key, value in kwargs.items(): - params += "in_{} := '{}', ".format(key, value) - params = params[:-2] # remove last comma and space - - query = "select {} from {}.{}({})".format(cols, schema, method, params) - if limit: - query += ' limit {}'.format(limit) - if offset: - query += ' offset {}'.format(offset) - try: - results = db.session.execute(query) - db.session.commit() - except Exception as err: - raise BadRequest('Something went wrong' - ' in the database: {}'.format(str(err))) - return results - - @staticmethod - def download_to_documentservice(byte_data, building_id, address, - file_name, folder_name, tag=""): - """ - Converts inputted byte_data into proper format - and downloads using document service - - :param byte_data: The data to download in byte format - :param building_id: The building id of the file - :param file_name: The file name to download - :param folder_name: The name of the folder within a building - :param tag: Tag to add to the document - :return: the response from document service - """ - encoded_byte_data = base64.b64encode(byte_data) - encoded_string_data = encoded_byte_data.decode('utf-8') - - folder_path = "/Buildings/{}_{}/{}".format(building_id, - address, - folder_name) - post_data = { - "path": folder_path, - "data": "data:csv/plain;charset=utf-8;base64,{}".format(encoded_string_data), - "building_id": building_id, - "tags": tag, - "name": file_name - } - # Call the generic method in the base class - return services.document.post('', '/document/', data=json.dumps(post_data)) - - @staticmethod - def search_documents(path): - """ - Get a list of documents that 
have the inputed - building_id and are in the inputed path - - :param building_id: building_id of building - :param path: The folder path in box - :return: A list of documents that are in this path and building id - """ - url = '/document/?paths[]={}'.format(path) - response = services.document.get(url) - if response.status_code == 200: - document_list = response.json()['data'] - else: - raise InternalServerError('Failed to get documents from document' - 'service: {} {}'.format( - response.status_code, - response.reason) - ) - return document_list - class Tracked(object): """A mixin to include tracking datetime fields.""" diff --git a/app/models/turk_hit.py b/app/models/turk_hit.py index cfd5dac17bb5ed4b86a0a5dac46fda3386463ed8..cd3cd2dc597cbe73aa69bc8eecb2fccf960b52e5 100644 --- a/app/models/turk_hit.py +++ b/app/models/turk_hit.py @@ -1,371 +1,47 @@ """Models for dealing with turk hit.""" -import requests -import mimetypes -from ..lib.database import db -from .base import Model -from boto.mturk.connection import MTurkConnection -from boto.mturk.question import QuestionContent, Question, QuestionForm,\ - Overview, AnswerSpecification, FileUploadAnswer -from werkzeug.exceptions import InternalServerError -from flask import current_app -from datetime import datetime -from werkzeug.exceptions import NotFound -import json +from ..lib.database import ProcColumn, ProcTable +from .base import BaseModel -class TurkHit(Model): +class TurkHit(BaseModel): + __table_args__ = {"schema": "mechanical_turk"} - STATUS_DICT = { - 1: 'Assignable', - 2: 'Unassignable', - 3: 'Reviewable', - 4: 'Accepted', - 5: 'Rejected', - 6: 'Disposed', - 7: 'Expired' + PROCS = { + 'READ': 'get_hitinfo', + 'WRITE': 'create_hit', + 'UPDATE': 'update_hit', } - HIT_STATUS_ASSIGNABLE = 1 - def __init__(self, - building_id, - min_file_bytes, max_file_bytes, - address, max_assignments, instructions_url, - instructions_text, worksheet_url, title, - description, keywords, duration, reward - ): - # The 
building_id associated with this building + __table__ = ProcTable( + 'TurkHit', + ProcColumn('db_id'), + ProcColumn('building_id'), + ProcColumn('amazon_hit_id'), + ProcColumn('status_id'), + ProcColumn('hit_date'), + ProcColumn('requester_name'), + ProcColumn('csv_document_key'), + ProcColumn('shapefile_document_key'), + ProcColumn('response_message'), + ) + + def __init__(self, db_id, building_id, amazon_hit_id, status_id, + hit_date, requester_name, csv_document_key, + shapefile_document_key, response_message + ): + + self.db_id = db_id self.building_id = building_id - - # The min and max size of the upload file - self.min_file_bytes = min_file_bytes - self.max_file_bytes = max_file_bytes - - # Used to create the mechanical turk QuestionForm - self.address = address - self.instructions_url = instructions_url - self.instructions_text = instructions_text - self.worksheet_url = worksheet_url - - # Used for the rest of the mechanical turk hit creation parameters - self.max_assignments = max_assignments - self.title = title - self.description = description - self.keywords = keywords - self.duration = duration - self.reward = reward + self.amazon_hit_id = amazon_hit_id + self.status_id = status_id + self.hit_date = hit_date + self.requester_name = requester_name + self.csv_document_key = csv_document_key + self.shapefile_document_key = shapefile_document_key + self.response_message = response_message def __str__(self): - return "Turk Call for Building: {}".format(self.building_id) - - @staticmethod - def get_mturk_connection(): - AWS_ACCESS_KEY = current_app.config.get('AWS_ACCESS_KEY') - AWS_SECRET_ACCESS_KEY = current_app.config.get('AWS_SECRET_ACCESS_KEY') - HOST = current_app.config.get('MECH_TURK_HOST') - if not AWS_ACCESS_KEY or not AWS_SECRET_ACCESS_KEY or not HOST: - raise ValueError("AWS keys or HOST are empty in the config file") - - return MTurkConnection( - aws_access_key_id=AWS_ACCESS_KEY, - aws_secret_access_key=AWS_SECRET_ACCESS_KEY, - host=HOST - ) - - 
def send_hit(self): - """ - Send a mechanical turk hit - - :return: A dict containing the HITId - """ - mturk_connection = TurkHit.get_mturk_connection() - - # Overview - overview = Overview() - overview.append_field('Title', self.address) - - # Instructions - qc1 = QuestionContent() - qc1.append_field('Title', 'Instructions') - qc1.append_field('Text', self.instructions_text) - qc1.append_field('Text', self.instructions_url) - qc1.append_field('Title', 'Worksheet') - qc1.append_field('Text', self.worksheet_url) - - file_upload = FileUploadAnswer( - self.min_file_bytes, self.max_file_bytes) - - question = Question(identifier='measure_building', - content=qc1, - answer_spec=AnswerSpecification(file_upload), - is_required=True) - - # Question Form - question_form = QuestionForm() - question_form.append(overview) - question_form.append(question) - - # Creates a new HIT - result_set = mturk_connection.create_hit( - questions=question_form, - max_assignments=self.max_assignments, - title=self.title, - description=self.description, - keywords=self.keywords, - duration=self.duration, - reward=self.reward) - - # Store hit_id in database - hit_id = getattr(result_set[0], 'HITId') - Model.run_proc("create_hit", 'mechanical_turk', **{ - "building_id": self.building_id, - "amazon_hit_id": hit_id, - "status_id": self.HIT_STATUS_ASSIGNABLE - }) - - return {**self.__dict__, **{'hit_id': hit_id}, - **{'status': 'Assignable'}} - - @staticmethod - def get_hit_status(building_id): - """ - Retrieves the status of a turk_hit given the building_id - Also retrieves all of the buildin dimension files in box - that are associated with this building_id - If the hit is Reviewable for the first time, download the file - - :param building_id: building_id of building - :return: Turk HIT status and URL to download completed HIT - Turk HIT status can be: - Assignable (hit has been created but no worker has started on it) - Unassignable (someone is working on the hit) - Reviewable (hit expired) - 
Accepted (hit accepted) - Rejected - Diposed - - """ - # Get the address associated with this building_id - results = Model.run_proc("get_building", 'public', - columns=['address'], - **{"building_id": building_id}) - data = results.fetchone() - if not data: - raise NotFound('The inputted buildingId was' - ' not found in the database') - address = data[0] - # The path in box where we will search for documents to return - box_path = "/Buildings/{}_{}/Building_Dimensions".format(building_id, - address) - - stored_hit_status = TurkHit.get_stored_hit_status(building_id) - - # If we know the stored_hit_status is accept or reject we return here - if stored_hit_status == "Accepted" or stored_hit_status == "Rejected": - document_list = Model.search_documents(box_path) - return {'status': stored_hit_status, - 'box_building_list': document_list} - - mturk_connection = TurkHit.get_mturk_connection() - # Retrieve the HITId from database using the building_id - hit_id = TurkHit.get_hid_from_bid(building_id) - - hit = mturk_connection.get_hit(hit_id) - new_hit_status = getattr(hit[0], 'HITStatus') - - file_url = "" - # For now we are indexing into the first assignment because we - # don't expect to see multiple assignments per hit - completed_assignments = mturk_connection.get_assignments(hit_id) - if completed_assignments: - assignment = completed_assignments[0] - file_url_object = mturk_connection.get_file_upload_url( - assignment.AssignmentId, "measure_building") - file_url = file_url_object[0].FileUploadURL - - # Hit has expired if there are no URLs and it's in reviewable state - if new_hit_status == "Reviewable" and not file_url: - new_hit_status = "Expired" - - - box_url = '' - box_download_url = '' - # If we are newly in a reviewable state we can download the document - if (new_hit_status == "Reviewable" and - stored_hit_status != "Reviewable"): - for i in range(5): - box_response = TurkHit.download_hit_file(building_id, - address, - hit_id, - file_url) - if 
box_response.status_code == 201: - break - print("Trying to connect to box... " + str(i)) - # If the document fails to download to box, return an error - if box_response.status_code != 201: - raise InternalServerError('Failed to download hit to' - ' box. Try reloading.') - - if new_hit_status != stored_hit_status: - if not TurkHit.update_stored_hit_status(building_id, - new_hit_status): - raise InternalServerError('Failed to update hit in' - ' the backend database.') - - document_list = Model.search_documents(box_path) - return {'status': new_hit_status, - 'box_building_list': document_list} - - @staticmethod - def download_hit_file(building_id, address, hit_id, file_url): - """ - Given a building_id, download the mechanical turk - hit document using the document service. - - Also checks to make sure the file is .xlsx format - - :param building_id: The building_id of the building - :return: The URL of the file downloaded - """ - - response = requests.get(file_url) - - # The Content-Type value just returns binary/octet, so we - # need to guess the actually content_type from the file name - # Content-Disposation returns a string in the format: - # Content-Disposition: attachment; filename="test.xlsx" - uploaded_fname = response.headers['Content-Disposition'].split('"')[1] - content_type = mimetypes.guess_type(uploaded_fname)[0] - # Make sure the content_type is in .xlsx format - if content_type != 'application/vnd.openxmlformats-' \ - 'officedocument.spreadsheetml.sheet': - # TODO: Automatically Reject the HIT if incorrect file format - pass - - byte_data = response.content - file_name = "BuildingDimensions--{}--{}.xlsx".format(building_id, - hit_id) - box_response = Model.download_to_documentservice(byte_data, - building_id, - address, - file_name, - "Building_Dimensions") - return box_response - - @staticmethod - def accept_hit(building_id): - """ - Approve all of the assignments of the hit associated - with the inputted building_id - - :param building_id: The 
building_id of the building - :return: The assignment ID that was accepted and status - :throws: MTurkRequestError if the hit was not found or - is not in the correct state to be approved (Reviewable) - """ - mturk_connection = TurkHit.get_mturk_connection() - - hit_id = TurkHit.get_hid_from_bid(building_id) - - assignment_id = "" - completed_assignments = mturk_connection.get_assignments(hit_id) - if completed_assignments: - assignment = completed_assignments[0] - # assignment.AssignmentStatus returns the status of the assignment - mturk_connection.approve_assignment(assignment.AssignmentId) - assignment_id = assignment.AssignmentId - # From the AWS docs: - # A successful request for the ApproveAssignment operation - # returns with no errors. - - # Update status in database to reflect approved status - TurkHit.update_stored_hit_status(building_id, "Accepted") - return {"assignment_id": assignment_id, 'hit_status': 'Accepted'} - - @staticmethod - def reject_hit(building_id): - """ - Given a building_id, reject the completed assignment - - :param building_id: The building_id of the building - :return: The assignmentID that was rejected and the status - :throws: A MTurkRequestError if AWS credentials are wrong, - the hit does not exist, or if the assignment is not reviewable - """ - mturk_connection = TurkHit.get_mturk_connection() - - hit_id = TurkHit.get_hid_from_bid(building_id) - - assignment_id = "" - completed_assignments = mturk_connection.get_assignments(hit_id) - if completed_assignments: - assignment = completed_assignments[0] - mturk_connection.reject_assignment(assignment.AssignmentId) - assignment_id = assignment.AssignmentId - # Update status in database to reflect rejected status - TurkHit.update_stored_hit_status(building_id, "Rejected") - # TODO: Remvoe document from box - return {"assignment_id": assignment_id, 'hit_status': 'Rejected'} - - @staticmethod - def get_hid_from_bid(building_id): - """ - Get the hit ID associated with inputted building ID 
- - :param building_id: The building ID - :return: The HIT ID or None if such a hitID does not exist - """ - results = Model.run_proc("get_hit_id", "mechanical_turk", - **{"building_id": building_id}) - # Return the first value returned by the query - data = results.fetchone() - if data: - return data[0] - return None - - @staticmethod - def get_stored_hit_status(building_id): - """ - Get the HIT status that is stored in the database - - :param building_id: The building ID - :return: the HIT status - """ - results = Model.run_proc("get_hit_status", "mechanical_turk", - **{"building_id": building_id}) - - status = "" - - # Get the first value returned by the query - data = results.fetchone() - if not data: - return status - - status_id = data[0] - - if TurkHit.STATUS_DICT.get(status_id): - status = TurkHit.STATUS_DICT[status_id] - - return status - - @staticmethod - def update_stored_hit_status(building_id, hit_status): - """ - Update the HIT status associated with the building_id - - :param building_id: The building ID - :param hit_status: The integer corresponding to the hit status - :return: the updated HIT status - """ - hit_status_id = None - for dict_status_id, dict_status in TurkHit.STATUS_DICT.items(): - if dict_status == hit_status: - hit_status_id = dict_status_id - if not hit_status_id: - return None + return "Turk Hit for building {} with dict: {}".format(self.building_id, + self.get_dictionary()) - results = Model.run_proc("update_hit_status", - "mechanical_turk", - **{"building_id": building_id, - "status_id": hit_status_id}) - return results diff --git a/app/views/turk_hit.py b/app/views/turk_hit.py index 2aea9b27516e341d856155395fcf114df23bf33f..0190a07c6d7c3dca6fea002362da178a6b58a694 100644 --- a/app/views/turk_hit.py +++ b/app/views/turk_hit.py @@ -19,14 +19,18 @@ class TurkHitView(RestView): def get(self, id_): """/{id} GET - Retrieve the hit status by building id.""" try: - response = self.get_controller().get(id_, None) + response = 
self.get_controller().get(id_, request.args) # Catch an MTurkRequestError (usually AWS) and return error code except MTurkRequestError as error: # If the HITId does not exist return 404 if error.error_code == 'AWS.MechanicalTurk.HITDoesNotExist': raise NotFound(error.error_code) raise BadGateway(error.error_code) - return self.json(response) + return self.json({ + 'data': [ + self.parse(m) for m in response + ] + }) def post(self): """/ POST - Create a hit given an id in the POST body""" @@ -37,8 +41,9 @@ class TurkHitView(RestView): except MTurkRequestError as error: raise BadGateway(error.error_code) - return self.json(response, 201) - + return self.json({ + 'data': self.parse(response) + }, 201) def put(self, id_): """ / PUT - Approve or decline a hit given a building_id. @@ -56,7 +61,10 @@ class TurkHitView(RestView): raise BadRequest(error.error_code) raise BadGateway(error.error_code) - return self.json(response) + return self.json({ + 'data': self.parse(response) + + }) def delete(self, id_): raise MethodNotAllowed() diff --git a/docs/API/turkhit.md b/docs/API/turkhit.md index 7adb033104d119a12f8fb0c385dc31bbf9539a64..9674f5b7b01ce3e08a851a46a51ca94d064303cd 100644 --- a/docs/API/turkhit.md +++ b/docs/API/turkhit.md @@ -8,7 +8,7 @@ ---- ## Get Hit Status -Get the hit status of the currently active mech turk hit associated with the inputted building_id and the file data of all of the mech turk box files associated with the inputted building_id. If the hit is newly completed download the file to box. +Get the data of all mech turk hit associated with the inputted building_id. If the newest hit is newly completed download the file to box. 
* **URL** /turkhit/:building_id/ @@ -23,41 +23,33 @@ Get the hit status of the currently active mech turk hit associated with the inp **Content:** ``` -"box_building_list": [ +"data": [ { - "box_id": 130364791242, - "building_id": "12345", - "content_type": "csv/plain;charset=utf-8", - "created": "2017-02-01T12:57:52.241794+00:00", - "id": 2, - "key": "36c7d0a9-8357-42dc-bcf0-6423791014b5", - "name": "BuildingDimensions2017-02-01 12:57:48.151547.xlsx", - "path": "/Buildings/12345_120 EAST 37 STREET/Building_Dimensions", - "tags": "", - "updated": null, - "url_box": "https://blocpower.box.com/s/fz4u5iyl0uizbz0xsdfscetbdz82h7s8", - "url_download": "https://blocpower.box.com/shared/static/fz4u5iyl0uizbz0xsdfscetbdz82h7s8.xlsx" + "amazon_hit_id": "3JMQI2OLFZ1MUYF5YFY5JGD6PZWDNL", + "building_id": 180361, + "csv_document_key": "4af4c35f-40f6-4af9-85a2-80e34e137d58", + "db_id": 21, + "hit_date": "Thu, 02 Mar 2017 15:27:25 GMT", + "requester_name": "no name", + "response_message": null, + "shapefile_document_key": null, + "status_id": 4 }, { - "box_id": 130403024306, - "building_id": "12345", - "content_type": "csv/plain;charset=utf-8", - "created": "2017-02-01T14:17:10.684649+00:00", - "id": 25, - "key": "c07a978a-d86f-4d87-b21a-db3512617982", - "name": "BuildingDimensions2017-02-01 14:17:06.179782.xlsx", - "path": "/Buildings/12345_120 EAST 37 STREET/Building_Dimensions", - "tags": "", - "updated": null, - "url_box": "https://blocpower.box.com/s/ibj0t8uh79e0mao1rq9vmb2qusuwpuug", - "url_download": "https://blocpower.box.com/shared/static/ibj0t8uh79e0mao1rq9vmb2qusuwpuug.xlsx" + "amazon_hit_id": "3YGE63DIN8TII8NPE41X0S6EOLK0WX", + "building_id": 180361, + "csv_document_key": "46985658-bf1a-4320-997c-cfde3c9f5ba3", + "db_id": 19, + "hit_date": "Wed, 01 Mar 2017 22:03:51 GMT", + "requester_name": "no name", + "response_message": "Rejection message test", + "shapefile_document_key": "None", + "status_id": 5 }, ... ... ... 
-], -"error": "", -"status": "Reviewable" +] ``` @@ -72,22 +64,16 @@ Get the hit status of the currently active mech turk hit associated with the inp * **Notes:** -2/7/17 - **BUG:** Inputting a building\_id that is non integer returns a 500 internal server error. - -2/7/17 - **BUG:** Inputting a building\_id that does not exist in the database returns a 500 internal server error. - -2/7/17 - If there is some error in the GET function but there is other information that needs to be returned (files that have already been downloaded, hit status, etc.) then the error will be returned in the 'error' field and the front end must handle it on its own. Possible values for this error include _'Failed to update the hit in the backend database'_ and _'Failed to download the hit to box. Try reloading'_. - 2/7/17 - This endpoint does a lot more than just get the hit status. Here is the flow: ``` -1. Get the hit status from the database. -2. If the database hit status is Accepted or Rejected, return this status with a list of box files associated with this hit. +1. Get all hits from the database. +2. If the database hit status of the most recent hit is Accepted or Rejected, return all hits. 3. Get the hit status from amazon mechanical turk API. 4. Get the URL of the completed hit (it will be empty if the hit is in the Assignable, Unassignable or Expired state). 5. Download the hit to box if we are newly in the Reviewable state. -6. Assuming the download succeeded, update the database with the new state. -7. Return the hit status and all of the box files associated with this hit. +6. Assuming the download succeeded, update the database with the new data. +7. Return all hits. 
``` ---- @@ -111,6 +97,7 @@ Create a mechanical turk hit with the inputted parameters ``` building_id=[integer] address=[string] +requester_name=[string] max_file_bytes=[integer] min_file_bytes=[integer] instructions_text=[string] @@ -131,21 +118,18 @@ reward=[string] **Content:** ``` -"address": "some address 2", -"building_id": 12345, -"description": "some description", -"duration": 300, -"hit_id": "39WICJI5ATOITVRR8E3N1UY38SJZ3E", -"instructions_text": "We need to get building dimensions for buildings using Google Earth. ", -"instructions_url": "http://beta.blocpower.us/dimensions/BuildingDimensionsInstructions.pdf", -"keywords": "some keyword", -"max_assignments": 1, -"max_file_bytes": 80000, -"min_file_bytes": 1, -"reward": ".26", -"status": "Assignable", -"title": "some title", -"worksheet_url": "some worksheet url" +"data": { + + "amazon_hit_id": "3JMQI2OLFZ1MUYF5YFY5JGD6PZWDNL", + "building_id": 180361, + "csv_document_key": null, + "db_id": 21, + "hit_date": "Thu, 02 Mar 2017 15:27:25 GMT", + "requester_name": "Alessandro DiMarco", + "response_message": null, + "shapefile_document_key": null, + "status_id": 1 +} ``` * **Error Response:** @@ -161,18 +145,15 @@ reward=[string] * **Notes:** -2/7/17 - **BUG:** Inputting a building_id that doesn't exist in the database raises a 500 internal server error - - ---- ## Approve Hit -Approve or reject the mech turk hit associated with the inputted building_id +Approve or reject the mech turk hit associated with the inputted db_id * **URL** - /building/:building_id/ + /building/:db_id/ * **Method:** @@ -184,7 +165,16 @@ Approve or reject the mech turk hit associated with the inputted building_id **Required:** ``` +db_id=[integer]: Must be the same as in the URI accept=[integer]: 1 to approve, 0 to reject +response_message=[string]: Required if accept = 0 +amazon_hit_id +building_id +csv_document_key +hit_date +requester_name +shapefile_document_key +status_id ``` * **Success Response:** @@ -194,8 +184,18 @@ accept=[integer]: 1 to
approve, 0 to reject **Content:** ``` -"assignment_id": "373ERPL3YP4HD51O3EK2MOVI603TRH", -"hit_status": "Accepted" +"data": { + + "amazon_hit_id": "3JMQI2OLFZ1MUYF5YFY5JGD6PZWDNL", + "building_id": 180361, + "csv_document_key": null, + "db_id": 21, + "hit_date": "Thu, 02 Mar 2017 15:27:25 GMT", + "requester_name": "Alessandro DiMarco", + "response_message": null, + "shapefile_document_key": null, + "status_id": 4 +} ``` * **Error Response:** @@ -216,5 +216,3 @@ accept=[integer]: 1 to approve, 0 to reject **Content:** `{ error : "ValueError: AWS keys or HOST are empty in the config file" }` * **Notes:** - -2/7/17 - **BUG:** Inputting a building_id that doesn't exist in the database raises a 500 internal server error