Source code for grandchallenge.evaluation.models

from json import dumps
from pathlib import Path
from urllib.parse import parse_qs, urljoin, urlparse

from django.conf import settings
from django.contrib.auth.models import Group
from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.validators import RegexValidator
from django.db import models
from django.utils.text import get_valid_filename
from django_extensions.db.fields import AutoSlugField
from guardian.shortcuts import assign_perm, remove_perm

from grandchallenge.algorithms.models import AlgorithmImage
from grandchallenge.algorithms.tasks import (
    create_algorithm_jobs_for_evaluation,
)
from grandchallenge.archives.models import Archive
from grandchallenge.challenges.models import Challenge
from grandchallenge.components.backends.docker import Executor, put_file
from grandchallenge.components.models import (
    ComponentImage,
    ComponentInterface,
    ComponentInterfaceValue,
    ComponentJob,
)
from grandchallenge.core.models import UUIDModel
from grandchallenge.core.storage import protected_s3_storage, public_s3_storage
from grandchallenge.core.validators import (
    ExtensionValidator,
    JSONSchemaValidator,
    MimeTypeValidator,
    get_file_mimetype,
)
from grandchallenge.evaluation.emails import (
    send_failed_evaluation_email,
    send_successful_evaluation_email,
)
from grandchallenge.evaluation.tasks import calculate_ranks
from grandchallenge.subdomains.utils import reverse

# JSON Schema (draft-06) for the value of ``Phase.extra_results_columns``.
#
# The value must be an array of objects. Each object must provide ``title``,
# ``path`` and ``order``; ``error_path`` is optional, and no other keys are
# accepted (``additionalProperties`` is False).
EXTRA_RESULT_COLUMNS_SCHEMA = {
    "definitions": {},
    "$schema": "http://json-schema.org/draft-06/schema#",
    "type": "array",
    "title": "The Extra Results Columns Schema",
    "items": {
        "$id": "#/items",
        "type": "object",
        "title": "The Items Schema",
        "required": ["title", "path", "order"],
        "additionalProperties": False,
        "properties": {
            # Column heading, for example "Mean Dice".
            "title": {
                "$id": "#/items/properties/title",
                "type": "string",
                "title": "The Title Schema",
                "default": "",
                "examples": ["Mean Dice"],
                "pattern": "^(.*)$",
            },
            # Path of the column value, for example "aggregates.dice.mean".
            "path": {
                "$id": "#/items/properties/path",
                "type": "string",
                "title": "The Path Schema",
                "default": "",
                "examples": ["aggregates.dice.mean"],
                "pattern": "^(.*)$",
            },
            # Optional path of the error on the value, for example
            # "aggregates.dice.std".
            "error_path": {
                "$id": "#/items/properties/error_path",
                "type": "string",
                "title": "The Error Path Schema",
                "default": "",
                "examples": ["aggregates.dice.std"],
                "pattern": "^(.*)$",
            },
            # Sort direction, restricted to "asc" or "desc" by both the enum
            # and the pattern.
            "order": {
                "$id": "#/items/properties/order",
                "type": "string",
                "enum": ["asc", "desc"],
                "title": "The Order Schema",
                "default": "",
                "examples": ["asc"],
                "pattern": "^(asc|desc)$",
            },
        },
    },
}

# Validator for the observable notebook URL fields on Phase.
#
# The pattern accepts only https://observablehq.com/embed/@<user>/<notebook>
# followed by a query string that starts with "cell=". The notebook segment
# may not contain "?" or ".". Anything, including nothing, may follow
# "cell=".
OBSERVABLE_URL_VALIDATOR = RegexValidator(
    r"^https\:\/\/observablehq\.com\/embed\/\@[^\/]+\/[^\?\.]+\?cell\=.*$",
    "URL must be of the form https://observablehq.com/embed/@user/notebook?cell=*",
)


class Phase(UUIDModel):
    """
    Settings for one phase of a challenge.

    Holds how submissions to the phase are accepted, how results are scored
    and ranked, and how they are displayed on the leaderboard.
    """

    # This must match the syntax used in jquery datatables
    # https://datatables.net/reference/option/order
    ASCENDING = "asc"
    DESCENDING = "desc"
    EVALUATION_SCORE_SORT_CHOICES = (
        (ASCENDING, "Ascending"),
        (DESCENDING, "Descending"),
    )

    OFF = "off"
    OPTIONAL = "opt"
    REQUIRED = "req"
    PUBLICATION_LINK_CHOICES = SUPPLEMENTARY_FILE_CHOICES = (
        (OFF, "Off"),
        (OPTIONAL, "Optional"),
        (REQUIRED, "Required"),
    )

    ALL = "all"
    MOST_RECENT = "rec"
    BEST = "bst"
    RESULT_DISPLAY_CHOICES = (
        (ALL, "Display all results"),
        (MOST_RECENT, "Only display each users most recent result"),
        (BEST, "Only display each users best result"),
    )

    ABSOLUTE = "abs"
    MEAN = "avg"
    MEDIAN = "med"
    SCORING_CHOICES = (
        (ABSOLUTE, "Use the absolute value of the score column"),
        (
            MEAN,
            "Use the mean of the relative ranks of the score and extra result columns",
        ),
        (
            MEDIAN,
            "Use the median of the relative ranks of the score and extra result columns",
        ),
    )

    class SubmissionKind(models.IntegerChoices):
        """The kinds of submission that a phase can accept."""

        CSV = 1, "CSV"
        ZIP = 2, "ZIP"
        ALGORITHM = 3, "Algorithm"

    challenge = models.ForeignKey(
        Challenge, on_delete=models.CASCADE, editable=False,
    )
    archive = models.ForeignKey(
        Archive,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        help_text=(
            "Which archive should be used as the source dataset for this "
            "phase?"
        ),
    )
    title = models.CharField(
        max_length=64,
        help_text="The title of this phase.",
        default="Challenge",
    )
    slug = AutoSlugField(populate_from="title", max_length=64)
    score_title = models.CharField(
        max_length=32,
        blank=False,
        default="Score",
        help_text=(
            "The name that will be displayed for the scores column, for "
            "instance: Score (log-loss)"
        ),
    )
    score_jsonpath = models.CharField(
        max_length=255,
        blank=True,
        help_text=(
            "The jsonpath of the field in metrics.json that will be used "
            "for the overall scores on the results page. See "
            "http://goessner.net/articles/JsonPath/ for syntax. For example: "
            "dice.mean"
        ),
    )
    score_error_jsonpath = models.CharField(
        max_length=255,
        blank=True,
        help_text=(
            "The jsonpath for the field in metrics.json that contains the "
            "error of the score, eg: dice.std"
        ),
    )
    score_default_sort = models.CharField(
        max_length=4,
        choices=EVALUATION_SCORE_SORT_CHOICES,
        default=DESCENDING,
        help_text=(
            "The default sorting to use for the scores on the results page."
        ),
    )
    score_decimal_places = models.PositiveSmallIntegerField(
        blank=False,
        default=4,
        help_text=("The number of decimal places to display for the score"),
    )
    extra_results_columns = models.JSONField(
        default=list,
        blank=True,
        help_text=(
            "A JSON object that contains the extra columns from metrics.json "
            "that will be displayed on the results page. "
        ),
        validators=[JSONSchemaValidator(schema=EXTRA_RESULT_COLUMNS_SCHEMA)],
    )
    scoring_method_choice = models.CharField(
        max_length=3,
        choices=SCORING_CHOICES,
        default=ABSOLUTE,
        help_text=("How should the rank of each result be calculated?"),
    )
    result_display_choice = models.CharField(
        max_length=3,
        choices=RESULT_DISPLAY_CHOICES,
        default=ALL,
        help_text=("Which results should be displayed on the leaderboard?"),
    )
    creator_must_be_verified = models.BooleanField(
        default=False,
        help_text=(
            "If True, only participants with verified accounts can make "
            "submissions to this phase"
        ),
    )
    submission_kind = models.PositiveSmallIntegerField(
        default=SubmissionKind.CSV,
        choices=SubmissionKind.choices,
        help_text=(
            "Should participants submit a .csv/.zip file of predictions, "
            "or an algorithm?"
        ),
    )
    allow_submission_comments = models.BooleanField(
        default=False,
        help_text=(
            "Allow users to submit comments as part of their submission."
        ),
    )
    display_submission_comments = models.BooleanField(
        default=False,
        help_text=(
            "If true, submission comments are shown on the results page."
        ),
    )
    supplementary_file_choice = models.CharField(
        max_length=3,
        choices=SUPPLEMENTARY_FILE_CHOICES,
        default=OFF,
        help_text=(
            "Show a supplementary file field on the submissions page so that "
            "users can upload an additional file along with their predictions "
            "file as part of their submission (eg, include a pdf description "
            "of their method). Off turns this feature off, Optional means "
            "that including the file is optional for the user, Required means "
            "that the user must upload a supplementary file."
        ),
    )
    supplementary_file_label = models.CharField(
        max_length=32,
        blank=True,
        default="Supplementary File",
        help_text=(
            "The label that will be used on the submission and results page "
            "for the supplementary file. For example: Algorithm Description."
        ),
    )
    supplementary_file_help_text = models.CharField(
        max_length=128,
        blank=True,
        default="",
        help_text=(
            "The help text to include on the submissions page to describe the "
            'submissions file. Eg: "A PDF description of the method.".'
        ),
    )
    show_supplementary_file_link = models.BooleanField(
        default=False,
        help_text=(
            "Show a link to download the supplementary file on the results "
            "page."
        ),
    )
    publication_url_choice = models.CharField(
        max_length=3,
        choices=PUBLICATION_LINK_CHOICES,
        default=OFF,
        help_text=(
            "Show a publication url field on the submission page so that "
            "users can submit a link to a publication that corresponds to "
            "their submission. Off turns this feature off, Optional means "
            "that including the url is optional for the user, Required means "
            "that the user must provide an url."
        ),
    )
    show_publication_url = models.BooleanField(
        default=False,
        help_text=("Show a link to the publication on the results page"),
    )
    daily_submission_limit = models.PositiveIntegerField(
        default=10,
        help_text=(
            "The limit on the number of times that a user can make a "
            "submission in a 24 hour period."
        ),
    )
    submissions_open = models.DateTimeField(
        null=True,
        blank=True,
        help_text=(
            "If set, participants will not be able to make submissions to "
            "this phase before this time."
        ),
    )
    submissions_close = models.DateTimeField(
        null=True,
        blank=True,
        help_text=(
            "If set, participants will not be able to make submissions to "
            "this phase after this time."
        ),
    )
    submission_page_html = models.TextField(
        help_text=(
            "HTML to include on the submission page for this challenge."
        ),
        blank=True,
    )
    auto_publish_new_results = models.BooleanField(
        default=True,
        help_text=(
            "If true, new results are automatically made public. If false, "
            "the challenge administrator must manually publish each new "
            "result."
        ),
    )
    display_all_metrics = models.BooleanField(
        default=True,
        help_text=(
            "Should all of the metrics be displayed on the Result detail page?"
        ),
    )
    evaluation_detail_observable_url = models.URLField(
        blank=True,
        validators=[OBSERVABLE_URL_VALIDATOR],
        max_length=2000,
        help_text=(
            "The URL of the embeddable observable notebook for viewing "
            "individual results. Must be of the form "
            "https://observablehq.com/embed/@user/notebook?cell=..."
        ),
    )
    # NOTE(review): the first two literals below concatenate without a space
    # ("comparingresults"). Left unchanged here because help_text is part of
    # the migration state; fix it together with a new migration.
    evaluation_comparison_observable_url = models.URLField(
        blank=True,
        validators=[OBSERVABLE_URL_VALIDATOR],
        max_length=2000,
        help_text=(
            "The URL of the embeddable observable notebook for comparing"
            "results. Must be of the form "
            "https://observablehq.com/embed/@user/notebook?cell=..."
        ),
    )

    inputs = models.ManyToManyField(
        to=ComponentInterface, related_name="evaluation_inputs"
    )
    outputs = models.ManyToManyField(
        to=ComponentInterface, related_name="evaluation_outputs"
    )

    class Meta:
        unique_together = (
            ("challenge", "title"),
            ("challenge", "slug"),
        )
        ordering = ("challenge", "submissions_open", "created")
        permissions = (("create_phase_submission", "Create Phase Submission"),)

    def __str__(self):
        return f"{self.title} Evaluation for {self.challenge.short_name}"

    def save(self, *args, **kwargs):
        """
        Save the phase and schedule a rank recalculation for it.

        On first save the default interfaces are set and the object
        permissions are assigned.
        """
        # Must be read before saving, the flag is cleared by super().save()
        adding = self._state.adding

        super().save(*args, **kwargs)

        if adding:
            self.set_default_interfaces()
            self.assign_permissions()

        calculate_ranks.apply_async(kwargs={"phase_pk": self.pk})

    def set_default_interfaces(self):
        """Use the csv predictions input and the metrics json output."""
        self.inputs.set(
            [ComponentInterface.objects.get(slug="predictions-csv-file")]
        )
        self.outputs.set(
            [ComponentInterface.objects.get(slug="metrics-json-file")]
        )

    def assign_permissions(self):
        """
        Assign the object permissions for this phase.

        Challenge admins can view, change and submit to the phase,
        challenge participants can submit to it.
        """
        assign_perm("view_phase", self.challenge.admins_group, self)
        assign_perm("change_phase", self.challenge.admins_group, self)
        assign_perm(
            "create_phase_submission", self.challenge.admins_group, self
        )
        assign_perm(
            "create_phase_submission", self.challenge.participants_group, self
        )

    def get_absolute_url(self):
        return reverse(
            "pages:home",
            kwargs={"challenge_short_name": self.challenge.short_name},
        )

    def get_observable_url(self, view_kind, url_kind):
        """
        Derive a notebook URL and its cells from a stored embed URL.

        Parameters
        ----------
        view_kind
            ``"detail"`` or ``"comparison"``, selects which of the stored
            observable URLs is used.
        url_kind
            ``"js"`` for the api.observablehq.com javascript URL or
            ``"edit"`` for the notebook page on observablehq.com.

        Returns
        -------
        A ``(url, cells)`` tuple, where ``cells`` is the list of values of
        the ``cell`` query parameter. ``("", [])`` is returned if no URL is
        set for this view.

        Raises
        ------
        ValueError
            If ``view_kind`` or ``url_kind`` is not one of the known values.
        """
        if view_kind == "detail":
            url = self.evaluation_detail_observable_url
        elif view_kind == "comparison":
            url = self.evaluation_comparison_observable_url
        else:
            raise ValueError("View or notebook not found")

        if not url:
            return "", []

        parsed_url = urlparse(url)
        # parse_qs discards parameters with blank values, so a URL ending in
        # "?cell=" (which the validator accepts) has no "cell" key at all.
        cells = parse_qs(parsed_url.query).get("cell", [])
        # Drop the query string, keep scheme, host and path
        url = urljoin(url, parsed_url.path)

        if url_kind == "js":
            url = url.replace(
                "https://observablehq.com/embed/",
                "https://api.observablehq.com/",
            )
            url += ".js?v=3"
        elif url_kind == "edit":
            url = url.replace(
                "https://observablehq.com/embed/", "https://observablehq.com/"
            )
        else:
            raise ValueError("URL kind must be one of edit or js")

        return url, cells

    @property
    def observable_detail_edit_url(self):
        """The URL for editing the detail notebook, empty if not set."""
        url, _ = self.get_observable_url(view_kind="detail", url_kind="edit")
        return url

    @property
    def observable_comparison_edit_url(self):
        """The URL for editing the comparison notebook, empty if not set."""
        url, _ = self.get_observable_url(
            view_kind="comparison", url_kind="edit"
        )
        return url
class Method(UUIDModel, ComponentImage):
    """Store the methods for performing an evaluation."""

    phase = models.ForeignKey(Phase, on_delete=models.CASCADE, null=True)

    def save(self, *args, **kwargs):
        """Save the method, assigning permissions when it is first created."""
        # Must be read before saving, the flag is cleared by super().save()
        adding = self._state.adding

        super().save(*args, **kwargs)

        if adding:
            self.assign_permissions()

    def assign_permissions(self):
        """Allow the admins of the challenge to view this method."""
        assign_perm("view_method", self.phase.challenge.admins_group, self)

    def get_absolute_url(self):
        return reverse(
            "evaluation:method-detail",
            kwargs={
                "pk": self.pk,
                "challenge_short_name": self.phase.challenge.short_name,
            },
        )


def submission_file_path(instance, filename):
    """Return the storage path for the predictions file of a submission."""
    # Must match the protected serving url
    return (
        f"{settings.EVALUATION_FILES_SUBDIRECTORY}/"
        f"{instance.phase.challenge.pk}/"
        f"submissions/"
        f"{instance.creator.pk}/"
        f"{instance.pk}/"
        f"{get_valid_filename(filename)}"
    )


def submission_supplementary_file_path(instance, filename):
    """Return the storage path for the supplementary file of a submission."""
    return (
        f"evaluation-supplementary/"
        f"{instance.phase.challenge.pk}/"
        f"{instance.pk}/"
        f"{get_valid_filename(filename)}"
    )


class Submission(UUIDModel):
    """Store files for evaluation."""

    creator = models.ForeignKey(
        settings.AUTH_USER_MODEL, null=True, on_delete=models.SET_NULL
    )
    creators_ip = models.GenericIPAddressField(
        null=True, default=None, editable=False
    )
    creators_user_agent = models.TextField(
        blank=True, default="", editable=False
    )
    phase = models.ForeignKey(Phase, on_delete=models.CASCADE, null=True)
    algorithm_image = models.ForeignKey(
        AlgorithmImage, null=True, on_delete=models.SET_NULL
    )
    predictions_file = models.FileField(
        upload_to=submission_file_path,
        validators=[
            MimeTypeValidator(allowed_types=("application/zip", "text/plain")),
            ExtensionValidator(allowed_extensions=(".zip", ".csv")),
        ],
        storage=protected_s3_storage,
        blank=True,
    )
    supplementary_file = models.FileField(
        upload_to=submission_supplementary_file_path,
        storage=public_s3_storage,
        validators=[
            MimeTypeValidator(allowed_types=("text/plain", "application/pdf"))
        ],
        blank=True,
    )
    comment = models.CharField(
        max_length=128,
        blank=True,
        default="",
        help_text=(
            "You can add a comment here to help you keep track of your "
            "submissions."
        ),
    )
    publication_url = models.URLField(
        blank=True,
        help_text=(
            "A URL for the publication associated with this submission."
        ),
    )

    class Meta:
        unique_together = (("phase", "predictions_file", "algorithm_image"),)

    def save(self, *args, **kwargs):
        """
        Save the submission.

        When the submission is first created an evaluation is started for
        it and the object permissions are assigned.
        """
        # Must be read before saving, the flag is cleared by super().save()
        adding = self._state.adding

        super().save(*args, **kwargs)

        if adding:
            self.create_evaluation()
            self.assign_permissions()

    def assign_permissions(self):
        """Allow the challenge admins and the creator to view this."""
        assign_perm("view_submission", self.phase.challenge.admins_group, self)
        assign_perm("view_submission", self.creator, self)

    def create_evaluation(self):
        """
        Create an Evaluation of this submission with the latest ready method.

        Nothing is created if the phase has no ready method. Algorithm
        submissions schedule the creation of the algorithm jobs, file
        submissions attach the predictions file as the evaluation input
        and schedule the evaluation itself.

        Raises
        ------
        NotImplementedError
            If the predictions file is neither a zip nor a plain text file.
        """
        method = self.latest_ready_method

        if not method:
            # TODO Email admins
            return

        evaluation = Evaluation.objects.create(submission=self, method=method)

        if self.algorithm_image:
            create_algorithm_jobs_for_evaluation.apply_async(
                kwargs={"evaluation_pk": evaluation.pk}
            )
        else:
            mimetype = get_file_mimetype(self.predictions_file)

            if mimetype == "application/zip":
                interface = ComponentInterface.objects.get(
                    slug="predictions-zip-file"
                )
            elif mimetype == "text/plain":
                interface = ComponentInterface.objects.get(
                    slug="predictions-csv-file"
                )
            else:
                raise NotImplementedError(
                    f"Interface is not defined for {mimetype} files"
                )

            evaluation.inputs.set(
                [
                    ComponentInterfaceValue.objects.create(
                        interface=interface, file=self.predictions_file
                    )
                ]
            )
            evaluation.signature.apply_async()

    @property
    def latest_ready_method(self):
        """The newest ready Method of this phase, None if there is none."""
        return (
            Method.objects.filter(phase=self.phase, ready=True)
            .order_by("-created")
            .first()
        )

    def get_absolute_url(self):
        return reverse(
            "evaluation:submission-detail",
            kwargs={
                "pk": self.pk,
                "challenge_short_name": self.phase.challenge.short_name,
            },
        )


class SubmissionEvaluator(Executor):
    """Executor that reads its results from /output/metrics.json."""

    def __init__(self, *args, **kwargs):
        super().__init__(
            *args, results_file=Path("/output/metrics.json"), **kwargs
        )

    def _copy_input_files(self, writer):
        """
        Place each input file under /input/ in the writer container.

        Zip files are extracted, and if the archive contains a single top
        level directory the contents of that directory are moved up to
        /input/. JSON files become /input/predictions.json, anything else
        becomes /input/submission.csv.
        """
        for file in self._input_files:
            dest_file = "/tmp/submission-src"
            put_file(container=writer, src=file, dest=dest_file)

            if hasattr(file, "content_type"):
                mimetype = file.content_type
            else:
                with file.open("rb") as f:
                    mimetype = get_file_mimetype(f)

            if mimetype.lower() == "application/zip":
                # Unzip the file in the container rather than in the python
                # process. With resource limits this should provide some
                # protection against zip bombs etc.
                writer.exec_run(
                    f"unzip {dest_file} -d /input/ -x '__MACOSX/*'"
                )

                # Remove a duplicated directory
                input_files = (
                    writer.exec_run("ls -1 /input/")
                    .output.decode()
                    .splitlines()
                )

                if len(input_files) == 1:
                    # The entry name comes from the uploaded archive, so it
                    # is never interpolated into a command string: commands
                    # are passed as argument lists and the path is handed
                    # to the shell as a positional parameter. This also
                    # makes names that contain spaces work.
                    top_level = f"/input/{input_files[0]}"

                    # ls only succeeds on "<name>/" if it is a directory
                    is_directory = not writer.exec_run(
                        ["ls", "-d", f"{top_level}/"]
                    ).exit_code

                    if is_directory:
                        writer.exec_run(
                            [
                                "/bin/sh",
                                "-c",
                                'mv "$1"/* /input/ && rm -r "$1"/',
                                "sh",
                                top_level,
                            ]
                        )

            elif mimetype.lower() == "application/json":
                writer.exec_run(f"mv {dest_file} /input/predictions.json")

            else:
                # Not a zip file, so must be a csv
                writer.exec_run(f"mv {dest_file} /input/submission.csv")


class Evaluation(UUIDModel, ComponentJob):
    """Stores information about an evaluation for a given submission."""

    submission = models.ForeignKey("Submission", on_delete=models.CASCADE)
    method = models.ForeignKey("Method", on_delete=models.CASCADE)

    published = models.BooleanField(default=True)
    rank = models.PositiveIntegerField(
        default=0,
        help_text=(
            "The position of this result on the leaderboard. If the value is "
            "zero, then the result is unranked."
        ),
    )
    rank_score = models.FloatField(default=0.0)
    rank_per_metric = models.JSONField(default=dict)

    def save(self, *args, **kwargs):
        """
        Save the evaluation, refresh its permissions and schedule ranking.

        On creation ``published`` is taken from the
        ``auto_publish_new_results`` setting of the phase.
        """
        adding = self._state.adding

        if adding:
            self.published = self.submission.phase.auto_publish_new_results

        super().save(*args, **kwargs)

        # Run on every save so that changes to published are reflected
        self.assign_permissions()

        calculate_ranks.apply_async(
            kwargs={"phase_pk": self.submission.phase.pk}
        )

    @property
    def title(self):
        return f"#{self.rank} {self.submission.creator.username}"

    def assign_permissions(self):
        """
        Set who can view and change this evaluation.

        Challenge admins can always view and change it. If it is published,
        the participants (hidden challenge) or the registered and anonymous
        users group (otherwise) can view it. The view permission is always
        removed from the other one of those two groups.
        """
        admins_group = self.submission.phase.challenge.admins_group
        assign_perm("view_evaluation", admins_group, self)
        assign_perm("change_evaluation", admins_group, self)

        if self.submission.phase.challenge.hidden:
            viewer_group = self.submission.phase.challenge.participants_group
            non_viewer_group = Group.objects.get(
                name=settings.REGISTERED_AND_ANON_USERS_GROUP_NAME
            )
        else:
            viewer_group = Group.objects.get(
                name=settings.REGISTERED_AND_ANON_USERS_GROUP_NAME
            )
            non_viewer_group = (
                self.submission.phase.challenge.participants_group
            )

        if self.published:
            assign_perm("view_evaluation", viewer_group, self)
        else:
            remove_perm("view_evaluation", viewer_group, self)

        remove_perm("view_evaluation", non_viewer_group, self)

    @property
    def container(self):
        return self.method

    @property
    def input_files(self):
        """
        The files to copy into the evaluation container.

        If there is an input with the "Predictions JSON File" interface its
        value is serialised to an in-memory predictions.json, otherwise the
        files of all of the inputs are returned.
        """
        try:
            return [
                SimpleUploadedFile(
                    "predictions.json",
                    dumps(
                        self.inputs.get(
                            interface__title="Predictions JSON File"
                        ).value
                    ).encode("utf-8"),
                    content_type="application/json",
                )
            ]
        except ObjectDoesNotExist:
            return [inpt.file for inpt in self.inputs.all()]

    @property
    def executor_cls(self):
        return SubmissionEvaluator

    def create_result(self, *, result: dict):
        """
        Store ``result`` as the metrics output and email about the success.

        An existing metrics-json-file output is updated in place, otherwise
        a new one is created and added to the outputs.
        """
        interface = ComponentInterface.objects.get(slug="metrics-json-file")

        try:
            output_civ = self.outputs.get(interface=interface)
            output_civ.value = result
            output_civ.save()
        except ObjectDoesNotExist:
            output_civ = ComponentInterfaceValue.objects.create(
                interface=interface, value=result
            )
            self.outputs.add(output_civ)

        send_successful_evaluation_email(self)

    def clean(self):
        """
        Check that the submission and the method belong to the same phase.

        Raises
        ------
        ValidationError
            If the phases of the submission and the method differ.
        """
        if self.submission.phase != self.method.phase:
            # Each fragment ends with a space so that the concatenated
            # message reads as separate words
            raise ValidationError(
                "The submission and method phases should "
                "be the same. You are trying to evaluate a "
                f"submission for {self.submission.phase} "
                f"with a method for {self.method.phase}"
            )

        super().clean()

    def update_status(self, *args, **kwargs):
        """Update the status, sending an email if the evaluation failed."""
        res = super().update_status(*args, **kwargs)

        if self.status == self.FAILURE:
            send_failed_evaluation_email(self)

        return res

    def get_absolute_url(self):
        return reverse(
            "evaluation:detail",
            kwargs={
                "pk": self.pk,
                "challenge_short_name": self.submission.phase.challenge.short_name,
            },
        )