Source code for grandchallenge.components.models

import json
import logging
import re
from datetime import timedelta
from pathlib import Path
from typing import Optional

from celery import signature
from django import forms
from django.apps import apps
from django.conf import settings
from django.core.exceptions import ValidationError
from django.core.files import File
from django.core.files.base import ContentFile
from django.core.validators import (
    MaxValueValidator,
    MinValueValidator,
    RegexValidator,
)
from django.db import models
from django.db.models import Avg, F, IntegerChoices, QuerySet, Sum
from django.db.transaction import on_commit
from django.forms import ModelChoiceField, ModelMultipleChoiceField
from django.forms.models import model_to_dict
from django.utils.functional import cached_property
from django.utils.module_loading import import_string
from django.utils.text import get_valid_filename
from django.utils.timezone import now
from django.utils.translation import gettext_lazy as _
from django_deprecate_fields import deprecate_field
from django_extensions.db.fields import AutoSlugField
from panimg.models import MAXIMUM_SEGMENTS_LENGTH

from grandchallenge.cases.models import Image, ImageFile
from grandchallenge.components.schemas import INTERFACE_VALUE_SCHEMA
from grandchallenge.components.tasks import (
    assign_docker_image_from_upload,
    deprovision_job,
    provision_job,
    validate_docker_image,
)
from grandchallenge.components.validators import (
    validate_no_slash_at_ends,
    validate_safe_path,
)
from grandchallenge.core.storage import (
    private_s3_storage,
    protected_s3_storage,
)
from grandchallenge.core.validators import (
    ExtensionValidator,
    JSONSchemaValidator,
    JSONValidator,
    MimeTypeValidator,
)
from grandchallenge.uploads.models import UserUpload
from grandchallenge.workstation_configs.models import (
    OVERLAY_SEGMENTS_SCHEMA,
    LookUpTable,
)

logger = logging.getLogger(__name__)

DEFAULT_OUTPUT_INTERFACE_SLUG = "generic-overlay"


[docs]class InterfaceKindChoices(models.TextChoices): """Interface kind choices.""" STRING = "STR", _("String") INTEGER = "INT", _("Integer") FLOAT = "FLT", _("Float") BOOL = "BOOL", _("Bool") ANY = "JSON", _("Anything") CHART = "CHART", _("Chart") # Annotation Types TWO_D_BOUNDING_BOX = "2DBB", _("2D bounding box") MULTIPLE_TWO_D_BOUNDING_BOXES = "M2DB", _("Multiple 2D bounding boxes") DISTANCE_MEASUREMENT = "DIST", _("Distance measurement") MULTIPLE_DISTANCE_MEASUREMENTS = ( "MDIS", _("Multiple distance measurements"), ) POINT = "POIN", _("Point") MULTIPLE_POINTS = "MPOI", _("Multiple points") POLYGON = "POLY", _("Polygon") MULTIPLE_POLYGONS = "MPOL", _("Multiple polygons") LINE = "LINE", _("Line") MULTIPLE_LINES = "MLIN", _("Multiple lines") ANGLE = "ANGL", _("Angle") MULTIPLE_ANGLES = "MANG", _("Multiple angles") # Choice Types CHOICE = "CHOI", _("Choice") MULTIPLE_CHOICE = "MCHO", _("Multiple choice") # Image types IMAGE = "IMG", _("Image") SEGMENTATION = "SEG", _("Segmentation") HEAT_MAP = "HMAP", _("Heat Map") # File types PDF = "PDF", _("PDF file") SQREG = "SQREG", _("SQREG file") THUMBNAIL_JPG = "JPEG", _("Thumbnail jpg") THUMBNAIL_PNG = "PNG", _("Thumbnail png") OBJ = "OBJ", _("OBJ file") MP4 = "MP4", _("MP4 file") # Legacy support CSV = "CSV", _("CSV file") ZIP = "ZIP", _("ZIP file")
[docs]class InterfaceSuperKindChoices(models.TextChoices): IMAGE = "I", "Image" FILE = "F", "File" VALUE = "V", "Value"
[docs]class InterfaceKind: """Interface kind.""" InterfaceKindChoices = InterfaceKindChoices
[docs] @staticmethod def interface_type_json(): """Interface kinds that are json serializable: * String * Integer * Float * Bool * Anything that is JSON serializable (any object) * 2D bounding box * Multiple 2D bounding boxes * Distance measurement * Multiple distance measurements * Point * Multiple points * Polygon * Multiple polygons * Lines * Multiple lines * Angle * Multiple angles * Choice (string) * Multiple choice (array of strings) * Chart Example json for 2D bounding box annotation .. code-block:: json { "type": "2D bounding box", "corners": [ [ 130.80001831054688, 148.86666870117188, 0.5009999871253967], [ 69.73332977294922, 148.86666870117188, 0.5009999871253967], [ 69.73332977294922, 73.13333129882812, 0.5009999871253967 ], [ 130.80001831054688, 73.13333129882812, 0.5009999871253967] ], "version": { "major": 1, "minor": 0 } } Example json for Multiple 2D bounding boxes annotation .. code-block:: json { "type": "Multiple 2D bounding boxes", "boxes": [ { "corners": [ [ 92.66666412353516, 136.06668090820312, 0.5009999871253967], [ 54.79999923706055, 136.06668090820312, 0.5009999871253967], [ 54.79999923706055, 95.53333282470703, 0.5009999871253967], [ 92.66666412353516, 95.53333282470703, 0.5009999871253967] ]}, { "corners": [ [ 92.66666412353516, 136.06668090820312, 0.5009999871253967], [ 54.79999923706055, 136.06668090820312, 0.5009999871253967], [ 54.79999923706055, 95.53333282470703, 0.5009999871253967], [ 92.66666412353516, 95.53333282470703, 0.5009999871253967] ]} ], "version": { "major": 1, "minor": 0 } } Example json for Distance measurement annotation .. code-block:: json { "type": "Distance measurement", "start": [ 59.79176712036133, 78.76753997802734, 0.5009999871253967 ], "end": [ 69.38014221191406, 143.75546264648438, 0.5009999871253967 ], "version": { "major": 1, "minor": 0 } } Example json for Multiple distance measurement annotation .. code-block:: json { "type": "Multiple distance measurements", "lines": [ { "start": [ 49.733333587646484, 103.26667022705078, 0.5009999871253967 ], "end": [ 55.06666564941406, 139.26666259765625, 0.5009999871253967 ] }, { "start": [ 49.733333587646484, 103.26667022705078, 0.5009999871253967 ], "end": [ 55.06666564941406, 139.26666259765625, 0.5009999871253967 ] } ], "version": { "major": 1, "minor": 0 } } Example json for Point annotation .. code-block:: json { "point": [ 152.13333129882812, 111.0, 0.5009999871253967 ], "type": "Point", "version": { "major": 1, "minor": 0 } } Example json for Multiple points annotation .. code-block:: json { "type": "Multiple points", "points": [ { "point": [ 96.0145263671875, 79.83292388916016, 0.5009999871253967 ], }, { "point": [ 130.10653686523438, 115.52300262451172, 0.5009999871253967 ], } ], "version": { "major": 1, "minor": 0 } } Example json for Polygon annotation .. code-block:: json { "type": "Polygon", "seed_point": [ 76.413756408691, 124.014717102050, 0.5009999871253967 ], "path_points": [ [ 76.41375842260106, 124.01471710205078, 0.5009999871253967 ], [ 76.41694876387268, 124.0511828696491, 0.5009999871253967 ], [ 76.42642285078242, 124.0865406433515, 0.5009999871253967 ] ], "sub_type": "brush", "groups": [], "version": { "major": 1, "minor": 0 } } Example json for Multiple polygon annotation .. code-block:: json { "type": "Multiple polygons", "polygons": [ { "seed_point": [ 55.82666793823242, 90.46666717529297, 0.5009999871253967 ], "path_points": [ [ 55.82667599387105, 90.46666717529297, 0.5009999871253967 ], [ 55.93921357544119, 90.88666314747366, 0.5009999871253967 ], [ 56.246671966051736, 91.1941215380842, 0.5009999871253967 ], [ 56.66666793823242, 91.30665911965434, 0.5009999871253967 ] ], "sub_type": "brush", "groups": [ "manual"], }, { "seed_point": [ 90.22666564941406, 96.06666564941406, 0.5009999871253967 ], "path_points": [ [ 90.22667370505269, 96.06666564941406, 0.5009999871253967 ], [ 90.33921128662283, 96.48666162159475, 0.5009999871253967 ], [ 90.64666967723338, 96.7941200122053, 0.5009999871253967 ] ], "sub_type": "brush", "groups": [] } ], "version": { "major": 1, "minor": 0 } } Example json for Line annotation .. code-block:: json { "type": "Line", "seed_points": [[1, 2, 3], [1, 2, 3]], "path_point_lists": [ [[5, 6, 7], [8, 9, 10], [1, 0, 10], [2, 4, 2]], [[5, 6, 7], [8, 9, 10], [1, 0, 10], [2, 4, 2]] ], "version": { "major": 1, "minor": 0 } } Example json for Multiple lines annotation .. code-block:: json { "type": "Multiple lines", "lines": [ { "seed_points": [[1, 2, 3], [1, 2, 3]], "path_point_lists": [ [[5, 6, 7], [8, 9, 10], [1, 0, 10], [2, 4, 2]], [[5, 6, 7], [8, 9, 10], [1, 0, 10], [2, 4, 2]], ] }, { "seed_points": [[1, 2, 3], [1, 2, 3]], "path_point_lists": [ [[5, 6, 7], [8, 9, 10], [1, 0, 10], [2, 4, 2]], [[5, 6, 7], [8, 9, 10], [1, 0, 10], [2, 4, 2]], ] } ], "version": { "major": 1, "minor": 0 } } Example json for Angle annotation .. code-block:: json { "type": "Angle", "lines": [[[180, 10, 0.5], [190, 10, 0.5]],[[180, 25, 0.5], [190, 15, 0.5]]], "version": {"major": 1, "minor": 0} } Example json for Multiple angles annotation .. code-block:: json { "type": "Multiple angles", "angles": [ { "lines": [[[110, 135, 0.5], [60, 165, 0.5]],[[70, 25, 0.5], [85, 65, 0.5]]], "name": "First angle" }, { "lines": [[[130, 210, 0.5], [160, 130, 0.5]], [[140, 40, 0.5], [180, 75, 0.5]]], "name": "Second angle" }, { "lines": [[[20, 30, 0.5], [20, 100, 0.5]], [[180, 200, 0.5], [210, 200, 0.5]]], "name": "Third angle" } ], "version": {"major": 1, "minor": 0} } Example json for Chart (for more examples, see `here<https://vega.github.io/vega-lite/examples/>` and `here<https://grand-challenge.org/blogs/visualisations-for-challenges/>`) .. code-block:: json { "$schema":"https://vega.github.io/schema/vega-lite/v5.json", "width":300, "height":300, "data":{ "values":[ { "target":"Negative", "prediction":"Negative", "value":198 }, { "target":"Negative", "prediction":"Positive", "value":9 }, { "target":"Positive", "prediction":"Negative", "value":159 }, { "target":"Positive", "prediction":"Positive", "value":376 } ], "format":{ "type":"json" } }, "layer":[ { "mark":"rect", "encoding":{ "y":{ "field":"target", "type":"ordinal" }, "x":{ "field":"prediction", "type":"ordinal" }, "color":{ "field":"value", "type":"quantitative", "title":"Count of Records", "legend":{ "direction":"vertical", "gradientLength":300 } } } }, { "mark":"text", "encoding":{ "y":{ "field":"target", "type":"ordinal" }, "x":{ "field":"prediction", "type":"ordinal" }, "text":{ "field":"value", "type":"quantitative" }, "color":{ "condition":{ "test":"datum['value'] < 40", "value":"black" }, "value":"white" } } } ], "config":{ "axis":{ "grid":True, "tickBand":"extent" } } } """ return { InterfaceKind.InterfaceKindChoices.STRING, InterfaceKind.InterfaceKindChoices.INTEGER, InterfaceKind.InterfaceKindChoices.FLOAT, InterfaceKind.InterfaceKindChoices.BOOL, InterfaceKind.InterfaceKindChoices.TWO_D_BOUNDING_BOX, InterfaceKind.InterfaceKindChoices.MULTIPLE_TWO_D_BOUNDING_BOXES, InterfaceKind.InterfaceKindChoices.DISTANCE_MEASUREMENT, InterfaceKind.InterfaceKindChoices.MULTIPLE_DISTANCE_MEASUREMENTS, InterfaceKind.InterfaceKindChoices.POINT, InterfaceKind.InterfaceKindChoices.MULTIPLE_POINTS, InterfaceKind.InterfaceKindChoices.POLYGON, InterfaceKind.InterfaceKindChoices.MULTIPLE_POLYGONS, InterfaceKind.InterfaceKindChoices.CHOICE, InterfaceKind.InterfaceKindChoices.MULTIPLE_CHOICE, InterfaceKind.InterfaceKindChoices.ANY, InterfaceKind.InterfaceKindChoices.CHART, InterfaceKind.InterfaceKindChoices.LINE, InterfaceKind.InterfaceKindChoices.MULTIPLE_LINES, InterfaceKind.InterfaceKindChoices.ANGLE, InterfaceKind.InterfaceKindChoices.MULTIPLE_ANGLES, }
[docs] @staticmethod def interface_type_image(): """Interface kinds that are images: * Image * Heat Map * Segmentation """ return { InterfaceKind.InterfaceKindChoices.IMAGE, InterfaceKind.InterfaceKindChoices.HEAT_MAP, InterfaceKind.InterfaceKindChoices.SEGMENTATION, }
[docs] @staticmethod def interface_type_file(): """Interface kinds that are files: * CSV file * ZIP file * PDF file * SQREG file * Thumbnail JPG * Thumbnail PNG * OBJ file * MP4 file """ return { InterfaceKind.InterfaceKindChoices.CSV, InterfaceKind.InterfaceKindChoices.ZIP, InterfaceKind.InterfaceKindChoices.PDF, InterfaceKind.InterfaceKindChoices.SQREG, InterfaceKind.InterfaceKindChoices.THUMBNAIL_JPG, InterfaceKind.InterfaceKindChoices.THUMBNAIL_PNG, InterfaceKind.InterfaceKindChoices.OBJ, InterfaceKind.InterfaceKindChoices.MP4, }
class OverlaySegmentsMixin(models.Model): overlay_segments = models.JSONField( blank=True, default=list, help_text=( "The schema that defines how categories of values in the overlay " "images are differentiated." ), validators=[JSONValidator(schema=OVERLAY_SEGMENTS_SCHEMA)], ) look_up_table = models.ForeignKey( to=LookUpTable, blank=True, null=True, on_delete=models.SET_NULL, help_text="The look-up table that is applied when an overlay image is first shown", ) @property def voxel_values(self): allowed_values = {x["voxel_value"] for x in self.overlay_segments} # An implicit background value of 0 is always allowed, this saves the # user having to declare it and the annotator mark it allowed_values.add(0) return allowed_values def _validate_voxel_values(self, image): if not self.overlay_segments: return if image.segments is None: raise ValidationError( "Image segments could not be determined, ensure the file is " "not a tiff file, its voxel values are integers and that it " f"contains no more than {MAXIMUM_SEGMENTS_LENGTH} segments." ) invalid_values = set(image.segments) - self.voxel_values if invalid_values: raise ValidationError( f"The valid voxel values for this segmentation are: " f"{self.voxel_values}. This segmentation is invalid as " f"it contains the voxel values: {invalid_values}." ) class Meta: abstract = True
[docs]class ComponentInterface(OverlaySegmentsMixin): Kind = InterfaceKind.InterfaceKindChoices title = models.CharField( max_length=255, help_text="Human readable name of this input/output field.", unique=True, ) slug = AutoSlugField(populate_from="title") description = models.TextField( blank=True, help_text="Description of this input/output field." ) default_value = models.JSONField( blank=True, null=True, default=None, help_text="Default value for this field, only valid for inputs.", ) schema = models.JSONField( default=dict, blank=True, help_text=( "Additional JSON schema that the values for this interface must " "satisfy. See https://json-schema.org/. " "Only Draft 7, 6, 4 or 3 are supported." ), validators=[JSONSchemaValidator()], ) kind = models.CharField( blank=False, max_length=5, choices=Kind.choices, help_text=( "What is the type of this interface? Used to validate interface " "values and connections between components." ), ) relative_path = models.CharField( max_length=255, help_text=( "The path to the entity that implements this interface relative " "to the input or output directory." ), unique=True, validators=[ validate_safe_path, validate_no_slash_at_ends, # No uuids in path RegexValidator( regex=r".*[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}.*", inverse_match=True, flags=re.IGNORECASE, ), ], ) store_in_database = models.BooleanField( default=True, editable=True, help_text=( "Should the value be saved in a database field, " "only valid for outputs." ), )
[docs] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._overlay_segments_orig = self.overlay_segments
def __str__(self): return f"{self.title} ({self.get_kind_display()})" @property def is_image_kind(self): return self.kind in InterfaceKind.interface_type_image() @property def is_json_kind(self): return self.kind in InterfaceKind.interface_type_json() @property def is_file_kind(self): return self.kind in InterfaceKind.interface_type_file() @property def super_kind(self): if self.saved_in_object_store: if self.is_image_kind: return InterfaceSuperKindChoices.IMAGE else: return InterfaceSuperKindChoices.FILE else: return InterfaceSuperKindChoices.VALUE @property def saved_in_object_store(self): # files and images should always be saved to S3, others are optional return ( self.is_image_kind or self.is_file_kind or not self.store_in_database ) @property def requires_file(self): return ( self.is_file_kind or self.is_json_kind and not self.store_in_database ) @property def default_field(self): if self.requires_file: return ModelChoiceField elif self.is_image_kind: return ModelMultipleChoiceField elif self.kind in { InterfaceKind.InterfaceKindChoices.STRING, InterfaceKind.InterfaceKindChoices.CHOICE, }: return forms.CharField elif self.kind == InterfaceKind.InterfaceKindChoices.INTEGER: return forms.IntegerField elif self.kind == InterfaceKind.InterfaceKindChoices.FLOAT: return forms.FloatField elif self.kind == InterfaceKind.InterfaceKindChoices.BOOL: return forms.BooleanField else: return forms.JSONField @property def file_mimetypes(self): if self.kind == InterfaceKind.InterfaceKindChoices.CSV: return ( "application/csv", "application/vnd.ms-excel", "text/csv", "text/plain", ) elif self.kind == InterfaceKind.InterfaceKindChoices.ZIP: return ("application/zip", "application/x-zip-compressed") elif self.kind == InterfaceKind.InterfaceKindChoices.PDF: return ("application/pdf",) elif self.kind == InterfaceKind.InterfaceKindChoices.THUMBNAIL_JPG: return ("image/jpeg",) elif self.kind == InterfaceKind.InterfaceKindChoices.THUMBNAIL_PNG: return ("image/png",) elif self.kind == InterfaceKind.InterfaceKindChoices.SQREG: return ( "application/octet-stream", "application/x-sqlite3", "application/vnd.sqlite3", ) elif self.kind == InterfaceKind.InterfaceKindChoices.OBJ: return ("text/plain", "application/octet-stream") elif self.kind in InterfaceKind.interface_type_json(): return ( "text/plain", "application/json", ) elif self.kind == InterfaceKind.InterfaceKindChoices.MP4: return ("video/mp4",) else: raise RuntimeError(f"Unknown kind {self.kind}") def create_instance(self, *, image=None, value=None, fileobj=None): civ = ComponentInterfaceValue.objects.create(interface=self) if image: civ.image = image elif fileobj: container = File(fileobj) civ.file.save(Path(self.relative_path).name, container) elif self.saved_in_object_store: civ.file = ContentFile( json.dumps(value).encode("utf-8"), name=Path(self.relative_path).name, ) else: civ.value = value civ.full_clean() civ.save() return civ
[docs] def clean(self): super().clean() self._clean_overlay_segments() self._clean_store_in_database() self._clean_relative_path()
def _clean_overlay_segments(self): if ( self.kind == InterfaceKindChoices.SEGMENTATION and not self.overlay_segments ): raise ValidationError( "Overlay segments must be set for this interface" ) if ( self.kind != InterfaceKindChoices.SEGMENTATION and self.overlay_segments ): raise ValidationError( "Overlay segments should only be set for segmentations" ) Question = apps.get_model("reader_studies", "question") # noqa: N806 if ( self.pk is not None and self._overlay_segments_orig != self.overlay_segments and ( ComponentInterfaceValue.objects.filter(interface=self).exists() or Question.objects.filter(interface=self).exists() ) ): raise ValidationError( "Overlay segments cannot be changed, as values or questions " "for this ComponentInterface exist." ) def _clean_relative_path(self): if self.is_json_kind: if not self.relative_path.endswith(".json"): raise ValidationError("Relative path should end with .json") elif self.is_file_kind and not self.relative_path.endswith( f".{self.kind.lower()}" ): raise ValidationError( f"Relative path should end with .{self.kind.lower()}" ) if self.is_image_kind: if not self.relative_path.startswith("images/"): raise ValidationError( "Relative path should start with images/" ) if Path(self.relative_path).name != Path(self.relative_path).stem: # Maybe not in the future raise ValidationError("Images should be a directory") else: if self.relative_path.startswith("images/"): raise ValidationError( "Relative path should not start with images/" ) def _clean_store_in_database(self): object_store_required = self.kind in { *InterfaceKind.interface_type_image(), *InterfaceKind.interface_type_file(), # These values can be large, so for any new interfaces of this # type always add them to the object store InterfaceKind.InterfaceKindChoices.MULTIPLE_TWO_D_BOUNDING_BOXES, InterfaceKind.InterfaceKindChoices.MULTIPLE_DISTANCE_MEASUREMENTS, InterfaceKind.InterfaceKindChoices.MULTIPLE_POINTS, InterfaceKind.InterfaceKindChoices.MULTIPLE_POLYGONS, InterfaceKind.InterfaceKindChoices.MULTIPLE_LINES, InterfaceKind.InterfaceKindChoices.MULTIPLE_ANGLES, } if object_store_required and self.store_in_database: raise ValidationError( f"Interface {self.kind} objects cannot be stored in the database" )
[docs] def validate_against_schema(self, *, value): """Validates values against both default and custom schemas""" JSONValidator( schema={ **INTERFACE_VALUE_SCHEMA, "anyOf": [{"$ref": f"#/definitions/{self.kind}"}], } )(value=value) if self.schema: JSONValidator(schema=self.schema)(value=value)
class Meta: ordering = ("pk",)
def component_interface_value_path(instance, filename): # Convert the pk to a hex, padded to 4 chars with zeros pk_as_padded_hex = f"{instance.pk:04x}" return ( f"{instance._meta.app_label.lower()}/" f"{instance._meta.model_name.lower()}/" f"{pk_as_padded_hex[-4:-2]}/{pk_as_padded_hex[-2:]}/{instance.pk}/" f"{get_valid_filename(filename)}" )
[docs]class ComponentInterfaceValue(models.Model): """Encapsulates the value of an interface at a certain point in the graph.""" id = models.BigAutoField(primary_key=True) interface = models.ForeignKey( to=ComponentInterface, on_delete=models.PROTECT ) value = models.JSONField(null=True, blank=True, default=None) file = models.FileField( null=True, blank=True, upload_to=component_interface_value_path, storage=protected_s3_storage, validators=[ ExtensionValidator( allowed_extensions=( ".json", ".zip", ".csv", ".png", ".jpg", ".jpeg", ".pdf", ".sqreg", ".obj", ".mp4", ) ), MimeTypeValidator( allowed_types=( "application/json", "application/zip", "text/plain", "application/csv", "application/pdf", "image/png", "image/jpeg", "application/octet-stream", "application/x-sqlite3", "application/vnd.sqlite3", "video/mp4", ) ), ], ) image = models.ForeignKey( to=Image, null=True, blank=True, on_delete=models.PROTECT ) _user_upload_validated = False @property def title(self): if self.value is not None: return str(self.value) if self.file: return Path(self.file.name).name if self.image: return self.image.name return "" @property def has_value(self): return self.value is not None or self.image or self.file @property def decompress(self): """ Should the CIV be decompressed? This is only for legacy support of zip file submission for prediction evaluation. We should not support this anywhere else as it clobbers the input directory. """ return self.interface.kind == InterfaceKindChoices.ZIP @cached_property def image_file(self): """The single image file for this interface""" return ( self.image.files.filter( image_type__in=[ ImageFile.IMAGE_TYPE_MHD, ImageFile.IMAGE_TYPE_TIFF, ] ) .get() .file ) @property def relative_path(self): """ Where should the file be located? Images need special handling as their names are fixed. """ path = Path(self.interface.relative_path) if self.image: path /= Path(self.image_file.name).name return path def __str__(self): if self.value is None: return self.title else: return f"Component Interface Value {self.pk} for {self.interface}"
[docs] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._value_orig = self.value self._image_orig = self._dict["image"] self._file_orig = self.file
@property def _dict(self): return model_to_dict( self, fields=[field.name for field in self._meta.fields] )
[docs] def save(self, *args, **kwargs): if ( ( self._value_orig not in (None, self.interface.default_value) and self.value is not None and self._value_orig != self.value ) or (self._image_orig and self._image_orig != self.image.pk) or ( self._file_orig.name not in (None, "") and self._file_orig != self.file ) ): raise ValidationError( "You cannot change the value, file or image of an existing CIV. " "Please create a new CIV instead." ) super().save(*args, **kwargs)
[docs] def clean(self): super().clean() attributes = [ attribute for attribute in [self.value, self.image, self.file.name] if attribute is not None if attribute != "" ] if len(attributes) > 1: raise ValidationError( "Only one of image, value and file can be defined." ) if self.interface.is_image_kind: self._validate_image_only() if self.interface.kind == InterfaceKindChoices.SEGMENTATION: self.interface._validate_voxel_values(self.image) elif self.interface.is_file_kind: self._validate_file_only() else: self._validate_value()
def _validate_image_only(self): if not self.image: raise ValidationError("Image must be set") if self.file or self.value is not None: raise ValidationError( f"File ({self.file}) or value should not be set for images" ) def _validate_file_only(self): if not self._user_upload_validated and not self.file: raise ValidationError("File must be set") if self.image or self.value is not None: raise ValidationError( f"Image ({self.image}) or value must not be set for files" ) def _validate_value_only(self): # Do not check self.value here, it can be anything including None. # This is checked later with interface.validate_against_schema. if self.image or self.file: raise ValidationError( f"Image ({self.image}) or file ({self.file}) must not be set for values" ) def _validate_value(self): if self._user_upload_validated: return if self.interface.saved_in_object_store: self._validate_file_only() with self.file.open("r") as f: value = json.loads(f.read().decode("utf-8")) else: self._validate_value_only() value = self.value self.interface.validate_against_schema(value=value) def validate_user_upload(self, user_upload): if not user_upload.is_completed: raise ValidationError("User upload is not completed.") if self.interface.is_json_kind: value = user_upload.read_object() self.interface.validate_against_schema(value=value) self._user_upload_validated = True class Meta: ordering = ("pk",)
[docs]class ComponentJobManager(models.QuerySet):
[docs] def with_duration(self): """Annotate the queryset with the duration of completed jobs""" return self.annotate(duration=F("completed_at") - F("started_at"))
[docs] def average_duration(self): """Calculate the average duration that completed jobs ran for""" return ( self.with_duration() .exclude(duration=None) .aggregate(Avg("duration"))["duration__avg"] )
def total_duration(self): return ( self.with_duration() .exclude(duration=None) .aggregate(Sum("duration"))["duration__sum"] ) def active(self): return self.exclude( status__in=[ ComponentJob.SUCCESS, ComponentJob.CANCELLED, ComponentJob.FAILURE, ] )
class ComponentJob(models.Model): # The job statuses come directly from celery.result.AsyncResult.status: # http://docs.celeryproject.org/en/latest/reference/celery.result.html PENDING = 0 STARTED = 1 RETRY = 2 FAILURE = 3 SUCCESS = 4 CANCELLED = 5 PROVISIONING = 6 PROVISIONED = 7 EXECUTING = 8 EXECUTED = 9 PARSING = 10 EXECUTING_PREREQUISITES = 11 STATUS_CHOICES = ( (PENDING, "Queued"), (STARTED, "Started"), (RETRY, "Re-Queued"), (FAILURE, "Failed"), (SUCCESS, "Succeeded"), (CANCELLED, "Cancelled"), (PROVISIONING, "Provisioning"), (PROVISIONED, "Provisioned"), (EXECUTING, "Executing"), (EXECUTED, "Executed"), (PARSING, "Parsing Outputs"), (EXECUTING_PREREQUISITES, "Executing Algorithm"), ) status = models.PositiveSmallIntegerField( choices=STATUS_CHOICES, default=PENDING, db_index=True ) attempt = models.PositiveSmallIntegerField(editable=False, default=0) stdout = models.TextField() stderr = models.TextField(default="") runtime_metrics = models.JSONField(default=dict, editable=False) error_message = models.CharField(max_length=1024, default="") started_at = models.DateTimeField(null=True) completed_at = models.DateTimeField(null=True) input_prefixes = models.JSONField( default=dict, editable=False, help_text=( "Map of the ComponentInterfaceValue id to the path prefix to use " "for this input, e.g. {'1': 'foo/bar/'} will place CIV 1 at " "/input/foo/bar/<relative_path>" ), ) task_on_success = models.JSONField( default=None, null=True, editable=False, help_text="Serialized task that is run on job success", ) task_on_failure = models.JSONField( default=None, null=True, editable=False, help_text="Serialized task that is run on job failure", ) time_limit = models.PositiveSmallIntegerField( default=60 * 60, help_text="Time limit for the job in seconds", validators=[ MinValueValidator(limit_value=60), MaxValueValidator(limit_value=60 * 60), ], ) inputs = models.ManyToManyField( to=ComponentInterfaceValue, related_name="%(app_label)s_%(class)ss_as_input", ) outputs = models.ManyToManyField( to=ComponentInterfaceValue, related_name="%(app_label)s_%(class)ss_as_output", ) objects = ComponentJobManager.as_manager() def update_status( self, *, status: STATUS_CHOICES, stdout: str = "", stderr: str = "", error_message="", duration: Optional[timedelta] = None, runtime_metrics=None, ): self.status = status if stdout: self.stdout = stdout if stderr: self.stderr = stderr if error_message: self.error_message = error_message[:1024] if ( status in [self.STARTED, self.EXECUTING] and self.started_at is None ): self.started_at = now() elif ( status in [self.EXECUTED, self.SUCCESS, self.FAILURE, self.CANCELLED] and self.completed_at is None ): self.completed_at = now() if duration and self.started_at: # TODO: maybe add separate timings for provisioning, executing, parsing and total self.started_at = self.completed_at - duration if runtime_metrics is not None: self.runtime_metrics = runtime_metrics self.save() if self.status == self.SUCCESS: on_commit(self.execute_task_on_success) elif self.status in [self.FAILURE, self.CANCELLED]: on_commit(self.execute_task_on_failure) @property def executor_kwargs(self): return { "job_id": f"{self._meta.app_label}-{self._meta.model_name}-{self.pk}-{self.attempt:02}", "exec_image_repo_tag": self.container.shimmed_repo_tag, "memory_limit": self.container.requires_memory_gb, "time_limit": self.time_limit, "requires_gpu": self.container.requires_gpu, } def get_executor(self, *, backend): Executor = import_string(backend) # noqa: N806 return Executor(**self.executor_kwargs) @property def container(self) -> "ComponentImage": """ Returns the container object associated with this instance, which should be a foreign key to an object that is a subclass of ComponentImage """ raise NotImplementedError @property def output_interfaces(self) -> QuerySet: """Returns an unevaluated QuerySet for the output interfaces""" raise NotImplementedError @property def signature_kwargs(self): return { "kwargs": { "job_pk": str(self.pk), "job_app_label": self._meta.app_label, "job_model_name": self._meta.model_name, "backend": settings.COMPONENTS_DEFAULT_BACKEND, }, "immutable": True, } def execute(self): return provision_job.signature(**self.signature_kwargs).apply_async() def execute_task_on_success(self): deprovision_job.signature(**self.signature_kwargs).apply_async() if self.task_on_success: signature(self.task_on_success).apply_async() def execute_task_on_failure(self): deprovision_job.signature(**self.signature_kwargs).apply_async() if self.task_on_failure: signature(self.task_on_failure).apply_async() @property def animate(self): return self.status in { self.STARTED, self.PROVISIONING, self.PROVISIONED, self.EXECUTING, self.EXECUTED, self.PARSING, self.EXECUTING_PREREQUISITES, } @property def status_context(self): if self.status == self.SUCCESS: if self.stderr: return "warning" else: return "success" elif self.status in {self.FAILURE, self.CANCELLED}: return "danger" elif self.status in { self.PENDING, self.STARTED, self.RETRY, self.PROVISIONING, self.PROVISIONED, self.EXECUTING, self.EXECUTED, self.PARSING, self.EXECUTING_PREREQUISITES, }: return "info" else: return "secondary" @property def runtime_metrics_chart(self): instance_metrics = self.runtime_metrics["instance"] cpu_limit = 100 * instance_metrics["cpu"] if instance_metrics["gpus"]: gpu_str = ( f"{instance_metrics['gpus']}x {instance_metrics['gpu_type']}" ) else: gpu_str = "No" title = f"{instance_metrics['name']} / {instance_metrics['cpu']} CPU / {instance_metrics['memory']} GB Memory / {gpu_str} GPU" return { "$schema": "https://vega.github.io/schema/vega-lite/v5.json", "width": "container", "padding": 0, "title": title, "data": { "values": [ { "Metric": metric["label"], "Timestamp": timestamp, "Percent": value / 100.0, } for metric in self.runtime_metrics["metrics"] for timestamp, value in zip( metric["timestamps"], metric["values"] ) ] }, "layer": [ { "transform": [ {"calculate": "100*datum.Percent", "as": "Percent100"}, ], "encoding": { "x": { "timeUnit": "hoursminutesseconds", "field": "Timestamp", "title": "Local Time / HH:MM:SS", }, "y": { "field": "Percent100", "type": "quantitative", "title": "Utilization / %", }, "color": {"field": "Metric", "type": "nominal"}, }, "layer": [ {"mark": "line"}, { "transform": [ {"filter": {"param": "hover", "empty": False}} ], "mark": "point", }, ], }, { "transform": [ { "pivot": "Metric", "value": "Percent", "groupby": ["Timestamp"], } ], "mark": "rule", "encoding": { "opacity": { "condition": { "value": 0.3, "param": "hover", "empty": False, }, "value": 0, }, "tooltip": [ { "field": metric["label"], "type": "quantitative", "format": ".2%", } for metric in self.runtime_metrics["metrics"] ], "x": { "timeUnit": "hoursminutesseconds", "field": "Timestamp", "title": "Local Time / HH:MM:SS", }, }, "params": [ { "name": "hover", "select": { "type": "point", "fields": ["Timestamp"], "nearest": True, "on": "mouseover", "clear": "mouseout", }, } ], }, { "data": {"values": [{}]}, "mark": {"type": "rule", "strokeDash": [8, 8]}, "encoding": {"y": {"datum": cpu_limit}}, }, { "data": {"values": [{}]}, "mark": {"type": "text", "baseline": "line-bottom"}, "encoding": { "text": {"datum": "CPU Utilization Limit"}, "y": {"datum": cpu_limit}, }, }, { "data": {"values": [{}]}, "mark": {"type": "rule", "strokeDash": [8, 8]}, "encoding": {"y": {"datum": 100}}, }, { "data": {"values": [{}]}, "mark": {"type": "text", "baseline": "line-bottom"}, "encoding": { "text": { "datum": "Memory / GPU / GPU Memory Utilization Limit" }, "y": {"datum": 100}, }, }, ], } class Meta: abstract = True def docker_image_path(instance, filename): return ( f"docker/" f"images/" f"{instance._meta.app_label.lower()}/" f"{instance._meta.model_name.lower()}/" f"{instance.pk}/" f"{get_valid_filename(filename)}" )
[docs]class ImportStatusChoices(IntegerChoices): INITIALIZED = 0, "Initialized" QUEUED = 1, "Queued" RETRY = 2, "Re-Queued" STARTED = 3, "Started" CANCELLED = 4, "Cancelled" FAILED = 5, "Failed" COMPLETED = 6, "Completed"
class ComponentImageManager(models.Manager): def executable_images(self): queryset = self.filter(is_manifest_valid=True, is_in_registry=True) if ( self.model.SHIM_IMAGE and settings.COMPONENTS_CREATE_SAGEMAKER_MODEL ): # SageMaker models are only created for shimmed images # See validate_docker_image return queryset.filter(is_on_sagemaker=True) else: return queryset class ComponentImage(models.Model): SHIM_IMAGE = True ImportStatusChoices = ImportStatusChoices objects = ComponentImageManager() creator = models.ForeignKey( settings.AUTH_USER_MODEL, null=True, on_delete=models.SET_NULL ) user_upload = models.ForeignKey( UserUpload, blank=True, null=True, on_delete=models.SET_NULL ) image = models.FileField( blank=True, upload_to=docker_image_path, validators=[ ExtensionValidator( allowed_extensions=(".tar", ".tar.gz", ".tar.xz") ) ], help_text=( ".tar.xz archive of the container image produced from the command " "'docker save IMAGE | xz -T0 -c > IMAGE.tar.xz'. See " "https://docs.docker.com/engine/reference/commandline/save/" ), storage=private_s3_storage, ) image_sha256 = models.CharField(editable=False, max_length=71) latest_shimmed_version = models.CharField( editable=False, max_length=8, default="" ) ready = deprecate_field( models.BooleanField( default=False, editable=False, help_text="Is this image ready to be used?", ) ) import_status = models.PositiveSmallIntegerField( choices=ImportStatusChoices.choices, default=ImportStatusChoices.INITIALIZED, db_index=True, ) is_manifest_valid = models.BooleanField( default=None, null=True, editable=False, help_text="Is this image's manifest valid?", ) is_in_registry = models.BooleanField( default=False, editable=False, help_text="Is this image in the container registry?", ) is_on_sagemaker = models.BooleanField( default=False, editable=False, help_text="Does a SageMaker model for this image exist?", ) status = models.TextField(editable=False) requires_gpu = models.BooleanField(default=False) requires_memory_gb = models.PositiveIntegerField(default=4) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._image_orig = self.image @cached_property def can_execute(self): return ( self.__class__.objects.executable_images() .filter(pk=self.pk) .exists() ) def save(self, *args, **kwargs): image_needs_validation = ( self.import_status == ImportStatusChoices.INITIALIZED and self.is_manifest_valid is None ) validate_image_now = False if self._image_orig: if self.image != self._image_orig: raise RuntimeError("The image cannot be changed") if image_needs_validation: self.import_status = ImportStatusChoices.QUEUED validate_image_now = True elif self.image and image_needs_validation: self.import_status = ImportStatusChoices.QUEUED validate_image_now = True super().save(*args, **kwargs) if validate_image_now: on_commit( validate_docker_image.signature( kwargs={ "app_label": self._meta.app_label, "model_name": self._meta.model_name, "pk": self.pk, } ).apply_async ) def assign_docker_image_from_upload(self): on_commit( assign_docker_image_from_upload.signature( kwargs={ "app_label": self._meta.app_label, "model_name": self._meta.model_name, "pk": self.pk, } ).apply_async ) @property def original_repo_tag(self): """The tag of this image in the container repository""" return ( f"{settings.COMPONENTS_REGISTRY_URL}/" f"{settings.COMPONENTS_REGISTRY_PREFIX}/" f"{self._meta.app_label}/{self._meta.model_name}:{self.pk}" ) @property def shimmed_repo_tag(self): return f"{self.original_repo_tag}-{self.latest_shimmed_version}" class Meta: abstract = True @property def animate(self): return self.import_status == self.ImportStatusChoices.STARTED @property def import_status_context(self): if self.import_status == self.ImportStatusChoices.COMPLETED: return "success" elif self.import_status in { self.ImportStatusChoices.FAILED, self.ImportStatusChoices.CANCELLED, }: return "danger" elif self.import_status in { self.ImportStatusChoices.INITIALIZED, self.ImportStatusChoices.QUEUED, self.ImportStatusChoices.RETRY, self.ImportStatusChoices.STARTED, }: return "info" else: return "secondary"