Тестирование JSON-схемы - PullRequest
0 голосов
/ 10 марта 2020

Я разрабатываю JSON-схему и пытаюсь проверить, правильно ли валидируются файлы. Я совсем новичок в мире JSON-схем (начал только сегодня), поэтому извините, если моя терминология неверна.

У меня есть файлы разных типов, и они будут различаться в зависимости от их biomaterial_type. Каждый из них должен быть проверен по «#/definitions/basic», некоторые — по «#/definitions/donor», и у всех будут уникальные поля для проверки.

Вот (сокращенный) пример, содержащий один тип biomaterial_type:

{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "definitions": {
      "basic": {
        "type": "object",
        "description": "Objects shared across all samples",
        "properties": {
          "sample_ontology_uri" : {
            "type": "array", "minItems": 1, 
            "items": {
              "type": "string", 
              "format": "uri", 
              "description": "(Ontology: EFO) links to sample ontology information."}},
          "disease_ontology_uri" : {
            "type": "array", "minItems": 1, 
            "items": {
              "type": "string", 
              "format": "uri", 
              "description": "(Ontology: NCIM)"}},
          "disease" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "Free form field "}},
          "biomaterial_provider" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The name of the company, laboratory or person that provided the biological material."}},
          "biomaterial_type" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The type of the biosample used (Cell Line, Primary Cell, Primary Cell Culture, Primary Tissue)",
              "enum":["Cell Line", "Primary Cell", "Primary Cell Culture", "Primary Tissue"]}},
          "treatment" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "Any artificial modification (differentiation, activation, genome editing, etc)."}},
          "biological_replicates": {
            "type": "array", 
            "items": {
              "type": "string", 
              "description": "List of biological replicate sample accessions"}}
        },
        "required": ["sample_ontology_curie", "disease_ontology_curie", "disease", "biomaterial_provider", "biomaterial_type", "treatment", "biological_replicates"]

      },
      "donor": {
        "type": "object",
        "description": "Additional set of properties for samples coming from a donor.",
        "properties": {
          "donor_id" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "An identifying designation for the donor that provided the cells/tissues."}},
          "donor_age" : { 
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "description": "The age of the donor that provided the cells/tissues. NA if not available. If over 90 years enter as 90+. If entering a range of ages use the format “{age}-{age}”.",
              "oneOf": [
              { "type": "number" },
              { "type": "string", "enum": ["90+", "NA"] },
              { "type": "string", "format": "uri" }
              ]
            }},
          "donor_age_unit" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The unit of measurement used to describe the age of the sample (year, month, week, day)",  
              "enum": ["year", "month", "week", "day"]}},
          "donor_life_stage": {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The stage or phase of the donor when the sample was taken (embryonic, fetal, postnatal, newborn, child, adult, unknown)",
              "enum": ["embryonic", "fetal", "postnatal", "newborn", "child", "adult", "unknown"]}},
          "donor_health_status" : {
            "type": "array", "minItems": 1, "maxItems": 1, "items": {
              "type": "string", 
              "description": "The health status of the donor that provided the primary cell. NA if not available."}},
          "donor_health_status_ontology_uri" : {
            "type": "array", "minItems": 1, 
            "items": {
              "type": "string", 
              "format": "uri", 
              "description": "(Ontology: NCIM) "}},
          "donor_sex" : {"type": "array", "minItems": 1, "maxItems": 1, "items": {"type": "string", "enum": ["Male", "Female", "Unknown", "Mixed"], "description": "'Male', 'Female', 'Unknown', or 'Mixed' for pooled samples."}},
          "donor_ethnicity" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The ethnicity of the donor that provided the primary cell. NA if not available. If dealing with small/vulnerable populations consider identifiability issues."}}
        },
        "required": ["donor_id", "donor_age", "donor_age_unit", "donor_life_stage", "donor_health_status_uri", "donor_health_status", "donor_sex", "donor_ethnicity"]
      }
    },

    "type" : "object",


    "if": 
    {"properties": 
      { "biomaterial_type": {"const": "Primary Tissue"}},
      "required": ["biomaterial_type"] },
    "then": {
      "allOf": [
      {  "$ref": "#/definitions/donor" },
      {
        "properties": {
          "tissue_type" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The type of tissue."}},
          "tissue_depot" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "Details about the anatomical location from which the primary tissue was collected."}},
          "collection_method" : {
            "type": "array", "minItems": 1, "maxItems": 1, 
            "items": {
              "type": "string", 
              "description": "The protocol for collecting the primary tissue."}}
        },
        "required": ["tissue_type", "tissue_depot", "collection_method"]
      }  
      ]
    }
}

Дополнительные типы biomaterial_type будут добавлены через дополнительные условия if.

Вот пример json:

{
  "SAMPLE_SET": {
    "SAMPLE": [
      {
        "TITLE": "Homo sapiens male embryo (108 days) small intestine tissue",
        "SAMPLE_NAME": {
          "TAXON_ID": "9606",
          "SCIENTIFIC_NAME": "Homo sapiens",
          "COMMON_NAME": "human"
        },
        "SAMPLE_ATTRIBUTES": {
          "SAMPLE_ATTRIBUTE": [
            {
              "TAG": "SAMPLE_ONTOLOGY_URI",
              "VALUE": "http://purl.obolibrary.org/obo/UBERON:0002108"
            },
            {
              "TAG": "DISEASE_ONTOLOGY_URI",
              "VALUE": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C115935"
            },
            {
              "TAG": "DISEASE",
              "VALUE": "Healthy"
            },
            {
              "TAG": "BIOMATERIAL_PROVIDER",
              "VALUE": "Ian Glass at Congenital Defects Lab, University of Washington"
            },
            {
              "TAG": "BIOMATERIAL_TYPE",
              "VALUE": "Primary Tissue"
            },
            {
              "TAG": "TISSUE_TYPE",
              "VALUE": "small intestine"
            },
            {
              "TAG": "TISSUE_DEPOT",
              "VALUE": "Ian Glass at Congenital Defects Lab, University of Washington"
            },
            {
              "TAG": "COLLECTION_METHOD",
              "VALUE": "unknown"
            },
            {
              "TAG": "DONOR_ID",
              "VALUE": "ENCDO119ASK"
            },
            {
              "TAG": "DONOR_AGE",
              "VALUE": "NA"
            },
            {
              "TAG": "DONOR_AGE_UNIT",
              "VALUE": "day"
            },
            {
              "TAG": "DONOR_LIFE_STAGE",
              "VALUE": "embryonic"
            },
            {
              "TAG": "DONOR_HEALTH_STATUS_ONTOLOGY_URI",
              "VALUE": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C115935"
            },
            {
              "TAG": "DONOR_HEALTH_STATUS",
              "VALUE": "Healthy"
            },
            {
              "TAG": "DONOR_SEX",
              "VALUE": "Male"
            },
            {
              "TAG": "DONOR_ETHNICITY",
              "VALUE": "NA"
            }
          ]
        },
        "_accession": "ENCBS054KUO",
        "_center_name": "ENCODE"
      }
    ]
  }
}

Я пытаюсь проверить, имеет ли схема смысл, используя jsonschema с python:

import json
import jsonschema
from jsonschema import validate


# Load the instance document and the schema from disk. json.load parses
# directly from the file object, so no intermediate string is needed.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform default.
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
# The path must be a string literal; a bare `schema.json` would be an
# attribute lookup on a variable and fail before validation is reached.
with open('schema.json', 'r', encoding='utf-8') as file:
    schema = json.load(file)

try:
    jsonschema.validate(data, schema)
    print('ok')
except jsonschema.ValidationError as e:
    # The instance does not conform to the schema.
    print(e.message)
except jsonschema.SchemaError as e:
    # The schema itself was rejected as invalid.
    print(e)

Я всегда получаю «ok», даже если передаю JSON-данные с ошибками.

Проблема в моем Python скрипте или в моей схеме?

Спасибо за любые подсказки.

...