Я разрабатываю JSON-схему и пытаюсь убедиться, что файлы по ней проверяются правильно. Я пока новичок в мире JSON-схем (начал только сегодня), так что прошу прощения, если использую неверную терминологию.
У меня есть разные типы файлов, и они различаются в зависимости от значения biomaterial_type. Каждый из них должен проверяться по «#/definitions/basic», некоторые — ещё и по «#/definitions/donor», и у каждого типа будут свои уникальные поля для проверки.
Вот (сокращенный) пример, содержащий один тип biomaterial_type:
{
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"basic": {
"type": "object",
"description": "Objects shared across all samples",
"properties": {
"sample_ontology_uri" : {
"type": "array", "minItems": 1,
"items": {
"type": "string",
"format": "uri",
"description": "(Ontology: EFO) links to sample ontology information."}},
"disease_ontology_uri" : {
"type": "array", "minItems": 1,
"items": {
"type": "string",
"format": "uri",
"description": "(Ontology: NCIM)"}},
"disease" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "Free form field "}},
"biomaterial_provider" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The name of the company, laboratory or person that provided the biological material."}},
"biomaterial_type" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The type of the biosample used (Cell Line, Primary Cell, Primary Cell Culture, Primary Tissue)",
"enum":["Cell Line", "Primary Cell", "Primary Cell Culture", "Primary Tissue"]}},
"treatment" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "Any artificial modification (differentiation, activation, genome editing, etc)."}},
"biological_replicates": {
"type": "array",
"items": {
"type": "string",
"description": "List of biological replicate sample accessions"}}
},
"required": ["sample_ontology_curie", "disease_ontology_curie", "disease", "biomaterial_provider", "biomaterial_type", "treatment", "biological_replicates"]
},
"donor": {
"type": "object",
"description": "Additional set of properties for samples coming from a donor.",
"properties": {
"donor_id" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "An identifying designation for the donor that provided the cells/tissues."}},
"donor_age" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"description": "The age of the donor that provided the cells/tissues. NA if not available. If over 90 years enter as 90+. If entering a range of ages use the format “{age}-{age}”.",
"oneOf": [
{ "type": "number" },
{ "type": "string", "enum": ["90+", "NA"] },
{ "type": "string", "format": "uri" }
]
}},
"donor_age_unit" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The unit of measurement used to describe the age of the sample (year, month, week, day)",
"enum": ["year", "month", "week", "day"]}},
"donor_life_stage": {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The stage or phase of the donor when the sample was taken (embryonic, fetal, postnatal, newborn, child, adult, unknown)",
"enum": ["embryonic", "fetal", "postnatal", "newborn", "child", "adult", "unknown"]}},
"donor_health_status" : {
"type": "array", "minItems": 1, "maxItems": 1, "items": {
"type": "string",
"description": "The health status of the donor that provided the primary cell. NA if not available."}},
"donor_health_status_ontology_uri" : {
"type": "array", "minItems": 1,
"items": {
"type": "string",
"format": "uri",
"description": "(Ontology: NCIM) "}},
"donor_sex" : {"type": "array", "minItems": 1, "maxItems": 1, "items": {"type": "string", "enum": ["Male", "Female", "Unknown", "Mixed"], "description": "'Male', 'Female', 'Unknown', or 'Mixed' for pooled samples."}},
"donor_ethnicity" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The ethnicity of the donor that provided the primary cell. NA if not available. If dealing with small/vulnerable populations consider identifiability issues."}}
},
"required": ["donor_id", "donor_age", "donor_age_unit", "donor_life_stage", "donor_health_status_uri", "donor_health_status", "donor_sex", "donor_ethnicity"]
}
},
"type" : "object",
"if":
{"properties":
{ "biomaterial_type": {"const": "Primary Tissue"}},
"required": ["biomaterial_type"] },
"then": {
"allOf": [
{ "$ref": "#/definitions/donor" },
{
"properties": {
"tissue_type" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The type of tissue."}},
"tissue_depot" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "Details about the anatomical location from which the primary tissue was collected."}},
"collection_method" : {
"type": "array", "minItems": 1, "maxItems": 1,
"items": {
"type": "string",
"description": "The protocol for collecting the primary tissue."}}
},
"required": ["tissue_type", "tissue_depot", "collection_method"]
}
]
}
}
Остальные значения biomaterial_type будут добавлены через дополнительные условия if.
Вот пример json:
{
"SAMPLE_SET": {
"SAMPLE": [
{
"TITLE": "Homo sapiens male embryo (108 days) small intestine tissue",
"SAMPLE_NAME": {
"TAXON_ID": "9606",
"SCIENTIFIC_NAME": "Homo sapiens",
"COMMON_NAME": "human"
},
"SAMPLE_ATTRIBUTES": {
"SAMPLE_ATTRIBUTE": [
{
"TAG": "SAMPLE_ONTOLOGY_URI",
"VALUE": "http://purl.obolibrary.org/obo/UBERON:0002108"
},
{
"TAG": "DISEASE_ONTOLOGY_URI",
"VALUE": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C115935"
},
{
"TAG": "DISEASE",
"VALUE": "Healthy"
},
{
"TAG": "BIOMATERIAL_PROVIDER",
"VALUE": "Ian Glass at Congenital Defects Lab, University of Washington"
},
{
"TAG": "BIOMATERIAL_TYPE",
"VALUE": "Primary Tissue"
},
{
"TAG": "TISSUE_TYPE",
"VALUE": "small intestine"
},
{
"TAG": "TISSUE_DEPOT",
"VALUE": "Ian Glass at Congenital Defects Lab, University of Washington"
},
{
"TAG": "COLLECTION_METHOD",
"VALUE": "unknown"
},
{
"TAG": "DONOR_ID",
"VALUE": "ENCDO119ASK"
},
{
"TAG": "DONOR_AGE",
"VALUE": "NA"
},
{
"TAG": "DONOR_AGE_UNIT",
"VALUE": "day"
},
{
"TAG": "DONOR_LIFE_STAGE",
"VALUE": "embryonic"
},
{
"TAG": "DONOR_HEALTH_STATUS_ONTOLOGY_URI",
"VALUE": "https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C115935"
},
{
"TAG": "DONOR_HEALTH_STATUS",
"VALUE": "Healthy"
},
{
"TAG": "DONOR_SEX",
"VALUE": "Male"
},
{
"TAG": "DONOR_ETHNICITY",
"VALUE": "NA"
}
]
},
"_accession": "ENCBS054KUO",
"_center_name": "ENCODE"
}
]
}
}
Я пытаюсь проверить, работает ли схема, с помощью библиотеки jsonschema для Python:
import json
import jsonschema
from jsonschema import validate
# Load the instance document and the schema from disk, then validate the
# instance against the schema and report the outcome on stdout.
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# The file name must be a quoted string: the bare expression `schema.json`
# is an attribute lookup on the `schema` variable and raises AttributeError.
with open('schema.json', 'r', encoding='utf-8') as file:
    schema = json.load(file)

try:
    # validate() checks the schema itself first (SchemaError) and then the
    # instance against it (ValidationError).
    jsonschema.validate(data, schema)
    print('ok')
except jsonschema.ValidationError as e:
    # Only the human-readable message of the first validation failure.
    print(e.message)
except jsonschema.SchemaError as e:
    print(e)
Я всегда получаю «ok», даже если передаю JSON-данные с ошибками.
Проблема в моем Python скрипте или в моей схеме?
Спасибо за любые подсказки.