Как построить нейронную сеть, чтобы оценить степень экспрессии гена, связанную с наличием поражений легких - PullRequest
0 голосов
/ 19 июня 2019
  • Я должен использовать сверточно-рекуррентную нейронную сеть с несколькими слоями. Эти слои должны быть реализованы с помощью уравнений LSTM и должны быть сверточными. Каждый слой имеет шаг (stride) = 1 и отступ (padding) = (k-1) / 2, где k — размер ядра.

  • Я должен объединить эту модель с нерекуррентным слоем conv3d. Выход этой модели должен иметь 2 канала, а его размер должен совпадать с высотой и шириной входа (глубина = 1). Эти 2 канала представляют вероятность того, что в данной точке есть поражение легкого. В качестве функции потерь я могу использовать отрицательное логарифмическое правдоподобие (NLL loss) для классификации.

У меня уже есть набор данных, состоящий из набора изображений CT (TAC), которые я должен проанализировать.

THIS IS THE DATASET


import os
import pydicom
import re
import torch
import torch.nn as nn
import numpy as np
from math import ceil, log
from PIL import Image, ImageDraw

from torch.utils.data import Dataset, DataLoader

class CTSegmented(Dataset):
    """Dataset of CT slice triplets paired with segmentation masks.

    Every sample comes from one ``segmented file -> metadata file`` pair of
    the hard-coded mapping. The slice of a "BIOPSIA ASSIALI" series lying at
    the z position of the segmented file is looked up and stored together
    with its previous and next slice, plus the path of the metadata file
    whose encapsulated document holds the polygon points of the mask.
    """

    # Patient ids removed from the mapping when building each split.
    _DROPPED_FOR_TEST = ("10", "2", "3", "4", "5", "6", "7")
    _DROPPED_FOR_TRAIN = ("8", "9", "1")

    # Divisor applied to the raw pixel values
    # (presumably the expected intensity range -- TODO confirm).
    _PIXEL_SCALE = 4000
    # Shape of the all-zero volume returned when the slices are unusable.
    _FALLBACK_SHAPE = (3, 512, 512)
    # Polygon coordinates are rescaled by _MASK_SIZE / _SOURCE_SIZE before
    # being drawn on a _MASK_SIZE x _MASK_SIZE mask.
    _MASK_SIZE = 224
    _SOURCE_SIZE = 512

    def __init__(self, ddir, test):
        """Index the samples found under *ddir*.

        Args:
            ddir: Root directory holding one ``<patient>/study/series``
                folder per patient.
            test: If true, the ids in ``_DROPPED_FOR_TEST`` are removed from
                the mapping; otherwise the ids in ``_DROPPED_FOR_TRAIN`` are.
        """
        self.dset = []
        mappings = {
                "1": {
                        ddir + "/1/study/series/IM-0001-0006-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0001.dcm",
                        ddir + "/1/study/series/IM-0001-0008-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0002.dcm",
                        ddir + "/1/study/series/IM-0001-0010-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0003.dcm",
                        ddir + "/1/study/series/IM-0001-0009-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0004.dcm",
                        ddir + "/1/study/series/IM-0001-0007-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0005.dcm",
                        ddir + "/1/study/series/IM-0001-0011-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0006.dcm",
                        ddir + "/1/study/series/IM-0001-0005-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0007.dcm",
                        ddir + "/1/study/series/IM-0001-0003-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0008.dcm",
                        ddir + "/1/study/series/IM-0001-0002-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0009.dcm",
                        ddir + "/1/study/series/IM-0001-0004-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0010.dcm",
                        ddir + "/1/study/series/IM-0001-0001-0002.dcm": ddir + "/1/study/series/IM-0025-0000-0011.dcm"
                },

                "2": {
                        ddir + "/2/study/series/IM-0038-0044-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0001.dcm",
                        ddir + "/2/study/series/IM-0038-0048-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0002.dcm",
                        ddir + "/2/study/series/IM-0038-0046-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0003.dcm",
                        ddir + "/2/study/series/IM-0038-0047-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0004.dcm",
                        ddir + "/2/study/series/IM-0038-0045-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0008.dcm",
                        ddir + "/2/study/series/IM-0038-0043-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0009.dcm",
                        ddir + "/2/study/series/IM-0038-0042-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0010.dcm",
                        ddir + "/2/study/series/IM-0038-0040-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0011.dcm",
                        ddir + "/2/study/series/IM-0038-0041-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0012.dcm",
                        ddir + "/2/study/series/IM-0038-0039-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0013.dcm",
                        ddir + "/2/study/series/IM-0038-0037-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0014.dcm",
                        ddir + "/2/study/series/IM-0038-0038-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0015.dcm",
                        ddir + "/2/study/series/IM-0038-0036-0001.dcm": ddir + "/2/study/series/IM-0050-0000-0016.dcm"
                },
                # . . .
                # NOTE: the entries of the remaining patients (ids "3" to
                # "10" are referenced by the split lists) are elided in this
                # listing.
        }

        dropped = self._DROPPED_FOR_TEST if test else self._DROPPED_FOR_TRAIN
        for patient_id in dropped:
            # pop() instead of del: an id that is absent from the mapping is
            # skipped rather than raising KeyError.
            mappings.pop(patient_id, None)

        for patient_id, mapping in mappings.items():
            patient_dir = ddir + "/" + patient_id + "/study/series"
            series_map = self._group_series_files(patient_dir)
            series_limits = self._axial_series_limits(series_map)

            for segmented_path, metadata_path in mapping.items():
                segmented_z = pydicom.dcmread(segmented_path).ImagePositionPatient[2]
                img_paths = self._find_slice_paths(segmented_z, series_map, series_limits)
                if img_paths is not None:
                    self.dset.append((img_paths, metadata_path))

    @staticmethod
    def _group_series_files(patient_dir):
        """Map each series id to the sorted list of its file paths."""
        series_map = {}
        for img_name in os.listdir(patient_dir):
            match = re.match("IM-([0-9]+)-[0-9]+.dcm", img_name)
            if match is not None:
                series_map.setdefault(match.group(1), []).append(patient_dir + "/" + img_name)
        for paths in series_map.values():
            paths.sort()
        return series_map

    @staticmethod
    def _axial_series_limits(series_map):
        """Map each "BIOPSIA ASSIALI" series id to ``(first_z, last_z, step)``.

        ``step`` is the z distance between the first two files of the series.
        """
        limits = {}
        for series_id, paths in series_map.items():
            if len(paths) < 2:
                # The spacing is measured between the first two slices.
                continue
            first_file = pydicom.dcmread(paths[0])
            if first_file.SeriesDescription != "BIOPSIA ASSIALI":
                continue
            first_z = first_file.ImagePositionPatient[2]
            last_z = pydicom.dcmread(paths[-1]).ImagePositionPatient[2]
            step = pydicom.dcmread(paths[1]).ImagePositionPatient[2] - first_z
            if step == 0:
                # A zero spacing cannot be turned into a slice index.
                continue
            limits[series_id] = (first_z, last_z, step)
        return limits

    @staticmethod
    def _slice_window(segmented_z, first_z, last_z, step, n_slices):
        """Return the ``[previous, centre, next]`` slice indices for a z value.

        Returns ``None`` when *segmented_z* lies outside ``[first_z, last_z]``
        (the interval is reversed when *step* is negative). Indices falling
        outside ``range(n_slices)`` are reported as ``None``.
        """
        if step > 0:
            inside = first_z <= segmented_z <= last_z
        else:
            inside = last_z <= segmented_z <= first_z
        if not inside:
            return None
        centre = round((segmented_z - first_z) / step)
        return [
            index if 0 <= index < n_slices else None
            for index in (centre - 1, centre, centre + 1)
        ]

    @classmethod
    def _find_slice_paths(cls, segmented_z, series_map, series_limits):
        """Return the three slice paths around *segmented_z*, or ``None``.

        The first series whose z range contains *segmented_z* is used, and the
        index is computed from the limits of that same series.
        """
        for series_id, (first_z, last_z, step) in series_limits.items():
            paths = series_map[series_id]
            window = cls._slice_window(segmented_z, first_z, last_z, step, len(paths))
            if window is not None:
                return [None if index is None else paths[index] for index in window]
        return None

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.dset)

    @classmethod
    def _read_scan(cls, img_path):
        """Return the scaled 2-D pixel array of *img_path*, or ``None``.

        ``None`` is returned for a missing path, an invalid DICOM file, a file
        without pixel data, or pixel data that is not two-dimensional.
        """
        if img_path is None:
            return None
        try:
            file = pydicom.dcmread(img_path)
            if "PixelData" in file and len(file.pixel_array.shape) == 2:
                return file.pixel_array / cls._PIXEL_SCALE
        except pydicom.errors.InvalidDicomError:
            return None
        return None

    @classmethod
    def _stack_scans(cls, scans_arrays):
        """Stack ``[previous, centre, next]`` arrays into one float volume.

        A missing neighbour is replaced by zeros of the centre's shape. When
        the centre is missing or the shapes disagree, an all-zero volume of
        shape ``_FALLBACK_SHAPE`` is returned.
        """
        centre = scans_arrays[1]
        if centre is None:
            return np.zeros(cls._FALLBACK_SHAPE, dtype=float)
        filled = [
            np.zeros(centre.shape, dtype=float) if scan is None else scan
            for scan in scans_arrays
        ]
        if any(scan.shape != centre.shape for scan in filled):
            return np.zeros(cls._FALLBACK_SHAPE, dtype=float)
        return np.array(filled, dtype=float)

    @classmethod
    def _segmentation_mask(cls, metadata_path):
        """Rasterise the polygon stored in *metadata_path* into a 0/1 mask."""
        document = pydicom.dcmread(metadata_path).EncapsulatedDocument.decode('unicode_escape')
        coords = [
            tuple(
                float(coord) * cls._MASK_SIZE / cls._SOURCE_SIZE
                for coord in re.findall("[0-9.]+", point)
            )
            for point in re.findall("{[0-9.]+, [0-9.]+}", document)
            if point != '{0, 0}'
        ]
        # Start from an empty 1-bit image; building it from an int64 array
        # with mode '1' would reinterpret the raw buffer.
        image = Image.new('1', (cls._MASK_SIZE, cls._MASK_SIZE), 0)
        if len(coords) >= 2:
            # Pillow rejects coordinate lists with fewer than two points.
            ImageDraw.Draw(image).polygon(coords, fill=1)
        return np.asarray(image).astype(np.int64)

    def __getitem__(self, idx):
        """Return ``[scans, mask]`` for sample *idx*.

        ``scans`` is a float tensor holding the previous, centre and next
        slice; ``mask`` is a long tensor with the rasterised polygon.
        """
        img_paths, metadata_path = self.dset[idx]
        print(img_paths, metadata_path)

        # Exactly one entry per path, so index 1 is always the centre slice.
        scans_arrays = [self._read_scan(img_path) for img_path in img_paths]
        scans_dss = self._stack_scans(scans_arrays)
        seg_dss = self._segmentation_mask(metadata_path)

        return [torch.from_numpy(scans_dss).float(), torch.from_numpy(np.asarray(seg_dss)).long()]
    ```
...