Разница между многомерными и уплощенными массивами при обработке случайных лесов в python - PullRequest
1 голос
/ 02 марта 2020

Привет!

У меня есть вопрос по обработке данных в Python.

Я использовал процессор Ryzen 9 3950x.

Я попытался запустить случайный лес (Random Forest) на этом процессоре.

Я использую следующий код:

import csv
from math import sqrt

import numpy as np
from osgeo import gdal
from PIL import Image
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


class CalculateIndex():
    """Fit one random-forest regressor per band between two rasters.

    The train and target rasters are read with GDAL. A third "PIF" raster
    selects the pixels used for fitting: pixels where it equals 0 keep their
    value, all other pixels are replaced by 0. The fitted models then predict
    a full image from the train raster, which is written out as a GeoTIFF.
    """

    def __init__(self, train, target, pif):
        """Load the train, target and PIF rasters into memory.

        Args:
            train: Path of the raster used as model input.
            target: Path of the raster used as model output.
            pif: Path of the mask raster (see ``intensity_index``).

        Raises:
            RuntimeError: If GDAL cannot open one of the files.
        """
        self.train = train
        self.target = target

        self.train_image = self._open(self.train)
        self.target_image = self._open(self.target)

        self.train_array = self.train_image.ReadAsArray()
        self.target_array = self.target_image.ReadAsArray()

        # A 2-D array is treated as a single band; otherwise the array is
        # unpacked as (band, height, width).
        if self.train_array.ndim == 2:
            self.band = 1
            self.height, self.width = self.train_array.shape
        else:
            self.band, self.height, self.width = self.train_array.shape

        self.pif_array = self._open(pif).ReadAsArray()
        # The previous np.where(mask != 0, mask, mask) selected the mask in
        # both branches, i.e. it returned the mask values unchanged.
        self.pif_location = self.pif_array

        print(self.pif_location)

    @staticmethod
    def _open(path):
        """Open *path* with GDAL, raising instead of handing back ``None``."""
        dataset = gdal.Open(path)
        if dataset is None:
            raise RuntimeError("GDAL could not open raster: {!r}".format(path))
        return dataset

    @staticmethod
    def _as_bands(array):
        """Return *array* with a leading band axis, adding one to 2-D input."""
        return array[np.newaxis, ...] if array.ndim == 2 else array

    def intensity_index(self):
        """Build the per-band arrays consumed by ``machine_learning``.

        Fills four lists with one ``(1, height * width)`` array per band:
        ``all_train_intensity`` / ``all_target_intensity`` hold every pixel,
        ``pif_train_intensity`` / ``pif_target_intensity`` hold the same
        pixels with every position where the PIF mask is non-zero set to 0.
        """
        # Single-band rasters are 2-D; give them a band axis so that the
        # per-band indexing below works for them too.
        train_bands = self._as_bands(self.train_array)
        target_bands = self._as_bands(self.target_array)

        # The mask does not depend on the band, so compute it once.
        keep = self.pif_location == 0

        self.all_train_intensity = []
        self.all_target_intensity = []

        self.pif_train_intensity = []
        self.pif_target_intensity = []

        for i in range(self.band):
            each_train = train_bands[i]
            each_target = target_bands[i]

            # NOTE(review): scikit-learn reads rows as samples, so
            # reshape(1, -1) gives the model ONE sample with height * width
            # features per band. That is why this variant is much cheaper
            # than passing the 2-D band (height samples, width features).
            # If a per-pixel regression is intended, reshape(-1, 1) is
            # presumably what is wanted - confirm before changing.
            self.all_train_intensity.append(each_train.reshape(1, -1))
            self.all_target_intensity.append(each_target.reshape(1, -1))

            each_train_pif = np.where(keep, each_train, 0)
            each_target_pif = np.where(keep, each_target, 0)

            self.pif_train_intensity.append(each_train_pif.reshape(1, -1))
            self.pif_target_intensity.append(each_target_pif.reshape(1, -1))

    def save_image(self, image_array, image_name, data_type):
        """Write *image_array* to a GeoTIFF georeferenced like the train raster.

        Args:
            image_array: 2-D array (one band) or array whose first axis is
                the band index.
            image_name: Output file path.
            data_type: One of ``"uint8"``, ``"uint16"`` or ``"float"``.

        Raises:
            ValueError: If *data_type* is not one of the supported names.
        """
        # Built here rather than at class level so that defining the class
        # does not require GDAL to be imported yet.
        gdal_types = {
            "uint8": gdal.GDT_Byte,
            "uint16": gdal.GDT_UInt16,
            "float": gdal.GDT_Float64,
        }
        try:
            data_type_name = gdal_types[data_type]
        except KeyError:
            raise ValueError(
                "unsupported data_type {!r}; expected one of {}".format(
                    data_type, sorted(gdal_types)
                )
            ) from None

        trans = self.train_image.GetGeoTransform()
        proj = self.train_image.GetProjection()

        outdriver = gdal.GetDriverByName('GTiff')

        bands = self._as_bands(image_array)
        outdata = outdriver.Create(
            image_name, self.width, self.height, bands.shape[0], data_type_name
        )
        outdata.SetGeoTransform(trans)
        outdata.SetProjection(proj)

        # GDAL band numbers start at 1.
        for band_number, band in enumerate(bands, start=1):
            outdata.GetRasterBand(band_number).WriteArray(band)

        # Make sure the pixels reach the disk before the dataset is dropped.
        outdata.FlushCache()

    def machine_learning(self, prediction_name, n_estimators=32, n_jobs=26):
        """Fit one random forest per band, predict the image and save it.

        ``intensity_index`` must have been called first.

        Args:
            prediction_name: Output path of the predicted GeoTIFF.
            n_estimators: Number of trees per forest.
            n_jobs: Number of parallel jobs handed to scikit-learn.
        """
        prediction_array = np.zeros(
            (self.band, self.height, self.width), dtype=np.float64
        )

        for i in range(self.band):
            print(i)
            model = RandomForestRegressor(n_estimators=n_estimators, n_jobs=n_jobs)

            model.fit(self.pif_train_intensity[i],
                      self.pif_target_intensity[i])

            print(np.shape(self.all_train_intensity))
            prediction = model.predict(self.all_train_intensity[i])
            print(np.shape(prediction))

            rmse = sqrt(mean_squared_error(self.all_target_intensity[i], prediction))
            print(rmse)

            # Use the raster's own size instead of a hard-coded 1000 x 1000.
            prediction_array[i, :, :] = prediction.reshape(self.height, self.width)

        self.save_image(prediction_array, prediction_name, "float")


if __name__ == "__main__":
    # Script entry point: load the three rasters, build the per-band
    # arrays, then fit the models and write the predicted image.
    output_path = "predction.tif"

    calculator = CalculateIndex(
        train="train.tif",
        target="target.tif",
        pif="cva_binary.tif",
    )
    calculator.intensity_index()
    calculator.machine_learning(output_path)

Я запустил два варианта: с многомерными массивами (без reshape) и с уплощёнными массивами (с reshape).

Вариант с многомерными массивами создал очень серьёзную нагрузку (процессор загружен на 100 %, а температура достигает 100 градусов).

Однако вариант с reshape отработал быстро и легко (процессор загружен на 30 %, а температура достигает 50 градусов).

Почему так происходит?

Потому что размер? .. изменение смещения инструкции? \

Спасибо, что прочитали.

...