Как ускорить простую задачу оптимизации линейной алгебры в Юлии? - PullRequest
1 голос
/ 03 июня 2019

Я реализовал метод обнаружения точек изменения LSDD, описанный в [1] в Julia, чтобы посмотреть, смогу ли я сделать это быстрее, чем существующая реализация python [2], которая основана на поиске в сетке, который ищет оптимальные параметры.

Я получаю желаемые результаты, но, несмотря на все мои усилия, моя версия поиска по сетке требует примерно того же времени, что и Python, что все еще слишком долго для реальных приложений.Я также попытался использовать пакет Optimize, который только усугубляет ситуацию (в 2 или 3 раза медленнее).

Вот поиск по сетке, который я реализовал:

using Random
using LinearAlgebra

function squared_distance(X::Array{Float64,1},C::Array{Float64,1})
    sqd = zeros(length(X),length(C))
    for i in 1:length(X)
        for j in 1:length(C)
            sqd[i,j] = X[i]^2 + C[j]^2 - 2*X[i]*C[j]
        end
    end
    return sqd
end

function lsdd(x::Array{Float64,1},y::Array{Float64,1}; folds = 5, sigma_list = nothing , lambda_list = nothing)
    lx,ly = length(x), length(y)
    b = min(lx+ly,300)
    C = shuffle(vcat(x,y))[1:b]
    CC_dist2 = squared_distance(C,C)
    xC_dist2, yC_dist2 = squared_distance(x,C), squared_distance(y,C)
    Tx,Ty = length(x) - div(lx,folds), length(y) - div(ly,folds)
    #Define the training and testing data sets
    cv_split1, cv_split2 = floor.(collect(1:lx)*folds/lx), floor.(collect(1:ly)*folds/ly)
    cv_index1, cv_index2 = shuffle(cv_split1), shuffle(cv_split2)
    tr_idx1,tr_idx2 = [findall(x->x!=i,cv_index1) for i in 1:folds], [findall(x->x!=i,cv_index2) for i in 1:folds]
    te_idx1,te_idx2 = [findall(x->x==i,cv_index1) for i in 1:folds], [findall(x->x==i,cv_index2) for i in 1:folds]
    xTr_dist, yTr_dist = [xC_dist2[i,:] for i in tr_idx1], [yC_dist2[i,:] for i in tr_idx2]
    xTe_dist, yTe_dist = [xC_dist2[i,:] for i in te_idx1], [yC_dist2[i,:] for i in te_idx2]

    if sigma_list == nothing
        sigma_list = [0.25, 0.5, 0.75, 1, 1.2, 1.5, 2, 2.5, 2.2, 3, 5]
    end
    if lambda_list == nothing
        lambda_list = [1.00000000e-03, 3.16227766e-03, 1.00000000e-02, 3.16227766e-02,
       1.00000000e-01, 3.16227766e-01, 1.00000000e+00, 3.16227766e+00,
       1.00000000e+01]
    end
    #memory prealocation
    score_cv = zeros(length(sigma_list),length(lambda_list))
    H = zeros(b,b)
    hx_tr, hy_tr = [zeros(b,1) for i in 1:folds], [zeros(b,1) for i in 1:folds]
    hx_te, hy_te = [zeros(1,b) for i in 1:folds], [zeros(1,b) for i in 1:folds]
    #h_tr,h_te = zeros(b,1), zeros(1,b)
    theta = zeros(b)

    for (sigma_idx,sigma) in enumerate(sigma_list)
        #the expression of H is different for higher dimension
        #H = sqrt((sigma^2)*pi)*exp.(-CC_dist2/(4*sigma^2))
        set_H(H,CC_dist2,sigma,b)
        #check if the sum is performed along the right dimension
        set_htr(hx_tr,xTr_dist,sigma,Tx), set_htr(hy_tr,yTr_dist,sigma,Ty)
        set_hte(hx_te,xTe_dist,sigma,lx-Tx), set_hte(hy_te,yTe_dist,sigma,ly-Ty)
        for i in 1:folds
            h_tr = hx_tr[i] - hy_tr[i]
            h_te = hx_te[i] - hy_te[i]
            #set_h(h_tr,hx_tr[i],hy_tr[i],b)
            #set_h(h_te,hx_te[i],hy_te[i],b)
            for (lambda_idx,lambda) in enumerate(lambda_list)
                set_theta(theta,H,lambda,h_tr,b)
                score_cv[sigma_idx,lambda_idx] += dot(theta,H*theta) - 2*dot(theta,h_te)
            end
        end
    end
    #retrieve the value of the optimal parameters
    sigma_chosen = sigma_list[findmin(score_cv)[2][2]]
    lambda_chosen = lambda_list[findmin(score_cv)[2][2]]
    #calculating the new "optimal" solution
    H = sqrt((sigma_chosen^2)*pi)*exp.(-CC_dist2/(4*sigma_chosen^2))
    H_lambda = H + lambda_chosen*Matrix{Float64}(I, b, b)
    h = (1/lx)*sum(exp.(-xC_dist2/(2*sigma_chosen^2)),dims = 1) - (1/ly)*sum(exp.(-yC_dist2/(2*sigma_chosen^2)),dims = 1)
    theta_final =  H_lambda\transpose(h)
    f = transpose(theta_final).*sum(exp.(-vcat(xC_dist2,yC_dist2)/(2*sigma_chosen^2)),dims = 1)
    L2 = 2*dot(theta_final,h) - dot(theta_final,H*theta_final)
    return L2
end

function set_H(H::Array{Float64,2},dist::Array{Float64,2},sigma::Float64,b::Int16)
    for i in 1:b
        for j in 1:b
            H[i,j] = sqrt((sigma^2)*pi)*exp(-dist[i,j]/(4*sigma^2))
        end
    end
end

function set_theta(theta::Array{Float64,1},H::Array{Float64,2},lambda::Float64,h::Array{Float64,2},b::Int64)
    Hl = (H + lambda*Matrix{Float64}(I, b, b))
    LAPACK.posv!('L', Hl, h)
    theta = h
end

function set_htr(h::Array{Float64,1},dists::Array{Float64,2},sigma::Float64,T::Int16)
    for (CVidx,dist) in enumerate(dists)
        for (idx,value) in enumerate((1/T)*sum(exp.(-dist/(2*sigma^2)),dims = 1))
            h[CVidx][idx] = value
        end
    end
end

function set_hte(h::Array{Float64,1},dists::Array{Float64,2},sigma::Array{Float64,1},T::Int16)
    for (CVidx,dist) in enumerate(dists)
        for (idx,value) in enumerate((1/T)*sum(exp.(-dist/(2*sigma^2)),dims = 1))
            h[CVidx][idx] = value
        end
    end
end

function set_h(h,h1,h2,b)
    for i in 1:b
        h[i] = h1[i] - h2[i]
    end
end

set_H, set_h и set_thetaфункции есть, потому что я где-то читал, что изменение предварительно выделенной памяти на месте с помощью функции было быстрее, но это не имело большого значения.
Для проверки я использую два случайных распределения в качестве входных данных:

x,y = rand(500),1.5*rand(500)
lsdd(x,y) #returns a value around 0.3

Теперь вот версия кода, где я пытаюсь использовать Оптимизатор:

function Theta(sigma::Float64,lambda::Float64,x::Array{Float64,1},y::Array{Float64,1},folds::Int8)
    lx,ly = length(x), length(y)
    b = min(lx+ly,300)
    C = shuffle(vcat(x,y))[1:b]
    CC_dist2 = squared_distance(C,C)
    xC_dist2, yC_dist2 = squared_distance(x,C), squared_distance(y,C)
    #the subsets are not be mutually exclusive !
    Tx,Ty = length(x) - div(lx,folds), length(y) - div(ly,folds)
    shuffled_x, shuffled_y = [shuffle(1:lx) for i in 1:folds], [shuffle(1:ly) for i in 1:folds]
    cv_index1, cv_index2 = floor.(collect(1:lx)*folds/lx)[shuffle(1:lx)], floor.(collect(1:ly)*folds/ly)[shuffle(1:ly)]
    tr_idx1,tr_idx2 = [i[1:Tx] for i in shuffled_x], [i[1:Ty] for i in shuffled_y]
    te_idx1,te_idx2 = [i[Tx:end] for i in shuffled_x], [i[Ty:end] for i in shuffled_y]
    xTr_dist, yTr_dist = [xC_dist2[i,:] for i in tr_idx1], [yC_dist2[i,:] for i in tr_idx2]
    xTe_dist, yTe_dist = [xC_dist2[i,:] for i in te_idx1], [yC_dist2[i,:] for i in te_idx2]
    score_cv = 0
    Id = Matrix{Float64}(I, b, b)
    H = sqrt((sigma^2)*pi)*exp.(-CC_dist2/(4*sigma^2))
    hx_tr, hy_tr = [transpose((1/Tx)*sum(exp.(-dist/(2*sigma^2)),dims = 1)) for dist in xTr_dist], [transpose((1/Ty)*sum(exp.(-dist/(2*sigma^2)),dims = 1)) for dist in yTr_dist]
    hx_te, hy_te  = [(lx-Tx)*sum(exp.(-dist/(2*sigma^2)),dims = 1) for dist in xTe_dist], [(ly-Ty)*sum(exp.(-dist/(2*sigma^2)),dims = 1) for dist in yTe_dist]
    for i in 1:folds
        h_tr, h_te = hx_tr[i] - hy_tr[i], hx_te[i] - hy_te[i]
        #theta = (H + lambda * Id)\h_tr
        theta = copy(h_tr)
        Hl = (H + lambda*Matrix{Float64}(I, b, b))
        LAPACK.posv!('L', Hl, theta)
        score_cv += dot(theta,H*theta) - 2*dot(theta,h_te)
    end
    return score_cv,(CC_dist2,xC_dist2,yC_dist2)
end

function cost(params::Array{Float64,1},x::Array{Float64,1},y::Array{Float64,1},folds::Int8)
    s,l = params[1],params[2]
    return Theta(s,l,x,y,folds)[1]
end

"""
Performs the optinization
"""

function lsdd3(x::Array{Float64,1},y::Array{Float64,1}; folds = 4)
    start = [1,0.1]
    b = min(length(x)+length(y),300)
    lx,ly = length(x),length(y)
    #result = optimize(params -> cost(params,x,y,folds),fill(0.0,2),fill(50.0,2),start, Fminbox(LBFGS(linesearch=LineSearches.BackTracking())); autodiff = :forward)
    result = optimize(params -> cost(params,x,y,folds),start, BFGS(),Optim.Options(f_calls_limit = 5, iterations = 5))
    #bboptimize(rosenbrock2d; SearchRange = [(-5.0, 5.0), (-2.0, 2.0)])
    #result = optimize(cost,[0,0],[Inf,Inf],start, Fminbox(AcceleratedGradientDescent()))
    sigma_chosen,lambda_chosen = Optim.minimizer(result)
    CC_dist2, xC_dist2, yC_dist2 = Theta(sigma_chosen,lambda_chosen,x,y,folds)[2]
    H = sqrt((sigma_chosen^2)*pi)*exp.(-CC_dist2/(4*sigma_chosen^2))
    h = (1/lx)*sum(exp.(-xC_dist2/(2*sigma_chosen^2)),dims = 1) - (1/ly)*sum(exp.(-yC_dist2/(2*sigma_chosen^2)),dims = 1)
    theta_final = (H + lambda_chosen*Matrix{Float64}(I, b, b))\transpose(h)
    f = transpose(theta_final).*sum(exp.(-vcat(xC_dist2,yC_dist2)/(2*sigma_chosen^2)),dims = 1)
    L2 = 2*dot(theta_final,h) - dot(theta_final,H*theta_final)
    return L2
end

Независимо от того, какую опцию я использую в оптимизаторе, я всегда получаю что-то слишком медленное.Может быть, поиск по сетке - лучший вариант, но я не знаю, как сделать это быстрее ... У кого-нибудь есть идеи, как мне действовать дальше?

[1]: http://www.mcduplessis.com/wp-content/uploads/2016/05/Journal-IEICE-2014-CLSDD-1.pdf
[2]: http://www.ms.k.u -tokyo.ac.jp / software.html

...