Эффективное (быстрое) двоичное дерево в Фортране - PullRequest
0 голосов
/ 06 июня 2018

Я использую процедуру в следующем коде (которую я взял из здесь ) для программы, которую я пытаюсь сделать как можно быстрее.Процедура, однако, очень медленная, поскольку она, вероятно, оптимизирована для педагогических целей, а не для ускорения.

program tree_sort
! Sorts a file of integers by building a
! tree, sorted in infix order.
! This sort has expected behavior n log n,
! but worst case (input is sorted) n ** 2.

   implicit none
   type node
      integer :: value
      type (node), pointer :: left, right
   end type node

   type (node), pointer :: t  ! A tree
   integer :: number, ios

   nullify (t)  ! Start with empty tree
   do
      read (*, *, iostat = ios) number
      if (ios < 0) exit
      call insert (t, number) ! Put next number in tree
   end do
   ! Print nodes of tree in infix order
   call print_tree (t)

contains

   recursive subroutine insert (t, number)

      type (node), pointer :: t  ! A tree
      integer, intent (in) :: number

      ! If (sub)tree is empty, put number at root
      if (.not. associated (t)) then
         allocate (t)
         t % value = number
         nullify (t % left)
         nullify (t % right)
      ! Otherwise, insert into correct subtree
      else if (number < t % value) then
         call insert (t % left, number)
      else
         call insert (t % right, number)
      end if

   end subroutine insert

   recursive subroutine print_tree (t)
   ! Print tree in infix order

      type (node), pointer :: t  ! A tree

      if (associated (t)) then
         call print_tree (t % left)
         print *, t % value
         call print_tree (t % right)
      end if

   end subroutine print_tree

end program tree_sort

Есть ли способ ускорить его?Я использую процедуру для последовательного добавления элементов в вектор без добавления повторяющихся (поэтому я изменил else в подпрограмме insert на else if (number > t % value) then. Кроме этого, вместо печати я сохраняю значения в глобальной переменной.

Редактировать:

Вот фактический код:

MODULE MOD_PARAMETERS

USE, INTRINSIC :: ISO_FORTRAN_ENV
IMPLICIT NONE
SAVE

INTEGER(INT32), PARAMETER :: d     = 10    ! number of dimensions
INTEGER(INT32), PARAMETER :: L_0   = 5     ! after this adaptive grid kicks in, for L <= L_0 usual sparse grid
INTEGER(INT32), PARAMETER :: L_max = 5     ! maximum level
INTEGER(INT32), PARAMETER :: bound = 1     ! 0 -> for f = 0 at boundary
                                           ! 1 -> adding grid points at boundary
                                           ! 2 -> extrapolating close to boundary

INTEGER(INT32), PARAMETER :: testing_sample = 10**4
INTEGER(INT32), PARAMETER :: error_sample = 10**2

REAL(REAL64), PARAMETER :: eps = 0.001D0   ! epsilon for adaptive grid

TYPE NODE
  INTEGER :: value
  TYPE (NODE), POINTER :: left, right
END TYPE NODE

INTEGER(INT32), DIMENSION(:), ALLOCATABLE :: tree_vector
INTEGER(INT32) :: iii

END MODULE MOD_PARAMETERS

SUBROUTINE FF(x,output)

    USE MOD_PARAMETERS
    IMPLICIT NONE

    REAL(REAL64), DIMENSION(d), INTENT(IN)  :: x
    REAL(REAL64)              , INTENT(OUT) :: output

    output = 1.0D0/(ABS(0.5D0-SUM(x(:)**4.0D0))+0.1D0)

END SUBROUTINE

SUBROUTINE XX(n,L,i,output)

    USE MOD_PARAMETERS
    IMPLICIT NONE

    INTEGER(INT32)              , INTENT(IN)  :: n
    INTEGER(INT32), DIMENSION(n), INTENT(IN)  :: L, i
    REAL(REAL64),   DIMENSION(n), INTENT(OUT) :: output

    INTEGER(INT32) :: j

    DO j = 1,n
        IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
            output(j) = REAL(i(j),REAL64)/REAL(2**L(j),REAL64)
        ELSEIF (bound .EQ. 1) THEN
            output(j) = REAL(i(j),REAL64)/REAL(2**MAX(L(j)-1,1),REAL64)
        ENDIF
    ENDDO

END SUBROUTINE

SUBROUTINE XX_INV(L,x,output)

    USE MOD_PARAMETERS
    IMPLICIT NONE

    INTEGER(INT32), DIMENSION(d), INTENT(IN)  :: L
    REAL(REAL64),   DIMENSION(d), INTENT(IN)  :: x
    INTEGER(INT32), DIMENSION(d), INTENT(OUT) :: output

    INTEGER(INT32) :: j

    DO j = 1,d
        IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
            output(j) = 2*FLOOR(x(j)*REAL(2**(L(j)-1),REAL64))+1
        ELSEIF (bound .EQ. 1) THEN
            IF (L(j) .EQ. 2) THEN
                IF (x(j) .LT. 0.5D0) THEN
                    output(j) = 0
                ELSE
                    output(j) = 2
                ENDIF
            ELSE
                output(j) = 2*FLOOR(x(j)*(REAL(2**MAX(L(j)-2,0),REAL64)))+1
            ENDIF
        ENDIF
    ENDDO

END SUBROUTINE

SUBROUTINE BASE(x,L,i,output)

    USE MOD_PARAMETERS
    IMPLICIT NONE

    REAL(REAL64),   INTENT(IN)  :: x
    INTEGER(INT32), INTENT(IN)  :: L,i
    REAL(REAL64),   INTENT(OUT) :: output

    IF (bound .EQ. 0) THEN
        output = MAX((1.0D0-ABS(x*REAL(2**L,REAL64)-REAL(i,REAL64))),0.0D0)
    ELSEIF (bound .EQ. 1) THEN
        IF ((L .EQ. 1) .AND. (i .EQ. 1)) THEN
            output = 1.0D0
        ELSEIF ((L .EQ. 2) .AND. (i .EQ. 0)) THEN
            output = MAX(1.0D0-2.0D0*x,0.0D0)
        ELSEIF ((L .EQ. 2) .AND. (i .EQ. 2)) THEN
            output = MAX(2.0D0*x-1.0D0,0.0D0)
        ELSE
            output = MAX((1.0D0-ABS(x*REAL(2**(L-1),REAL64)-REAL(i,REAL64))),0.0D0)
        ENDIF
    ELSEIF (bound .EQ. 2) THEN
        IF ((L .EQ. 1) .AND. (i .EQ. 1)) THEN
            output = 1.0D0
        ELSEIF ((L .GT. 1) .AND. (i .EQ. 1)) THEN
            output = MAX(2.0D0-REAL(2**L,REAL64)*x,0.0D0)
        ELSEIF ((L .GT. 1) .AND. (i .EQ. (2**L)-1)) THEN
            output = MAX(REAL(2**L,REAL64)*x+REAL(1-i,REAL64),0.0D0)
        ELSE
            output = MAX((1.0D0-ABS(x*REAL(2**L,REAL64)-REAL(i,REAL64))),0.0D0)
        ENDIF
    ENDIF

END SUBROUTINE

PROGRAM MAIN

USE MOD_PARAMETERS
IMPLICIT NONE

INTEGER(INT32), DIMENSION(d,d)                :: ident
REAL(REAL64),   DIMENSION(1)                  :: x1
REAL(REAL64),   DIMENSION(d)                  :: xd
INTEGER(INT32), DIMENSION(2*d)                :: temp
INTEGER(INT32), DIMENSION(:,:),   ALLOCATABLE :: grid_index, temp_grid_index, grid_index_new, J_index, &
                                                 adj_list, temp_adj_list
INTEGER(INT32), DIMENSION(:),     ALLOCATABLE :: to_do, to_do_new, to_add_ind
REAL(REAL64),   DIMENSION(:),     ALLOCATABLE :: coeff, temp_coeff, J_coeff

REAL(REAL64)  :: temp_min, temp_max, V, T, B, F
INTEGER(INT32) :: i, k, k1, k2, h, j, L, n, dd, dsize, count, count1, count2, count3, flag, &
                  first, repeated, add, ind, adj_list_ind


INTEGER(INT32) :: time1, time2, time3, time4, clock_rate, clock_max

INTEGER(INT32), DIMENSION(d) :: LL, ii

REAL(REAL64), DIMENSION(error_sample,d) :: sample_x
REAL(REAL64), DIMENSION(error_sample)   :: sample_e, interp1
REAL(REAL64)                            :: max_error, L2_error

REAL(REAL64), DIMENSION(testing_sample,d) :: x_rand
REAL(REAL64), DIMENSION(testing_sample)   :: interp2

TYPE(NODE), POINTER :: tree

! ============================================================================
! EXECUTABLE
! ============================================================================

ident = 0
DO i = 1,d
    ident(i,i) = 1
ENDDO

! Initial grid point
dsize = 1
ALLOCATE(grid_index(dsize,2*d),grid_index_new(dsize,2*d),adj_list(dsize,2*d))
grid_index(1,:) = 1
grid_index_new  = grid_index
adj_list        = 0

ALLOCATE(coeff(0:dsize))
coeff(0) = 0.0D0
xd = 0.5D0
CALL FF(xd,coeff(1))

L = 1
n = SIZE(grid_index_new,1)
ALLOCATE(J_index(n*2*d,2*d))
ALLOCATE(J_coeff(n*2*d))
ALLOCATE(to_add_ind(1))
to_add_ind = 1

CALL RANDOM_NUMBER(sample_x)
sample_e = 0.0D0

CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)

DO WHILE (L .LT. L_max)

    CALL SYSTEM_CLOCK (time3,clock_rate,clock_max)
    L       = L+1
    n       = SIZE(grid_index_new,1)
    count   = 0
    first   = 1
    DEALLOCATE(J_index,J_coeff)
    ALLOCATE(J_index(n*2*d,2*d))
    ALLOCATE(J_coeff(n*2*d))
    J_index = 0
    J_coeff = 0.0D0
    DO k = 1,n
        adj_list_ind = 0
        DO i = 1,d
            DO j = 1,2
                IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
                    temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
                ELSEIF (bound .EQ. 1) THEN
                    IF (grid_index_new(k,i) .EQ. 1) THEN
                        temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(-(-1)**j)/)
                    ELSE
                        temp = grid_index_new(k,:)+(/ident(i,:),ident(i,:)*(grid_index_new(k,d+i)-(-1)**j)/)
                    ENDIF
                ENDIF
                CALL XX(d,temp(1:d),temp(d+1:2*d),xd)
                temp_min = MINVAL(xd)
                temp_max = MAXVAL(xd)
                IF ((temp_min .GE. 0.0D0) .AND. (temp_max .LE. 1.0D0)) THEN
                    IF (first .EQ. 1) THEN
                        first = 0
                        count = count+1
                        J_index(count,:) = temp
                        V = 0.0D0
                        DO k1 = 1,SIZE(grid_index,1)
                            T = 1.0D0
                            DO k2 = 1,d
                                CALL XX(1,temp(k2),temp(d+k2),x1)
                                CALL BASE(x1(1),grid_index(k1,k2),grid_index(k1,k2+d),B)
                                T = T*B
                            ENDDO
                            V = V+coeff(k1)*T
                        ENDDO
                        CALL FF(xd,F)
                        J_coeff(count) = F-V
                        adj_list(to_add_ind(k),adj_list_ind+1) = dsize+count
                        adj_list_ind = adj_list_ind+1
                    ELSE
                        repeated = 0
                        DO h = 1,count
                            IF (SUM(ABS(J_index(h,:)-temp)) .EQ. 0) THEN
                                repeated = 1
                                adj_list(to_add_ind(k),adj_list_ind+1) = dsize+h
                                adj_list_ind = adj_list_ind+1
                            ENDIF
                        ENDDO
                        IF (repeated .EQ. 0) THEN
                            count = count+1
                            J_index(count,:) = temp
                            V = 0.0D0
                            DO k1 = 1,SIZE(grid_index,1)
                                T = 1.0D0
                                DO k2 = 1,d
                                    CALL XX(1,temp(k2),temp(d+k2),x1)
                                    CALL BASE(x1(1),grid_index(k1,k2),grid_index(k1,k2+d),B)
                                    T = T*B
                                ENDDO
                                V = V+coeff(k1)*T
                            ENDDO
                            CALL FF(xd,F)
                            J_coeff(count) = F-V
                            adj_list(to_add_ind(k),adj_list_ind+1) = dsize+count
                            adj_list_ind = adj_list_ind+1
                        ENDIF
                    ENDIF
                ENDIF
            ENDDO
        ENDDO
    ENDDO

    ALLOCATE(temp_grid_index(dsize,2*d))
    ALLOCATE(temp_coeff(dsize))
    temp_grid_index = grid_index
    temp_coeff      = coeff
    DEALLOCATE(grid_index,coeff)
    ALLOCATE(grid_index(dsize+count,2*d))
    ALLOCATE(coeff(0:dsize+count))
    grid_index(1:dsize,:) = temp_grid_index
    coeff(0:dsize)        = temp_coeff
    DEALLOCATE(temp_grid_index,temp_coeff)
    grid_index(dsize+1:dsize+count,:) = J_index(1:count,:)
    coeff(dsize+1:dsize+count)        = J_coeff(1:count)

    IF (L .LT. L_max) THEN ! put this after error threshhold when implemented
        ALLOCATE(temp_adj_list(dsize,2*d))
        temp_adj_list = adj_list
        DEALLOCATE(adj_list)
        ALLOCATE(adj_list(dsize+count,2*d))
        adj_list            = 0
        adj_list(1:dsize,:) = temp_adj_list
        DEALLOCATE(temp_adj_list)
    ENDIF

    dsize = dsize + count

    IF (L .LE. L_0) THEN
        DEALLOCATE(grid_index_new)
        ALLOCATE(grid_index_new(count,2*d))
        grid_index_new = J_index(1:count,:)
        DEALLOCATE(to_add_ind)
        ALLOCATE(to_add_ind(count))
        to_add_ind = dsize-count + (/ (h,h=1,count) /)
    ELSE
        DEALLOCATE(to_add_ind)
        ALLOCATE(to_add_ind(count))
        add = 0
        to_add_ind = 0
        DO h = 1,count
            IF (ABS(J_coeff(h)) .GT. eps) THEN
                add = add + 1
                J_index(add,:) = J_index(h,:)
                to_add_ind(add) = dsize-count+h
            ENDIF
        ENDDO
        DEALLOCATE(grid_index_new)
        ALLOCATE(grid_index_new(add,2*d))
        grid_index_new = J_index(1:add,:)
    ENDIF

    DO i = 1,error_sample
        V = 0.0D0
        DO k1 = 1,SIZE(grid_index,1)
            T = 1.0D0
            DO k2 = 1,d
                CALL BASE(sample_x(i,k2),grid_index(k1,k2),grid_index(k1,k2+d),B)
                T = T*B
            ENDDO
            V = V+coeff(k1)*T
        ENDDO
        CALL FF(sample_x(i,:),F)
        sample_e(i) = F-V
        interp1(i)  = V
    ENDDO

    max_error = MAXVAL(ABS(sample_e))
    L2_error  = (SUM(sample_e**2.0D0)/REAL(error_sample,REAL64))**0.5D0

    CALL SYSTEM_CLOCK (time4,clock_rate,clock_max)
    WRITE(*,'(A,I5,A,F10.5,A,I8,A,F15.10,A,F15.10)') ' level = ', L,&
        '      time = ',REAL(time4-time3,REAL64)/REAL(clock_rate,REAL64),&
        '    grid points = ',SIZE(grid_index,1),&
        '    max error = ',max_error,&
        '    L2 error = ',L2_error

ENDDO

!PRINT *, ' '
!PRINT *, ' '
!PRINT *, ' '
!DO i = 1,SIZE(adj_list,1)
!    PRINT *, i, adj_list(i,:)
!ENDDO
!PRINT *, ' '
!PRINT *, ' '
!PRINT *, ' '
!DO i = 1,dsize
!    PRINT *, i, grid_index(i,:), coeff(i)
!ENDDO
!PRINT *, ' '
!PRINT *, ' '
!PRINT *, ' '

ALLOCATE (to_do(dsize),to_do_new(dsize),tree_vector(dsize))

CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)
PRINT *, ' '
WRITE(*,'(A,F10.5)') '    total time for setup = ', REAL(time2-time1,REAL64)/REAL(clock_rate,REAL64)

! ============================================================================
!  Compute interpolated values:
! ============================================================================

IF (testing_sample .EQ. error_sample) THEN
!    x_rand = sample_x
ELSE
    CALL RANDOM_NUMBER(x_rand)
ENDIF

count1 = 0
count2 = 0
count3 = 0

CALL SYSTEM_CLOCK (time1,clock_rate,clock_max)
DO i = 1,testing_sample

    V = 0.0D0
    to_do     = 0
    to_do(1)  = 1
    to_do_new = 0
    k = 1
    DO L = 1,L_max

        NULLIFY (tree)
        tree_vector = 0

        CALL SYSTEM_CLOCK (time3,clock_rate,clock_max)
        DO j = 1,k
            ind = to_do(j)
            T = 1.0D0
            DO dd = 1,d
                CALL BASE(x_rand(i,dd),grid_index(ind,dd),grid_index(ind,d+dd),B)
                T = T*B
            ENDDO
            V = V + coeff(ind)*T
        ENDDO
        CALL SYSTEM_CLOCK (time4,clock_rate,clock_max)
        count1 = count1 + time4-time3

        IF (L .LT. L_max) THEN
            n = k
            k = 0

            DO j = 1,n
                IF (adj_list(to_do(j),1) .GT. 0) THEN
                    DO h = 1,2*d

                        CALL SYSTEM_CLOCK (time3,clock_rate,clock_max)
                        LL = grid_index(adj_list(to_do(j),h),1:d)
                        ii = grid_index(adj_list(to_do(j),h),d+1:2*d)
                        flag = 0
                        k1 = 1
                        DO WHILE ((flag .EQ. 0) .AND. (k1 .LE. d))
                            IF ((bound .EQ. 0) .OR. (bound .EQ. 2)) THEN
                                k2 = 2*FLOOR(x_rand(i,k1)*REAL(2**(LL(k1)-1),REAL64))+1
                            ELSEIF (bound .EQ. 1) THEN
                                IF (LL(k1) .EQ. 2) THEN
                                    IF (x_rand(i,k1) .LT. 0.5D0) THEN
                                        k2 = 0
                                    ELSE
                                        k2 = 2
                                    ENDIF
                                ELSE
                                    k2 = 2*FLOOR(x_rand(i,k1)*(REAL(2**MAX(LL(k1)-2,0),REAL64)))+1
                                ENDIF
                            ENDIF
                            IF (k2 .NE. ii(k1)) THEN
                                flag = 1
                            ENDIF
                            k1 = k1 +1
                        ENDDO
                        CALL SYSTEM_CLOCK (time4,clock_rate,clock_max)
                        count2 = count2 + time4-time3

!                        CALL SYSTEM_CLOCK (time3,clock_rate,clock_max)
                        IF (flag .EQ. 0) THEN
                            !IF (MINVAL(ABS(to_do_new(1:MAX(k,1))-adj_list(to_do(j),h))) .GT. 0) THEN
                                to_do_new(k+1) = adj_list(to_do(j),h)
                                k = k+1
                                CALL SYSTEM_CLOCK (time3,clock_rate,clock_max)
                                CALL INSERT(tree,to_do_new(k))
                                CALL SYSTEM_CLOCK (time4,clock_rate,clock_max)
                                count3 = count3 + time4-time3
                            !ENDIF
                        ENDIF
!                        CALL SYSTEM_CLOCK (time4,clock_rate,clock_max)
!                        count3 = count3 + time4-time3
                    ENDDO
                ENDIF
            ENDDO

            CALL SYSTEM_CLOCK (time3,clock_rate,clock_max)
            iii = 0
            CALL PRINT_TREE(tree)
            to_do = tree_vector
            CALL SYSTEM_CLOCK (time4,clock_rate,clock_max)
            count3 = count3 + time4-time3

            !to_do = to_do_new
            to_do_new = 0
        ENDIF

    ENDDO
    interp2(i) = V
ENDDO

CALL SYSTEM_CLOCK (time2,clock_rate,clock_max)

PRINT *, ' '
WRITE(*,'(A,F10.5,A,I10)') '  time for interpolation = ', REAL(time2-time1,REAL64)/REAL(clock_rate,REAL64),&
                           '    points = ', testing_sample
PRINT *, ' '
WRITE(*,'(A,F10.5)') '           time for base = ', REAL(count1,REAL64)/REAL(clock_rate,REAL64)

PRINT *, ' '
WRITE(*,'(A,F10.5)') '          time for x_inv = ', REAL(count2,REAL64)/REAL(clock_rate,REAL64)

PRINT *, ' '
WRITE(*,'(A,F10.5)') '       time for repeated = ', REAL(count3,REAL64)/REAL(clock_rate,REAL64)

!PRINT *, ' '
!WRITE(*,'(A,F20.15)') '                   check = ', MAXVAL(ABS(interp2-interp1))

DEALLOCATE(grid_index,grid_index_new,J_index,coeff,J_coeff,adj_list,to_do,to_do_new,to_add_ind,tree_vector)

CONTAINS

RECURSIVE SUBROUTINE INSERT(tree,number)

    TYPE(NODE), POINTER :: tree
    INTEGER(INT32),  INTENT(IN) :: number

    IF (.NOT. ASSOCIATED(tree)) THEN
        ALLOCATE(tree)
        tree%value = number
        NULLIFY(tree%left)
        NULLIFY(tree%right)
    ELSEIF (number .LT. tree%value) THEN
        CALL INSERT (tree%left,number)
    ELSEIF (number .GT. tree%value) THEN
        CALL INSERT(tree%right,number)
    ENDIF

END SUBROUTINE INSERT

RECURSIVE SUBROUTINE PRINT_TREE(tree)

    TYPE (NODE), POINTER :: tree

    IF (ASSOCIATED(tree)) THEN
        CALL PRINT_TREE(tree%left)
        iii = iii+1
        tree_vector(iii) = tree%value
        CALL PRINT_TREE (tree%right)
    END IF

END SUBROUTINE PRINT_TREE

END PROGRAM

Я использую оптимизацию O3, но в противном случае нет флагов. В моемНа компьютере time for repeated (где я использую двоичное дерево) - 18,3 секунды, тогда как если я использую альтернативный метод, который комментируется в версии (с MINVAL), это займет всего 3,6 секунды.

...