Преобразование вложенного цикла в lapply - PullRequest
0 голосов
/ 26 декабря 2018

Мой код ниже создает объект listData, используя неэффективный двойной цикл for:

data = data.frame(ID = paste0("ID", 1:10), A.1 = rnorm(10,3), A.2 = rnorm(10,3), B.1 = rnorm(10,5), B.2 = rnorm(10,5), C.1 = rnorm(10,7), C.2 = rnorm(10,7), D.1 = rnorm(10,9), D.2 = rnorm(10,9))
colGroups = c("ID", "A", "A", "B", "B", "C", "C", "D", "D")
colNames = unique(colGroups)[-1]

k=1
listData = list()
seqVec <- seq(1, length(colNames)-1)
for (i in seq_along(seqVec)){
    for (j in (i+1):length(colNames)){
        group1 = colNames[i]
        group2 = colNames[j]
        datSel <- cbind(ID=data$ID, data[,which(colGroups %in%
                                                    c(group1, group2))])
        listData[[k]] <- datSel
        k = k +1
    }
}

Можно ли переписать этот код в формате lapply, одновременно используя seq_along () вместо seq (1,длина (var)) формат?

Ответы [ 2 ]

0 голосов
/ 26 декабря 2018
library( tidyverse )

#get unique column-identifiers (i.e. A, B, C, D)
cols.unique <- names( data[-1] ) %>% str_extract( "\\D" ) %>% unique()

#define unique combinations of cols.unique, put in a list (SIMPLIFY = FALSE)
cols.combn <- combn( cols.unique, 2, simplify = FALSE )

#loop over the unique combinations, 
#  select the colums that start with the entries from the list with unique combinations
lapply( cols.combn, function(x) { data %>% select( starts_with( x[1] ), starts_with(x[2] ) ) } )

выход

# [[1]]
#          A.1      A.2      B.1      B.2
# 1  2.5204597 2.929385 4.481089 4.329139
# 2  4.0151392 4.109071 3.917436 4.713756
# 3  3.0194702 2.736921 3.761927 6.398563
# 4  1.6708995 3.658086 5.487556 4.590937
# 5  0.9975245 2.283583 4.288010 5.831028
# 6  1.0433955 3.367433 4.286832 5.146243
# 7  3.5847534 5.046948 6.168791 2.798172
# 8  3.1649556 1.287304 3.970953 5.188648
# 9  4.6763107 4.609052 6.575959 5.209499
# 10 3.2906488 2.905176 6.722880 5.219465
# 
# [[2]]
#          A.1      A.2      C.1      C.2
# 1  2.5204597 2.929385 7.153721 6.097695
# 2  4.0151392 4.109071 6.613263 7.992791
# 3  3.0194702 2.736921 6.257132 8.606027
# 4  1.6708995 3.658086 7.023705 6.415313
# 5  0.9975245 2.283583 6.341729 7.856898
# 6  1.0433955 3.367433 6.218319 5.116963
# 7  3.5847534 5.046948 7.324243 6.590088
# 8  3.1649556 1.287304 7.083322 6.258658
# 9  4.6763107 4.609052 6.546216 7.601150
# 10 3.2906488 2.905176 6.685511 6.274723
# 
# [[3]]
#          A.1      A.2       D.1       D.2
# 1  2.5204597 2.929385  8.262144  7.777309
# 2  4.0151392 4.109071 10.631516  9.629164
# 3  3.0194702 2.736921  8.826980  8.499999
# 4  1.6708995 3.658086 10.149039 11.120339
# 5  0.9975245 2.283583  9.231830 10.150376
# 6  1.0433955 3.367433  8.787548  9.665438
# 7  3.5847534 5.046948  9.398839  8.783267
# 8  3.1649556 1.287304  7.501346  8.974553
# 9  4.6763107 4.609052  8.538673  9.282650
# 10 3.2906488 2.905176 10.204048  9.954522
# 
# [[4]]
#         B.1      B.2      C.1      C.2
# 1  4.481089 4.329139 7.153721 6.097695
# 2  3.917436 4.713756 6.613263 7.992791
# 3  3.761927 6.398563 6.257132 8.606027
# 4  5.487556 4.590937 7.023705 6.415313
# 5  4.288010 5.831028 6.341729 7.856898
# 6  4.286832 5.146243 6.218319 5.116963
# 7  6.168791 2.798172 7.324243 6.590088
# 8  3.970953 5.188648 7.083322 6.258658
# 9  6.575959 5.209499 6.546216 7.601150
# 10 6.722880 5.219465 6.685511 6.274723
# 
# [[5]]
#         B.1      B.2       D.1       D.2
# 1  4.481089 4.329139  8.262144  7.777309
# 2  3.917436 4.713756 10.631516  9.629164
# 3  3.761927 6.398563  8.826980  8.499999
# 4  5.487556 4.590937 10.149039 11.120339
# 5  4.288010 5.831028  9.231830 10.150376
# 6  4.286832 5.146243  8.787548  9.665438
# 7  6.168791 2.798172  9.398839  8.783267
# 8  3.970953 5.188648  7.501346  8.974553
# 9  6.575959 5.209499  8.538673  9.282650
# 10 6.722880 5.219465 10.204048  9.954522
# 
# [[6]]
#         C.1      C.2       D.1       D.2
# 1  7.153721 6.097695  8.262144  7.777309
# 2  6.613263 7.992791 10.631516  9.629164
# 3  6.257132 8.606027  8.826980  8.499999
# 4  7.023705 6.415313 10.149039 11.120339
# 5  6.341729 7.856898  9.231830 10.150376
# 6  6.218319 5.116963  8.787548  9.665438
# 7  7.324243 6.590088  9.398839  8.783267
# 8  7.083322 6.258658  7.501346  8.974553
# 9  6.546216 7.601150  8.538673  9.282650
# 10 6.685511 6.274723 10.204048  9.954522
0 голосов
/ 26 декабря 2018

Мы можем использовать combn на unique подстроке names, grep для возврата имен столбцов на основе сходства и поднабора «данных» на основе имен столбцов

out <- combn(unique(sub("\\.\\d+", "", names(data)[-1])), 2, function(x)
    data[c('ID', grep(paste(x, collapse= "|"), names(data)[-1], value = TRUE))],
          simplify = FALSE)

identical(out, listData)
#[1] TRUE

Если мы просто ищем перевод цикла for на lapply

out2 <- do.call(c, lapply(seqVec, function(i) 
   lapply((i+1):length(colNames),  function(j) 
      cbind(ID = data$ID, data[colGroups %in% colNames[c(i, j)]]))))
...