Мы преобразуем столбцы типа factor в character, затем с помощью transmute
создаём tibble со столбцами 'position' и 'name': проходим по строкам с помощью
map2, формируем подстроки (substring) исходя из ширины окна 'n' и количества
символов (nchar) в 'sence', добавляем 'sname' в качестве первого элемента и
применяем unnest к списку (list), чтобы получить набор данных из двух столбцов.
library(tidyverse)
#' Split each sequence into sliding-window substrings
#'
#' For every row of `dat`, emits a header row holding the value of `sname`
#' in both output columns, followed by one row per window of `n` consecutive
#' characters taken from `sence`.
#'
#' @param dat A data frame with columns `sence` (the sequence text) and
#'   `sname` (its label). Factor columns are accepted; both are converted
#'   to character before use.
#' @param n Window width in characters; a single number >= 1.
#' @param mv Step between the start positions of consecutive windows; a
#'   single number >= 1.
#' @return A two-column tibble. `position` holds the comma-separated
#'   character indices covered by each window and `name` the matching
#'   substring. A sequence that is `NA` or shorter than `n` contributes
#'   only its header row.
f1 <- function(dat, n, mv = 1) {
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 1,
    is.numeric(mv), length(mv) == 1L, !is.na(mv), mv >= 1
  )

  # Builds the header row plus one row per window for a single sequence.
  window_rows <- function(sequence, label) {
    n_chars <- nchar(sequence)

    # No room for even one window (or the sequence is missing): keep the
    # header row only. Without this guard the index sequences either fail
    # or count backwards, and `substring()` rejects zero-length bounds.
    if (is.na(n_chars) || n_chars < n) {
      return(tibble(position = label, name = label))
    }

    starts <- seq(1, n_chars - n + 1, by = mv)
    ends <- starts + n - 1
    positions <- map2_chr(starts, ends, function(from, to) {
      str_c(seq(from, to), collapse = ",")
    })

    tibble(
      position = c(label, positions),
      name = c(label, substring(sequence, starts, ends))
    )
  }

  # Only `sence` and `sname` reach the output, so convert just those two
  # instead of coercing every column with the superseded `mutate_all()`.
  dat %>%
    transmute(
      out = map2(as.character(sence), as.character(sname), window_rows)
    ) %>%
    unnest(cols = out)
}
Тестирование: скользящее окно с шагом 1
# 10-character windows, start position advancing by 1; `df` is the input
# data frame defined outside this excerpt.
df %>%
  f1(n = 10, mv = 1)
# position name
#1 Melon Melon
#2 1,2,3,4,5,6,7,8,9,10 RSNSNASSAV
#3 2,3,4,5,6,7,8,9,10,11 SNSNASSAVS
#4 3,4,5,6,7,8,9,10,11,12 NSNASSAVST
#5 4,5,6,7,8,9,10,11,12,13 SNASSAVSTS
#6 5,6,7,8,9,10,11,12,13,14 NASSAVSTSC
#7 6,7,8,9,10,11,12,13,14,15 ASSAVSTSCV
#8 7,8,9,10,11,12,13,14,15,16 SSAVSTSCVS
#9 8,9,10,11,12,13,14,15,16,17 SAVSTSCVSN
#10 9,10,11,12,13,14,15,16,17,18 AVSTSCVSNR
#11 10,11,12,13,14,15,16,17,18,19 VSTSCVSNRA
#12 11,12,13,14,15,16,17,18,19,20 STSCVSNRAM
#13 12,13,14,15,16,17,18,19,20,21 TSCVSNRAMK
#14 13,14,15,16,17,18,19,20,21,22 SCVSNRAMKG
#15 14,15,16,17,18,19,20,21,22,23 CVSNRAMKGT
#16 15,16,17,18,19,20,21,22,23,24 VSNRAMKGTT
#17 16,17,18,19,20,21,22,23,24,25 SNRAMKGTTH
#18 17,18,19,20,21,22,23,24,25,26 NRAMKGTTHY
#19 18,19,20,21,22,23,24,25,26,27 RAMKGTTHYD
#20 19,20,21,22,23,24,25,26,27,28 AMKGTTHYDT
#21 20,21,22,23,24,25,26,27,28,29 MKGTTHYDTS
#22 Carrot Carrot
#23 1,2,3,4,5,6,7,8,9,10 TGMRHGGMVS
#24 2,3,4,5,6,7,8,9,10,11 GMRHGGMVSV
#25 3,4,5,6,7,8,9,10,11,12 MRHGGMVSVC
#26 4,5,6,7,8,9,10,11,12,13 RHGGMVSVCM
#27 5,6,7,8,9,10,11,12,13,14 HGGMVSVCMC
#28 6,7,8,9,10,11,12,13,14,15 GGMVSVCMCV
#29 7,8,9,10,11,12,13,14,15,16 GMVSVCMCVV
#30 8,9,10,11,12,13,14,15,16,17 MVSVCMCVVD
#31 9,10,11,12,13,14,15,16,17,18 VSVCMCVVDD
#32 10,11,12,13,14,15,16,17,18,19 SVCMCVVDDN
#33 11,12,13,14,15,16,17,18,19,20 VCMCVVDDNR
#34 12,13,14,15,16,17,18,19,20,21 CMCVVDDNRR
#35 13,14,15,16,17,18,19,20,21,22 MCVVDDNRRR
#36 14,15,16,17,18,19,20,21,22,23 CVVDDNRRRH
#37 15,16,17,18,19,20,21,22,23,24 VVDDNRRRHY
#38 16,17,18,19,20,21,22,23,24,25 VDDNRRRHYN
#39 17,18,19,20,21,22,23,24,25,26 DDNRRRHYNG
#40 18,19,20,21,22,23,24,25,26,27 DNRRRHYNGA
#41 19,20,21,22,23,24,25,26,27,28 NRRRHYNGAY
#42 20,21,22,23,24,25,26,27,28,29 RRRHYNGAYD
#43 21,22,23,24,25,26,27,28,29,30 RRHYNGAYDD
#44 22,23,24,25,26,27,28,29,30,31 RHYNGAYDDH
#45 23,24,25,26,27,28,29,30,31,32 HYNGAYDDHH
#46 24,25,26,27,28,29,30,31,32,33 YNGAYDDHHR
#47 25,26,27,28,29,30,31,32,33,34 NGAYDDHHRG
#48 26,27,28,29,30,31,32,33,34,35 GAYDDHHRGG
#49 27,28,29,30,31,32,33,34,35,36 AYDDHHRGGV
#50 28,29,30,31,32,33,34,35,36,37 YDDHHRGGVC
#51 29,30,31,32,33,34,35,36,37,38 DDHHRGGVCT
#52 30,31,32,33,34,35,36,37,38,39 DHHRGGVCTS
Скользящее окно с шагом 2
# 10-character windows, start position advancing by 2; show the first rows.
df %>%
  f1(n = 10, mv = 2) %>%
  head()
# position name
#1 Melon Melon
#2 1,2,3,4,5,6,7,8,9,10 RSNSNASSAV
#3 3,4,5,6,7,8,9,10,11,12 NSNASSAVST
#4 5,6,7,8,9,10,11,12,13,14 NASSAVSTSC
#5 7,8,9,10,11,12,13,14,15,16 SSAVSTSCVS
#6 9,10,11,12,13,14,15,16,17,18 AVSTSCVSNR
Скользящее окно с шагом 3
# 10-character windows, start position advancing by 3; show the first rows.
df %>%
  f1(n = 10, mv = 3) %>%
  head()
# position name
#1 Melon Melon
#2 1,2,3,4,5,6,7,8,9,10 RSNSNASSAV
#3 4,5,6,7,8,9,10,11,12,13 SNASSAVSTS
#4 7,8,9,10,11,12,13,14,15,16 SSAVSTSCVS
#5 10,11,12,13,14,15,16,17,18,19 VSTSCVSNRA
#6 13,14,15,16,17,18,19,20,21,22 SCVSNRAMKG
Скользящее окно с шагом 4
# 10-character windows, start position advancing by 4; show the first rows.
df %>%
  f1(n = 10, mv = 4) %>%
  head()
# position name
#1 Melon Melon
#2 1,2,3,4,5,6,7,8,9,10 RSNSNASSAV
#3 5,6,7,8,9,10,11,12,13,14 NASSAVSTSC
#4 9,10,11,12,13,14,15,16,17,18 AVSTSCVSNR
#5 13,14,15,16,17,18,19,20,21,22 SCVSNRAMKG
#6 17,18,19,20,21,22,23,24,25,26 NRAMKGTTHY