Я пытаюсь сопоставить строки в одном кадре данных (df1) с строками в другом (df2) на основе столбца datetime.Мой желаемый вывод - это кадр данных df1 с соответствующими строками из df2, добавленными к нему на основе временных меток.Интервал времени данных в df1 имеет более высокое разрешение, чем в df2.У меня есть некоторый код, который находит ближайшую запись в df2, до или после записи в df1, чтобы делать то, что я хочу, однако количество строк в каждом из моих файлов df1 велико (> 500 000), и у меня есть 100 пар файловчтобы соответствовать.У меня слишком длинный цикл for.До сих пор он работал на одной и той же паре файлов в течение 24 часов!Я знаю, что это работает, потому что я взял небольшие подмножества данных и быстро сопоставил их, и я использовал это для цикла несколько раз на разных наборах данных.Я задаюсь вопросом, узнает ли кто-нибудь о более быстром и более элегантном способе сделать это, поскольку с такой скоростью я никогда не объединю файлы!Любые предложения будут ценны.Большое спасибо.
Текущий код, который работает вечно:
# The function that will merge the two dataframes
addSuppInfo = function(dfs1, dfs2){
N = dim(dfs1)[1]
MergedDF = as.data.frame(array(NA,c(N,length(names(dfs1))+length(names(dfs2)))))
names(MergedDF) = c(names(dfs1),names(dfs2))
suppdate = which(names(MergedDF)=="datetime")[2] #The new, empty database has two "datetime" columns now, lets rename the second to avoid confusion
names(MergedDF)[suppdate] = "GPS_datetime_local"
#Convert datetimes to some universal measure of time so there can be no confusion
dfs1$AbsTime = unclass(as.POSIXct(strptime(dfs1$datetime, "%Y-%m-%d %H:%M:%S")))
dfs2$AbsTime = unclass(as.POSIXct(strptime(dfs2$datetime_utc, "%Y-%m-%d %H:%M:%S ")))
for(record in 1:N){
#Find the closest record, whether before or after a time in the supp info
NearestRow = which.min(abs(dfs2$AbsTime - dfs1$AbsTime[record]))[1]
if(length(NearestRow) > 0){
NewRow = cbind(dfs1[record,], dfs2[NearestRow,])
#We have two AbsTime columns which we're just using for the lookup so remove them
NewRow = NewRow[which(names(NewRow) != "AbsTime")]
MergedDF[record,] = NewRow
}else{ #if there is no matching supp data in df2, just put the df1 data in the row with a "no data" label. Bit fiddly, but neater than just leaving rows blank
MergedDF[record,(1:(length(names(dfs1))-1))] = dfs1[record, (1:(length(names(dfs1))-1))] #minus one because we don't want the AbsTime column we added on earlier
MergedDF[record,length(names(dfs1)):length(names(MergedDF))] = "NA"
}
}
return(MergedDF)
}
# Create lists of the df1 and df2 filenames (for my case where I ahve lots of files in a folder).
dir = "/Users/rhiannonaustin/Desktop/Booby_analysis/EMbC/10sec_Interp_Data/Merged/Match_TDR_IMM_GPS/BB/Raw GPS match/2017/GPS_Imm/test"
setwd(dir)
df2files = list.files(path=dir, pattern="*_df2.csv", full.names=FALSE, recursive=FALSE)
df1files = list.files(path=dir, pattern="*_df1.csv", full.names=FALSE, recursive=FALSE)
# For each file in those lists, submit them to our addSuppInfo function
# end up with df1 with new columns for the closest matching df2 row
for(i in 1:length(df2files)){
file = df2files[i]
#Extract the code
df2code = strsplit(file,"_")[[1]][1]
#Double check there is a matching df2 file
df1code = strsplit(df1files[i],"_")[[1]][1]
if(df2code == df1code){
df1Data = read.csv(df1files[i], header=TRUE, stringsAsFactors = FALSE)
df2Data = read.csv(file, header=TRUE, stringsAsFactors = FALSE)
Merged = addSuppInfo(df1Data, df2Data)
outfile = paste(df2code,"_merged.csv",sep="")
write.csv(Merged, file=outfile, row.names=FALSE)
}else{
print(paste("No matching data for ",df2code,"!!"))
}
}
Пример данных (очевидно, этот небольшой пример работает быстро с моим собственным циклом for, но в реальной жизни мои наборы данных НАМНОГО больше!):
> dput(df1Data)
structure(list(iid = 14554:14677, datetime = c("2017-03-12 15:55:04",
"2017-03-12 15:55:05", "2017-03-12 15:55:06", "2017-03-12 15:55:07",
"2017-03-12 15:55:08", "2017-03-12 15:55:09", "2017-03-12 15:55:10",
"2017-03-12 15:55:11", "2017-03-12 15:55:12", "2017-03-12 15:55:13",
"2017-03-12 15:55:14", "2017-03-12 15:55:15", "2017-03-12 15:55:16",
"2017-03-12 15:55:17", "2017-03-12 15:55:18", "2017-03-12 15:55:19",
"2017-03-12 15:55:20", "2017-03-12 15:55:21", "2017-03-12 15:55:22",
"2017-03-12 15:55:23", "2017-03-12 15:55:24", "2017-03-12 15:55:25",
"2017-03-12 15:55:26", "2017-03-12 15:55:27", "2017-03-12 15:55:28",
"2017-03-12 15:55:29", "2017-03-12 15:55:30", "2017-03-12 15:55:31",
"2017-03-12 15:55:32", "2017-03-12 15:55:33", "2017-03-12 15:55:34",
"2017-03-12 15:55:35", "2017-03-12 15:55:36", "2017-03-12 15:55:37",
"2017-03-12 15:55:38", "2017-03-12 15:55:39", "2017-03-12 15:55:40",
"2017-03-12 15:55:41", "2017-03-12 15:55:42", "2017-03-12 15:55:43",
"2017-03-12 15:55:44", "2017-03-12 15:55:45", "2017-03-12 15:55:46",
"2017-03-12 15:55:47", "2017-03-12 15:55:48", "2017-03-12 15:55:49",
"2017-03-12 15:55:50", "2017-03-12 15:55:51", "2017-03-12 15:55:52",
"2017-03-12 15:55:53", "2017-03-12 15:55:54", "2017-03-12 15:55:55",
"2017-03-12 15:55:56", "2017-03-12 15:55:57", "2017-03-12 15:55:58",
"2017-03-12 15:55:59", "2017-03-12 15:56:00", "2017-03-12 15:56:01",
"2017-03-12 15:56:02", "2017-03-12 15:56:03", "2017-03-12 15:56:04",
"2017-03-12 15:56:05", "2017-03-12 15:56:06", "2017-03-12 15:56:07",
"2017-03-12 15:56:08", "2017-03-12 15:56:09", "2017-03-12 15:56:10",
"2017-03-12 15:56:11", "2017-03-12 15:56:12", "2017-03-12 15:56:13",
"2017-03-12 15:56:14", "2017-03-12 15:56:15", "2017-03-12 15:56:16",
"2017-03-12 15:56:17", "2017-03-12 15:56:18", "2017-03-12 15:56:19",
"2017-03-12 15:56:20", "2017-03-12 15:56:21", "2017-03-12 15:56:22",
"2017-03-12 15:56:23", "2017-03-12 15:56:24", "2017-03-12 15:56:25",
"2017-03-12 15:56:26", "2017-03-12 15:56:27", "2017-03-12 15:56:28",
"2017-03-12 15:56:29", "2017-03-12 15:56:30", "2017-03-12 15:56:31",
"2017-03-12 15:56:32", "2017-03-12 15:56:33", "2017-03-12 15:56:34",
"2017-03-12 15:56:35", "2017-03-12 15:56:36", "2017-03-12 15:56:37",
"2017-03-12 15:56:38", "2017-03-12 15:56:39", "2017-03-12 15:56:40",
"2017-03-12 15:56:41", "2017-03-12 15:56:42", "2017-03-12 15:56:43",
"2017-03-12 15:56:44", "2017-03-12 15:56:45", "2017-03-12 15:56:46",
"2017-03-12 15:56:47", "2017-03-12 15:56:48", "2017-03-12 15:56:49",
"2017-03-12 15:56:50", "2017-03-12 15:56:51", "2017-03-12 15:56:52",
"2017-03-12 15:56:53", "2017-03-12 15:56:54", "2017-03-12 15:56:55",
"2017-03-12 15:56:56", "2017-03-12 15:56:57", "2017-03-12 15:56:58",
"2017-03-12 15:56:59", "2017-03-12 15:57:00", "2017-03-12 15:57:01",
"2017-03-12 15:57:02", "2017-03-12 15:57:03", "2017-03-12 15:57:04",
"2017-03-12 15:57:05", "2017-03-12 15:57:06", "2017-03-12 15:57:07"
), state = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
)), class = "data.frame", row.names = c(NA, -124L))
> dput(df2Data)
structure(list(new_id = 188889:188903, datetime_utc = c("2017-03-12 15:52:58",
"2017-03-12 15:53:28", "2017-03-12 15:53:58", "2017-03-12 15:54:28",
"2017-03-12 15:54:58", "2017-03-12 15:55:28", "2017-03-12 15:55:58",
"2017-03-12 15:56:28", "2017-03-12 15:56:58", "2017-03-12 15:57:28",
"2017-03-12 15:57:58", "2017-03-12 15:58:28", "2017-03-12 15:58:58",
"2017-03-12 15:59:28", "2017-03-12 15:59:58"), count = c(6.138159,
0.6693379, 5.507328, 4.041119, 1.464152, 11.73605, 12.92626,
1.397087, 3.032117, 1.913028, 2.029836, 9.386913, 7.672296, 6.36146,
0.5731061)), class = "data.frame", row.names = c(NA, -15L))
Желаемый вывод:
> Merged
iid datetime state new_id datetime_utc count
1 14554 2017-03-12 15:55:04 1 188893 2017-03-12 15:54:58 1.464152
2 14555 2017-03-12 15:55:05 1 188893 2017-03-12 15:54:58 1.464152
3 14556 2017-03-12 15:55:06 1 188893 2017-03-12 15:54:58 1.464152
4 14557 2017-03-12 15:55:07 1 188893 2017-03-12 15:54:58 1.464152
5 14558 2017-03-12 15:55:08 1 188893 2017-03-12 15:54:58 1.464152
6 14559 2017-03-12 15:55:09 1 188893 2017-03-12 15:54:58 1.464152
7 14560 2017-03-12 15:55:10 1 188893 2017-03-12 15:54:58 1.464152
8 14561 2017-03-12 15:55:11 1 188893 2017-03-12 15:54:58 1.464152
9 14562 2017-03-12 15:55:12 1 188893 2017-03-12 15:54:58 1.464152
10 14563 2017-03-12 15:55:13 1 188893 2017-03-12 15:54:58 1.464152
11 14564 2017-03-12 15:55:14 1 188894 2017-03-12 15:55:28 11.736050
12 14565 2017-03-12 15:55:15 1 188894 2017-03-12 15:55:28 11.736050
13 14566 2017-03-12 15:55:16 1 188894 2017-03-12 15:55:28 11.736050
14 14567 2017-03-12 15:55:17 1 188894 2017-03-12 15:55:28 11.736050
15 14568 2017-03-12 15:55:18 1 188894 2017-03-12 15:55:28 11.736050
16 14569 2017-03-12 15:55:19 1 188894 2017-03-12 15:55:28 11.736050
17 14570 2017-03-12 15:55:20 1 188894 2017-03-12 15:55:28 11.736050
18 14571 2017-03-12 15:55:21 1 188894 2017-03-12 15:55:28 11.736050
19 14572 2017-03-12 15:55:22 1 188894 2017-03-12 15:55:28 11.736050
20 14573 2017-03-12 15:55:23 1 188894 2017-03-12 15:55:28 11.736050
21 14574 2017-03-12 15:55:24 1 188894 2017-03-12 15:55:28 11.736050
22 14575 2017-03-12 15:55:25 1 188894 2017-03-12 15:55:28 11.736050
23 14576 2017-03-12 15:55:26 1 188894 2017-03-12 15:55:28 11.736050
24 14577 2017-03-12 15:55:27 1 188894 2017-03-12 15:55:28 11.736050
25 14578 2017-03-12 15:55:28 1 188894 2017-03-12 15:55:28 11.736050
26 14579 2017-03-12 15:55:29 1 188894 2017-03-12 15:55:28 11.736050
27 14580 2017-03-12 15:55:30 1 188894 2017-03-12 15:55:28 11.736050
28 14581 2017-03-12 15:55:31 1 188894 2017-03-12 15:55:28 11.736050
29 14582 2017-03-12 15:55:32 1 188894 2017-03-12 15:55:28 11.736050
30 14583 2017-03-12 15:55:33 1 188894 2017-03-12 15:55:28 11.736050
31 14584 2017-03-12 15:55:34 1 188894 2017-03-12 15:55:28 11.736050
32 14585 2017-03-12 15:55:35 1 188894 2017-03-12 15:55:28 11.736050
33 14586 2017-03-12 15:55:36 1 188894 2017-03-12 15:55:28 11.736050
34 14587 2017-03-12 15:55:37 1 188894 2017-03-12 15:55:28 11.736050
35 14588 2017-03-12 15:55:38 1 188894 2017-03-12 15:55:28 11.736050
36 14589 2017-03-12 15:55:39 1 188894 2017-03-12 15:55:28 11.736050
37 14590 2017-03-12 15:55:40 1 188894 2017-03-12 15:55:28 11.736050
38 14591 2017-03-12 15:55:41 1 188894 2017-03-12 15:55:28 11.736050
39 14592 2017-03-12 15:55:42 1 188894 2017-03-12 15:55:28 11.736050
40 14593 2017-03-12 15:55:43 1 188894 2017-03-12 15:55:28 11.736050
41 14594 2017-03-12 15:55:44 1 188895 2017-03-12 15:55:58 12.926260
42 14595 2017-03-12 15:55:45 1 188895 2017-03-12 15:55:58 12.926260
43 14596 2017-03-12 15:55:46 1 188895 2017-03-12 15:55:58 12.926260
44 14597 2017-03-12 15:55:47 1 188895 2017-03-12 15:55:58 12.926260
45 14598 2017-03-12 15:55:48 1 188895 2017-03-12 15:55:58 12.926260
46 14599 2017-03-12 15:55:49 1 188895 2017-03-12 15:55:58 12.926260
47 14600 2017-03-12 15:55:50 1 188895 2017-03-12 15:55:58 12.926260
48 14601 2017-03-12 15:55:51 1 188895 2017-03-12 15:55:58 12.926260
49 14602 2017-03-12 15:55:52 1 188895 2017-03-12 15:55:58 12.926260
50 14603 2017-03-12 15:55:53 1 188895 2017-03-12 15:55:58 12.926260
51 14604 2017-03-12 15:55:54 1 188895 2017-03-12 15:55:58 12.926260
52 14605 2017-03-12 15:55:55 1 188895 2017-03-12 15:55:58 12.926260
53 14606 2017-03-12 15:55:56 1 188895 2017-03-12 15:55:58 12.926260
54 14607 2017-03-12 15:55:57 1 188895 2017-03-12 15:55:58 12.926260
55 14608 2017-03-12 15:55:58 1 188895 2017-03-12 15:55:58 12.926260
56 14609 2017-03-12 15:55:59 1 188895 2017-03-12 15:55:58 12.926260
57 14610 2017-03-12 15:56:00 1 188895 2017-03-12 15:55:58 12.926260
58 14611 2017-03-12 15:56:01 1 188895 2017-03-12 15:55:58 12.926260
59 14612 2017-03-12 15:56:02 1 188895 2017-03-12 15:55:58 12.926260
60 14613 2017-03-12 15:56:03 1 188895 2017-03-12 15:55:58 12.926260
61 14614 2017-03-12 15:56:04 1 188895 2017-03-12 15:55:58 12.926260
62 14615 2017-03-12 15:56:05 1 188895 2017-03-12 15:55:58 12.926260
63 14616 2017-03-12 15:56:06 1 188895 2017-03-12 15:55:58 12.926260
64 14617 2017-03-12 15:56:07 1 188895 2017-03-12 15:55:58 12.926260
65 14618 2017-03-12 15:56:08 1 188895 2017-03-12 15:55:58 12.926260
66 14619 2017-03-12 15:56:09 1 188895 2017-03-12 15:55:58 12.926260
67 14620 2017-03-12 15:56:10 1 188895 2017-03-12 15:55:58 12.926260
68 14621 2017-03-12 15:56:11 1 188895 2017-03-12 15:55:58 12.926260
69 14622 2017-03-12 15:56:12 1 188895 2017-03-12 15:55:58 12.926260
70 14623 2017-03-12 15:56:13 1 188895 2017-03-12 15:55:58 12.926260
71 14624 2017-03-12 15:56:14 1 188896 2017-03-12 15:56:28 1.397087
72 14625 2017-03-12 15:56:15 1 188896 2017-03-12 15:56:28 1.397087
73 14626 2017-03-12 15:56:16 1 188896 2017-03-12 15:56:28 1.397087
74 14627 2017-03-12 15:56:17 1 188896 2017-03-12 15:56:28 1.397087
75 14628 2017-03-12 15:56:18 1 188896 2017-03-12 15:56:28 1.397087
76 14629 2017-03-12 15:56:19 1 188896 2017-03-12 15:56:28 1.397087
77 14630 2017-03-12 15:56:20 1 188896 2017-03-12 15:56:28 1.397087
78 14631 2017-03-12 15:56:21 1 188896 2017-03-12 15:56:28 1.397087
79 14632 2017-03-12 15:56:22 1 188896 2017-03-12 15:56:28 1.397087
80 14633 2017-03-12 15:56:23 1 188896 2017-03-12 15:56:28 1.397087
81 14634 2017-03-12 15:56:24 1 188896 2017-03-12 15:56:28 1.397087
82 14635 2017-03-12 15:56:25 1 188896 2017-03-12 15:56:28 1.397087
83 14636 2017-03-12 15:56:26 1 188896 2017-03-12 15:56:28 1.397087
84 14637 2017-03-12 15:56:27 1 188896 2017-03-12 15:56:28 1.397087
85 14638 2017-03-12 15:56:28 1 188896 2017-03-12 15:56:28 1.397087
86 14639 2017-03-12 15:56:29 1 188896 2017-03-12 15:56:28 1.397087
87 14640 2017-03-12 15:56:30 1 188896 2017-03-12 15:56:28 1.397087
88 14641 2017-03-12 15:56:31 1 188896 2017-03-12 15:56:28 1.397087
89 14642 2017-03-12 15:56:32 1 188896 2017-03-12 15:56:28 1.397087
90 14643 2017-03-12 15:56:33 1 188896 2017-03-12 15:56:28 1.397087
91 14644 2017-03-12 15:56:34 1 188896 2017-03-12 15:56:28 1.397087
92 14645 2017-03-12 15:56:35 1 188896 2017-03-12 15:56:28 1.397087
93 14646 2017-03-12 15:56:36 1 188896 2017-03-12 15:56:28 1.397087
94 14647 2017-03-12 15:56:37 1 188896 2017-03-12 15:56:28 1.397087
95 14648 2017-03-12 15:56:38 1 188896 2017-03-12 15:56:28 1.397087
96 14649 2017-03-12 15:56:39 2 188896 2017-03-12 15:56:28 1.397087
97 14650 2017-03-12 15:56:40 2 188896 2017-03-12 15:56:28 1.397087
98 14651 2017-03-12 15:56:41 2 188896 2017-03-12 15:56:28 1.397087
99 14652 2017-03-12 15:56:42 2 188896 2017-03-12 15:56:28 1.397087
100 14653 2017-03-12 15:56:43 2 188896 2017-03-12 15:56:28 1.397087
101 14654 2017-03-12 15:56:44 2 188897 2017-03-12 15:56:58 3.032117
102 14655 2017-03-12 15:56:45 2 188897 2017-03-12 15:56:58 3.032117
103 14656 2017-03-12 15:56:46 2 188897 2017-03-12 15:56:58 3.032117
104 14657 2017-03-12 15:56:47 2 188897 2017-03-12 15:56:58 3.032117
105 14658 2017-03-12 15:56:48 2 188897 2017-03-12 15:56:58 3.032117
106 14659 2017-03-12 15:56:49 2 188897 2017-03-12 15:56:58 3.032117
107 14660 2017-03-12 15:56:50 2 188897 2017-03-12 15:56:58 3.032117
108 14661 2017-03-12 15:56:51 2 188897 2017-03-12 15:56:58 3.032117
109 14662 2017-03-12 15:56:52 2 188897 2017-03-12 15:56:58 3.032117
110 14663 2017-03-12 15:56:53 2 188897 2017-03-12 15:56:58 3.032117
111 14664 2017-03-12 15:56:54 2 188897 2017-03-12 15:56:58 3.032117
112 14665 2017-03-12 15:56:55 2 188897 2017-03-12 15:56:58 3.032117
113 14666 2017-03-12 15:56:56 2 188897 2017-03-12 15:56:58 3.032117
114 14667 2017-03-12 15:56:57 2 188897 2017-03-12 15:56:58 3.032117
115 14668 2017-03-12 15:56:58 2 188897 2017-03-12 15:56:58 3.032117
116 14669 2017-03-12 15:56:59 2 188897 2017-03-12 15:56:58 3.032117
117 14670 2017-03-12 15:57:00 2 188897 2017-03-12 15:56:58 3.032117
118 14671 2017-03-12 15:57:01 2 188897 2017-03-12 15:56:58 3.032117
119 14672 2017-03-12 15:57:02 2 188897 2017-03-12 15:56:58 3.032117
120 14673 2017-03-12 15:57:03 2 188897 2017-03-12 15:56:58 3.032117
121 14674 2017-03-12 15:57:04 2 188897 2017-03-12 15:56:58 3.032117
122 14675 2017-03-12 15:57:05 2 188897 2017-03-12 15:56:58 3.032117
123 14676 2017-03-12 15:57:06 2 188897 2017-03-12 15:56:58 3.032117
124 14677 2017-03-12 15:57:07 2 188897 2017-03-12 15:56:58 3.032117