Вы перебираете каждое наблюдение по отдельности. Возможно, стоит рассмотреть подход на основе data.table.
Обновление
Как указал @MichaelChirico, следует использовать метод data.table::split.
library(data.table)
# Benchmark fixture: 20,000 rows in which 200 city names are recycled
# 100 times, so every city owns exactly 100 rows.
cities_files <- data.frame(
  bar = seq_len(20000),
  city_name = rep(paste0("city ", seq_len(200)), times = 100),
  foo = seq_len(20000)
)
# Benchmark two ways of writing one CSV file per city with fwrite().
# Both variants write "<city>.csv" files into the current working directory
# and are each run 5 times.
microbenchmark::microbenchmark(
  khaynes = {
    # Set the data.frame as a data.table, then filter it once per distinct
    # city and write each subset.
    cities_files_dt <- data.table(cities_files)
    lapply(unique(cities_files_dt[, city_name]), function(city)
      fwrite(x = subset(cities_files_dt, city_name == city),
             file = paste0(city, ".csv")))
  },
  MichaelChirico = {
    # Split the table by city up front, then write each piece.
    cities_files_dt <- data.table(cities_files)
    list_dt <- split(cities_files_dt, cities_files_dt$city_name)
    # seq_along() yields an empty sequence for an empty list, whereas
    # 1:length() would yield c(1, 0) and fail on list_dt[[1]].
    for (i in seq_along(list_dt)) {
      # Index the names vector directly instead of building a sublist.
      fwrite(list_dt[[i]], paste0(names(list_dt)[i], ".csv"))
    }
  },
  times = 5
)
# Unit: milliseconds
# expr min lq mean median uq max neval
# khaynes 661.0689 680.6768 698.2449 683.1407 719.8056 746.5323 5
# MichaelChirico 452.0800 456.5777 499.2832 458.0174 517.4398 612.3011 5
Использование пакета data.table должно значительно ускорить обработку:
library(data.table)
# Toy input: five rows spread over three cities.
cities_files <- data.frame(
  bar = c(1, 1, 2, 3, 3),
  city_name = c("city a", "city a", "city b", "city c", "city c"),
  foo = c(20, 14, 40, 50, 60)
)

# Turn the data.frame into a data.table.
setDT(cities_files)

# For every distinct city, write its rows to "<city>.csv" in the current
# working directory.
lapply(unique(cities_files$city_name), function(current_city) {
  city_rows <- subset(cities_files, city_name == current_city)
  fwrite(x = city_rows, file = paste0(current_city, ".csv"))
})