library(fuzzyjoin); library(tidyverse)
# Fuzzy-match each state name in `dataset1` against the variants in
# `dataset2`, then reshape the matches to one row per state, with the
# matched variants in columns col_2, col_3, ...
# Requires `dataset1` and `dataset2` (each with a character column `states`)
# to be defined before this pipeline runs.
dataset1 %>%
  # The join key is explicit, so no "Joining by" message is emitted.
  # Rows pair up when the string distance between the keys is at most 3.
  stringdist_left_join(dataset2, by = "states", max_dist = 3) %>%
  rename(col_1 = states.x) %>%
  group_by(col_1) %>%
  # Number the matches within each state; start at 2 because col_1 is the key.
  mutate(col = paste0("col_", row_number() + 1)) %>%
  # Drop the grouping so it does not leak into the result.
  ungroup() %>%
  # pivot_wider() replaces the superseded spread(); missing slots become NA.
  pivot_wider(names_from = col, values_from = states.y) %>%
  # spread() sorted rows by key; pivot_wider() keeps first-appearance order,
  # so sort explicitly to keep the same row order as before.
  arrange(col_1)
# # A tibble: 4 x 4
#   col_1      col_2       col_3        col_4
#   <chr>      <chr>       <chr>        <chr>
# 1 California Californiia callifoornia NA
# 2 Florida    Fl0 rida    folrida      NA
# 3 New York   New york    new york     NA
# 4 Texas      T3xas       Te xas       texas
# Data (must be defined before running the pipeline above):
# Reference table: the canonical state names used as the left side of the
# fuzzy join.
dataset1 <- data.frame(
  states = c("California", "Texas", "Florida", "New York"),
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned. Kept explicit so `states` stays character on R < 4.0 too.
  stringsAsFactors = FALSE
)
# Lookup table: misspelled / differently-cased variants of the state names,
# used as the right side of the fuzzy join.
dataset2 <- data.frame(
  states = c(
    "Californiia",
    "callifoornia",
    "T3xas",
    "Te xas",
    "texas",
    "Fl0 rida",
    "folrida",
    "New york",
    "new york"
  ),
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned. Kept explicit so `states` stays character on R < 4.0 too.
  stringsAsFactors = FALSE
)