Один из способов сделать это — решение на основе tidyverse:
library(plyr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:plyr':
#>
#> arrange, count, desc, failwith, id, mutate, rename, summarise,
#> summarize
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidytext)
library(qdap)
#> Loading required package: qdapDictionaries
#> Loading required package: qdapRegex
#>
#> Attaching package: 'qdapRegex'
#> The following object is masked from 'package:dplyr':
#>
#> explain
#> Loading required package: qdapTools
#>
#> Attaching package: 'qdapTools'
#> The following object is masked from 'package:dplyr':
#>
#> id
#> The following object is masked from 'package:plyr':
#>
#> id
#> Loading required package: RColorBrewer
#>
#> Attaching package: 'qdap'
#> The following object is masked from 'package:dplyr':
#>
#> %>%
#> The following object is masked from 'package:base':
#>
#> Filter
library(tibble)
library(tidyr)
#>
#> Attaching package: 'tidyr'
#> The following object is masked from 'package:qdap':
#>
#> %>%
# Example data: five quotes, each keyed by an id of the form "q1" ... "q5".
quotes <- tibble(
  quoteiD = paste0("q", seq_len(5)),
  quote = c(
    ".\n\nthe ebodac consortium consists of partners: janssen (efpia), london school of hygiene and tropical medicine (lshtm),",
    "world vision) mobile health software development and deployment in resource limited settings grameen\n\nas such, the ebodac consortium is well placed to tackle.",
    "Intelligence is the ability to adapt to change.",
    "Science is a of reason of romance and passion.",
    "If I have seen further it is by standing on ."
  )
)
quotes
#> # A tibble: 5 x 2
#> quoteiD quote
#> <chr> <chr>
#> 1 q1 ".\n\nthe ebodac consortium consists of partners: janssen (efpia~
#> 2 q2 "world vision) mobile health software development and deployment~
#> 3 q3 Intelligence is the ability to adapt to change.
#> 4 q4 Science is a of reason of romance and passion.
#> 5 q5 If I have seen further it is by standing on .
# Load the stop-word table that anti_join() filters against below.
data(stop_words)

# Split every quote into one word per row, drop the stop words and count how
# often each remaining word occurs (most frequent first).
# count() is namespace-qualified because plyr, which is attached as well,
# exports a function with the same name; without the prefix the result would
# depend on plyr having been attached before dplyr.
tidy_words <- quotes %>%
  unnest_tokens(word, quote) %>%
  anti_join(stop_words) %>%
  dplyr::count(word, sort = TRUE)
#> Joining, by = "word"
tidy_words
#> # A tibble: 33 x 2
#> word n
#> <chr> <int>
#> 1 consortium 2
#> 2 ebodac 2
#> 3 ability 1
#> 4 adapt 1
#> 5 change 1
#> 6 consists 1
#> 7 deployment 1
#> 8 development 1
#> 9 efpia 1
#> 10 grameen 1
#> # ... with 23 more rows
# Look up synonyms for every counted word; words without a dictionary entry
# are listed in the "no match" message printed by the call.
syns <- qdap::synonyms(tidy_words[["word"]])
#> no match for the following:
#> consortium, ebodac, consists, deployment, efpia, grameen, janssen, london, lshtm, partners, settings, software, tropical
#> ========================
# Turn the synonym list into a table with one row per word and up to five
# synonyms per word, comma separated.
# rename() and summarise() are namespace-qualified because plyr exports
# functions with the same names; without the prefix the pipeline would only
# work when plyr happens to be attached before dplyr.
syns %>%
  # Change the list to a data frame
  plyr::ldply(data.frame) %>%
  # Rename the two columns (selected by position) to more intuitive names
  dplyr::rename("Word_DefNumber" = 1, "Syn" = 2) %>%
  # Split the combined "word.definition number" column at the dot
  separate(Word_DefNumber, c("Word", "DefNumber"), sep = "\\.") %>%
  # Group by word so that the row selection below is done for each word
  group_by(Word) %>%
  # Keep the first 5 rows for each word
  slice(1:5) %>%
  # Combine the kept synonyms into one comma-separated string per word
  dplyr::summarise(synonyms = paste(Syn, collapse = ", ")) %>%
  # Drop any grouping so later use of the data has no unintended effects
  ungroup()
#> # A tibble: 20 x 2
#> Word synonyms
#> <chr> <chr>
#> 1 ability adeptness, aptitude, capability, capacity, competence
#> 2 adapt acclimatize, accommodate, adjust, alter, apply
#> 3 change alter, convert, diversify, fluctuate, metamorphose
#> 4 development advance, advancement, evolution, expansion, growth
#> 5 health fitness, good condition, haleness, healthiness, robustness
#> 6 hygiene cleanliness, hygienics, sanitary measures, sanitation
#> 7 intelligence acumen, alertness, aptitude, brain power, brains
#> 8 limited bounded, checked, circumscribed, confined, constrained
#> 9 medicine cure, drug, medicament, medication, nostrum
#> 10 mobile ambulatory, itinerant, locomotive, migrant, motile
#> 11 passion animation, ardour, eagerness, emotion, excitement
#> 12 reason apprehension, brains, comprehension, intellect, judgment
#> 13 resource ability, capability, cleverness, ingenuity, initiative
#> 14 romance affair, affaire (du coeur), affair of the heart, amour, at~
#> 15 school academy, alma mater, college, department, discipline
#> 16 science body of knowledge, branch of knowledge, discipline, art, s~
#> 17 standing condition, credit, eminence, estimation, footing
#> 18 tackle accoutrements, apparatus, equipment, gear, implements
#> 19 vision eyes, eyesight, perception, seeing, sight
#> 20 world earth, earthly sphere, globe, everybody, everyone
Создано 2019-04-05 с помощью пакета reprex (v0.2.1)
Обратите внимание, что пакет plyr должен быть загружен до dplyr: иначе функции plyr (rename, summarise, count и др.) замаскируют одноимённые функции dplyr.