Вычисление сгруппированных MDS - PullRequest
0 голосов
/ 22 апреля 2019

У меня есть эти данные:

glimpse(merged_dat2)
Observations: 15
Variables: 3
Groups: Brand, topic [15]
$ Brand <fct> Samsung, BLU, Apple, Samsung, BLU, Apple, Samsung, BLU, Apple, Samsung, BLU, App...
$ topic <int> 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5
$ term  <chr> "tri price defect never samsung warranti water brand time wellno version", "tri ...

Данные являются результатом латентного размещения Дирихле (LDA), применённого к отзывам клиентов. Brand — это бренд, к которому относится отзыв. topic — это тема, полученная в результате латентного размещения Дирихле. term — это термины (слова) одной темы.

Моя цель — отобразить результат с помощью MDS (многомерного шкалирования). Для этого нужно вычислить расстояния между векторами слов (terms) каждой темы между брендами. Результат должен выглядеть следующим образом (предположим, что измерения соответствуют темам 1 и 2). Мой вопрос: как я могу сделать такой сгруппированный MDS?

enter image description here

structure(list(Brand = structure(c(3L, 2L, 1L, 3L, 2L, 1L, 3L, 
2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L), .Label = c("Apple", "BLU", "Samsung"
), class = "factor"), topic = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 
3L, 4L, 4L, 4L, 5L, 5L, 5L), term = c("tri price defect never samsung warranti water brand time wellno version", 
"tri att week plenti ram purchas ship", "replac", "work fast econom flash refurbish excel funtion intuit", 
"work fast dont hour junk earphon life everyth seller", "work inch", 
"doesnt even data answer call number record phonework problem trueli", 
"doesnt even right ear fact fall unlock", "doesnt livabl top imag damag", 
"phone issu supos rep fantasticthanx much", "phone screen confort walk gift", 
"phone screen issu finger still", "servic just option buy databas imeiseri month support flawlessno hard", 
"servic just crack money text wast heavi photo sometim bang busi far", 
"alway annoy though trustful")), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -15L), vars = c("Brand", 
"topic"), labels = structure(list(Brand = structure(c(1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("Apple", 
"BLU", "Samsung"), class = "factor"), topic = c(1L, 2L, 3L, 4L, 
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), class = "data.frame", row.names = c(NA, 
-15L), vars = c("Brand", "topic"), labels = structure(list(Brand = structure(c(1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("Apple", 
"BLU", "Samsung"), class = "factor"), topic = c(1L, 2L, 3L, 4L, 
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), row.names = c(NA, 
-15L), class = "data.frame", vars = c("Brand", "topic"), drop = TRUE, indices = list(
    20L, c(27L, 43L), c(46L, 63L, 64L, 65L, 66L), c(79L, 80L, 
    82L, 83L, 86L, 94L, 95L), 120:123, c(1L, 2L, 3L, 4L, 15L, 
    16L, 17L, 18L, 19L), c(24L, 25L, 26L, 29L, 36L, 37L, 38L, 
    39L, 40L, 41L, 42L), c(45L, 48L, 58L, 59L, 60L, 61L, 62L), 
    c(72L, 73L, 74L, 75L, 76L, 77L, 78L, 81L, 91L, 92L, 93L), 
    c(98L, 101L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 
    118L, 119L), c(0L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
    14L), c(21L, 22L, 23L, 28L, 30L, 31L, 32L, 33L, 34L, 35L), 
    c(44L, 47L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L), 
    c(67L, 68L, 69L, 70L, 71L, 84L, 85L, 87L, 88L, 89L, 90L), 
    c(96L, 97L, 99L, 100L, 102L, 103L, 104L, 105L, 106L, 107L, 
    108L, 109L)), group_sizes = c(1L, 2L, 5L, 7L, 4L, 9L, 11L, 
7L, 11L, 12L, 11L, 10L, 11L, 11L, 12L), biggest_group_size = 12L, labels = structure(list(
    Brand = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L), .Label = c("Apple", "BLU", "Samsung"
    ), class = "factor"), topic = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 
    3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), row.names = c(NA, -15L), class = "data.frame", vars = c("Brand", 
"topic"), drop = TRUE, indices = list(120L, 121L, 51L, 3:4, 117L, 
    11L, 118L, 103L, 52L, 91L, 110L, 66L, 49:50, 104L, 6L, 44:46, 
    36L, 59L, 39L, 30L, 47:48, 41L, 33L, 60L, 61L, 89L, 119L, 
    28:29, 94L, 31L, 108L, 34L, 93L, 109L, 114L, 37L, 65L, 105L, 
    43L, 35L, 84:86, 38L, 99:101, 40L, 63L, 111L, 106L, 90L, 
    7L, 53L, 102L, 67:80, 55L, 115L, 16L, 5L, 56L, 18L, 17L, 
    54L, 32L, 88L, 20L, 58L, 8L, 81:83, 42L, 96:98, 19L, 116L, 
    95L, 87L, 107L, 112L, 122L, 12L, 64L, 0:2, 57L, 123L, 62L, 
    14L, 92L, 9L, 113L, 10L, 15L, 13L, 21:27), group_sizes = c(1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L), biggest_group_size = 14L, labels = structure(list(
    term = c("alway", "annoy", "answer", "att", "bang", "brand", 
    "busi", "buy", "call", "confort", "crack", "damag", "data", 
    "databas", "defect", "doesnt", "dont", "ear", "earphon", 
    "econom", "even", "everyth", "excel", "fact", "fall", "fantasticthanx", 
    "far", "fast", "finger", "flash", "flawlessno", "funtion", 
    "gift", "hard", "heavi", "hour", "imag", "imeiseri", "inch", 
    "intuit", "issu", "junk", "just", "life", "livabl", "money", 
    "month", "much", "never", "number", "option", "phone", "phonework", 
    "photo", "plenti", "price", "problem", "purchas", "ram", 
    "record", "refurbish", "rep", "replac", "right", "samsung", 
    "screen", "seller", "servic", "ship", "sometim", "still", 
    "supos", "support", "text", "though", "time", "top", "tri", 
    "trueli", "trustful", "unlock", "version", "walk", "warranti", 
    "wast", "water", "week", "wellno", "work")), row.names = c(NA, 
-89L), class = "data.frame", vars = "term", drop = TRUE))), indices = list(
    18L, c(21L, 37L), c(40L, 56L, 57L, 58L, 59L), c(62L, 64L, 
    66L, 74L, 75L), 98:101, c(1L, 2L, 13L, 14L, 15L, 16L, 17L
    ), c(20L, 23L, 30L, 31L, 32L, 33L, 34L, 35L, 36L), c(39L, 
    42L, 51L, 52L, 53L, 54L, 55L), c(61L, 63L, 71L, 72L, 73L), 
    c(77L, 79L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 
    97L), c(0L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), c(19L, 
    22L, 24L, 25L, 26L, 27L, 28L, 29L), c(38L, 41L, 43L, 44L, 
    45L, 46L, 47L, 48L, 49L, 50L), c(60L, 65L, 67L, 68L, 69L, 
    70L), c(76L, 78L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L)), drop = TRUE, group_sizes = c(1L, 
2L, 5L, 5L, 4L, 7L, 9L, 7L, 5L, 12L, 11L, 8L, 10L, 6L, 10L), biggest_group_size = 12L), indices = list(
    2L, 5L, 8L, 11L, 14L, 1L, 4L, 7L, 10L, 13L, 0L, 3L, 6L, 9L, 
    12L), drop = TRUE, group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)
...