Цель: создать интерактивный график анализа множественной корреспонденции (MCA), который добавляет эллипсы вокруг исследователей (точек данных), которые участвуют в данной области исследований (категории в кнопках Shiny R). Этот график будет размещен на официальной веб-странице.
Описание: Область (и) исследования должна быть доступна в виде кнопок Shiny R, которые пользователи могут выбирать / отменять, и те исследователи, которые участвуют в данной области исследования, будутбыть выделенным (изменение цвета, непрозрачность, все что угодно).
Набор данных: первая переменная - «исследователь» (имя исследователя), а другие переменные - «поля исследований» (геномика, протеомика и т. д.). «Да» Означает, что исследователь исследователь участвует в этой области исследований. «Нет» Означает, что исследователь не участвует в этой области исследований.
Используются «основные» пакеты: factoMineR, factoextra, Plotly and Shiny R.
Текущее состояние: я получил MCA ShinyR интерактивный сюжет. Я могу навести курсор на каждую точку, и там отображается имя исследователя.
Проблема: я не нашел способа связать «исследователь» и «поле исследований» таким образом, чтобы при выборе одного илибольше областей исследования (Shiny R кнопки), это окрашивает исследователей, вовлеченных в выбранные области исследования.
Есть предложения? Заранее спасибо!
Комментарий: имейте в виду, что один и тот же исследователь может иметь несколько областей исследования.
Вот мой код:
myPackages <- c("plotly", "tidyr", "FactoMineR", "factoextra", "shiny")
lapply(myPackages, library, character.only = TRUE)
# Import dataset
data <- read.csv("/mnt/data/jobs_sent_by_david/MCA_StackOverflow.csv",
stringsAsFactors = FALSE, check.names = FALSE)
## Multiple correspondence analysis
# factoMineR needs data be factors
for (i in 1:ncol(data)) {
data[ , i] <- as.factor(data[ , i])
}
str(data) # check that all columns are now factors
# tutorial on MCA with factoMineR: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/114-mca-multiple-correspondence-analysis-in-r-essentials/
mca <- MCA(data, graph = FALSE)
# Subset MCA object to get researcher-related data
researcher <- get_mca_ind(mca)
researcherPlotly <- data.frame(researcher$coord)
rownames(researcherPlotly) <- data$researcher
# Connect plotly
Sys.setenv("plotly_username" = "your_user_name")
Sys.setenv("plotly_api_key" = "your_password")
# plotly object
plot_ly(researcherPlotly, x = ~Dim.1 , y = ~Dim.2,
text = rownames(researcherPlotly),
mode = "markers",
marker = list(size = 11)) %>%
layout(title = "MCA of researcher by research field",
xaxis = list(title = "PC 1"),
yaxis = list(title = "PC 2"))
### Integration with Shiny R
ui <- shinyUI(fluidPage(plotlyOutput("plot"),
verbatimTextOutput("event"),
titlePanel("Research areas"),
sidebarPanel(radioButtons("p", "Select one or more research areas of interest:",
list("Animal Biology"='a',
"Bioinformatics and Computational Biology" = "b",
"Biotechnology"='c',
"Cell Biology"='d',
"Chemical Biology" = "e",
"Developmental Biology" = "f",
"Evolutionory Biology" = "g",
"Genetics" = "h",
"Genomics" = "i",
"Lecturer" = "j",
"Metabolomics" = "k",
"Microbiology" = "l",
"Molecular Biology" = "m",
"Neurobiology" = "n",
"Pathology" = "o",
"Physiology" = "p",
"Plant Biology" = "q",
"Proteomics" = "r",
"Quantitative Biology" = "s",
"Structural Biology" = "t",
"Systems Biology" = "u" )),
mainPanel(plotOutput("distPlot"))),
mainPanel(plotlyOutput("trendPlot"))))
server <- function(input, output) {
output$plot <- renderPlotly({
plot_ly(researcherPlotly, x = ~Dim.1 , y = ~Dim.2,
text = rownames(researcherPlotly),
mode = "markers",
marker = list(size = 11)) %>%
layout(p, title = "Plot title",
xaxis = list(title = "PC 1"),
yaxis = list(title = "PC 2"))
})
output$event <- renderPrint({
d <- event_data("plotly_hover")
if (is.null(d)) "Hover on a point to see a principal investigator's name, research interest, and contact information. Points close to each other represent principal investigators with similar research interests" else d
})
}
shinyApp(ui, server)
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8 LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] factoextra_1.0.5 FactoMineR_1.42 tidyr_1.0.0 plotly_4.9.0.9000 ggplot2_3.2.1
loaded via a namespace (and not attached):
[1] Rcpp_1.0.2 later_1.0.0 pillar_1.4.2 compiler_3.6.1 tools_3.6.1
[6] zeallot_0.1.0 digest_0.6.21 jsonlite_1.6 tibble_2.1.3 lifecycle_0.1.0
[11] gtable_0.3.0 viridisLite_0.3.0 lattice_0.20-38 pkgconfig_2.0.3 rlang_0.4.0
[16] shiny_1.4.0 rstudioapi_0.10 crosstalk_1.0.0 yaml_2.2.0 ggrepel_0.8.1
[21] fastmap_1.0.1 cluster_2.1.0 withr_2.1.2 dplyr_0.8.3 httr_1.4.1
[26] htmlwidgets_1.5 vctrs_0.2.0 flashClust_1.01-2 grid_3.6.1 tidyselect_0.2.5
[31] scatterplot3d_0.3-41 glue_1.3.1 data.table_1.12.4 R6_2.4.0 purrr_0.3.2
[36] magrittr_1.5 promises_1.1.0 scales_1.0.0 backports_1.1.5 htmltools_0.4.0
[41] leaps_3.0 MASS_7.3-51.4 assertthat_0.2.1 xtable_1.8-4 mime_0.7
[46] colorspace_1.4-1 httpuv_1.5.2 lazyeval_0.2.2 munsell_0.5.0 crayon_1.3.4
Мои данные:
> dput(data)
structure(list(researcher = structure(1:26, .Label = c("researcher_a",
"researcher_b", "researcher_c", "researcher_d", "researcher_e",
"researcher_f", "researcher_g", "researcher_h", "researcher_i",
"researcher_j", "researcher_k", "researcher_l", "researcher_m",
"researcher_n", "researcher_o", "researcher_p", "researcher_q",
"researcher_r", "researcher_s", "researcher_t", "researcher_u",
"researcher_v", "researcher_w", "researcher_x", "researcher_y",
"researcher_z"), class = "factor"), Animal_Biology = structure(c(1L,
1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L), .Label = c("no", "yes"), class = "factor"),
Bioinformatics_Computationol_Biology = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L), .Label = c("no", "yes"
), class = "factor"), Biotechnology = structure(c(2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no", "yes"
), class = "factor"), Cell_Biology = structure(c(1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("no", "yes"
), class = "factor"), Chemical_Biology = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), Developmental_Biology = structure(c(1L,
2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L), .Label = c("no",
"yes"), class = "factor"), Evolutionory_Biology = structure(c(1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), Genetics = structure(c(2L, 1L,
2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no", "yes"
), class = "factor"), Genomics = structure(c(1L, 1L, 1L,
1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
Lecturer = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L), .Label = c("no", "yes"), class = "factor"), Metabolomics = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), Microbiology = structure(c(2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), Molecular_Biology = structure(c(2L,
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), Neurobiology = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("no",
"yes"), class = "factor"), Pathology = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("no", "yes"
), class = "factor"), Physiology = structure(c(1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
Plant_Biology = structure(c(1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 2L, 1L), .Label = c("no", "yes"), class = "factor"),
Proteomics = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
Quantitative_Biology = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L), .Label = c("no", "yes"), class = "factor"),
Structural_Biology = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L), .Label = c("no", "yes"), class = "factor"),
Systems_Biology = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L,
2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
2L, 1L, 2L, 1L), .Label = c("no", "yes"), class = "factor")), row.names = c(NA,
-26L), class = "data.frame")
Оскар.