#Reading data into R
library(readr)
df1 <- read_csv("C:/Users/userm/Desktop/df1.csv",
col_names = FALSE)
#Loading libraries:
library(plyr)
library(tidyr)
spl <- diff(c(which(df1$X1 == 'group_no'), nrow(df1))) #defining the split for each table
#Splitting to a list of dataframes, where each dataframe is one of the tables of interest:
dl2 <- split(df1,
c(rep(1, spl[1]),
rep(2, spl[2]),
rep(3, spl[3]),
rep(4, spl[4]),
rep(5, spl[5])))
#Assigning the first row as the dataframe header:
dl3 <- lapply(dl2, function(d){colnames(d) <- d[1,]; d <- d[-1,]})
#Removing columns with All NAs:
dl4 <- lapply(dl3, function(d){d <- d[,colSums(is.na(d))<nrow(d)]})
#Binding dataframes with different number of columns (plyr):
df5 <- rbind.fill(dl4)
#Converting from Wide to Long (tidyr):
df6 <- pivot_longer(df5, -c("group_no", "group", "variable"),
names_to = "marker_name", values_to = "value")
#Convert from Long to Wide with a different arrangement (tidyr):
df7 <- pivot_wider(df6, names_from = "variable", values_from = "value")
#> # A tibble: 65 x 16
#> group_no group marker_name a b c `4` `5` g `1`
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 1 GR-0~ marker1 - B A - B <NA> <NA>
#> 2 1 GR-0~ marker2 - A B - - <NA> <NA>
#> 3 1 GR-0~ marker3 A B - - B <NA> <NA>
#> 4 1 GR-0~ marker4 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 5 1 GR-0~ marker5 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 6 1 GR-0~ marker6 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 7 1 GR-0~ marker7 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 8 1 GR-0~ marker8 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 9 1 GR-0~ marker9 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 10 1 GR-0~ marker10 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> # ... with 55 more rows, and 6 more variables: `2` <chr>, `3` <chr>,
#> # `6` <chr>, d <chr>, e <chr>, f <chr>
Данные:
df1 <- structure(list(X1 = c("group_no", "1", "1", "1", "1", "1", "group_no",
"2", "2", "2", "2", "group_no", "3", "3", "3", "group_no", "4",
"4", "4", "4", "4", "4", "group_no", "5", "5", "5", "5"), X2 = c("group",
"GR-01-1", "GR-01-1", "GR-01-1", "GR-01-1", "GR-01-1", "group",
"GR-01-1a", "GR-01-1a", "GR-01-1a", "GR-01-1a", "group", "GR-01-2",
"GR-01-2", "GR-01-2", "group", "GR-02-1", "GR-02-1", "GR-02-1",
"GR-02-1", "GR-02-1", "GR-02-1", "group", "GR-02-2", "GR-02-2",
"GR-02-2", "GR-02-2"), X3 = c("variable", "a", "b", "c", "4",
"5", "variable", "1", "2", "3", "4", "variable", "1", "2", "3",
"variable", "1", "2", "3", "4", "5", "6", "variable", "d", "e",
"f", "g"), X4 = c("marker1", "-", "B", "A", "-", "B", "marker4",
"B", "B", "A", "A", "marker8", "B", "A", "-", "marker9", "B",
"A", "-", "B", "A", "-", "marker11", "B", "A", "B", "-"), X5 = c("marker2",
"-", "A", "B", "-", "-", "marker5", "-", "A", "B", "A", NA, NA,
NA, NA, "marker10", "A", "A", "B", "B", "B", "A", "marker12",
"B", "B", "-", "B"), X6 = c("marker3", "A", "B", "-", "-", "B",
"marker6", "A", "-", "-", "A", NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, "marker13", "A", "B", "-", "-"), X7 = c(NA, NA, NA,
NA, NA, NA, "marker7", "-", "B", "B", "A", NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -27L), spec = structure(list(
cols = list(X1 = structure(list(), class = c("collector_character",
"collector")), X2 = structure(list(), class = c("collector_character",
"collector")), X3 = structure(list(), class = c("collector_character",
"collector")), X4 = structure(list(), class = c("collector_character",
"collector")), X5 = structure(list(), class = c("collector_character",
"collector")), X6 = structure(list(), class = c("collector_character",
"collector")), X7 = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 0), class = "col_spec"))
X1 X2 X3 X4 X5 X6 X7
1 group_no group variable marker1 marker2 marker3 NA
2 1 GR-01-1 a - - A NA
3 1 GR-01-1 b B A B NA
4 1 GR-01-1 c A B - NA
5 1 GR-01-1 4 - - - NA
6 1 GR-01-1 5 B - B NA
7 group_no group variable marker4 marker5 marker6 marker7
8 2 GR-01-1a 1 B - A -
9 2 GR-01-1a 2 B A - B
10 2 GR-01-1a 3 A B - B
# ... with 17 more rows