Как заметил @r2evans, ваша главная проблема в том, что данные ориентированы неправильно: гены расположены по строкам, а испытуемые — по столбцам.
Это необходимо исправить. Я предполагаю, что набор данных слишком велик, чтобы сделать это вручную. Поэтому вот довольно некрасивый код на R, который вы, однако, сможете адаптировать под любое количество испытуемых и генов.
# what you gave us
# Inline copy of the question's table: a leading `weight` row followed by one
# row per gene (A1..A13), and one column per subject (S1..S9).
# NOTE: the values use a decimal comma while read.table() defaults to
# dec = ".", so they are kept as text here (the printout below still shows
# the commas) and are converted to numbers later in the script.
your_data <- read.table(text = "gene S1 S2 S3 S4 S5 S6 S7 S8 S9
weight 1,34175933 NA 0,506664615 2,404181093 0,853749494 0,931450603 2,666384344 1,483623026 1,908323207
A1 0 0 0 0 0 0 0 0 0
A2 0 0 0 0 0 0 0 0 0
A3 0,047059 0 0 0 0,055744 0 0 0 0
A4 0 0 0 0 0 0 0 0 0
A5 0 0 0 0 0 0 0 0 0
A6 0 0 0 0 0 0 0 0 0
A7 0 0 0 0 0 0 0 0 0
A8 0 0 0 0 0 0 0 0 0
A9 0 0 0 0 0 0 0 0 0
A10 0 0 0 0 0 0 0 0 0
A11 0 0 0 0 0 0 0 0 0
A12 0 0 0 0 0 0 0 0 0
A13 0 0 0 0 0 0 0 0 0", header = TRUE)
# save your data from excel to a csv file
# your_data <- read.table("untitled.csv", header = TRUE )
# NOTE(review): the commented-out call above relies on read.table()'s default
# whitespace separator; a real CSV export presumably needs sep = "," or
# sep = ";" (i.e. read.csv() / read.csv2()) -- confirm against the actual file.
# should show about like this
your_data
#> gene S1 S2 S3 S4 S5 S6
#> 1 weight 1,34175933 NA 0,506664615 2,404181093 0,853749494 0,931450603
#> 2 A1 0 0 0 0 0 0
#> 3 A2 0 0 0 0 0 0
#> 4 A3 0,047059 0 0 0 0,055744 0
#> 5 A4 0 0 0 0 0 0
#> 6 A5 0 0 0 0 0 0
#> 7 A6 0 0 0 0 0 0
#> 8 A7 0 0 0 0 0 0
#> 9 A8 0 0 0 0 0 0
#> 10 A9 0 0 0 0 0 0
#> 11 A10 0 0 0 0 0 0
#> 12 A11 0 0 0 0 0 0
#> 13 A12 0 0 0 0 0 0
#> 14 A13 0 0 0 0 0 0
#> S7 S8 S9
#> 1 2,666384344 1,483623026 1,908323207
#> 2 0 0 0
#> 3 0 0 0
#> 4 0 0 0
#> 5 0 0 0
#> 6 0 0 0
#> 7 0 0 0
#> 8 0 0 0
#> 9 0 0 0
#> 10 0 0 0
#> 11 0 0 0
#> 12 0 0 0
#> 13 0 0 0
#> 14 0 0 0
# Flip the table so that subjects become rows and genes become columns.
# t() yields a matrix, so it is turned back into a data frame afterwards.
your_data <- t(your_data)
your_data <- as.data.frame(your_data)
your_data
#> V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
#> gene weight A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13
#> S1 1,34175933 0 0 0,047059 0 0 0 0 0 0 0 0 0 0
#> S2 <NA> 0 0 0 0 0 0 0 0 0 0 0 0 0
#> S3 0,506664615 0 0 0 0 0 0 0 0 0 0 0 0 0
#> S4 2,404181093 0 0 0 0 0 0 0 0 0 0 0 0 0
#> S5 0,853749494 0 0 0,055744 0 0 0 0 0 0 0 0 0 0
#> S6 0,931450603 0 0 0 0 0 0 0 0 0 0 0 0 0
#> S7 2,666384344 0 0 0 0 0 0 0 0 0 0 0 0 0
#> S8 1,483623026 0 0 0 0 0 0 0 0 0 0 0 0 0
#> S9 1,908323207 0 0 0 0 0 0 0 0 0 0 0 0 0
# Save the transposed table to disk (missing values are written as empty
# fields, row names are kept so the subject IDs survive).
write.csv(
  x = your_data,
  file = "transposed.csv",
  row.names = TRUE,
  na = ""
)
# Re-import it: skip = 1 drops the V1..Vn line, so the row holding
# gene/weight/A1... becomes the header; empty fields are read back as NA.
fixed_data <- read.csv(
  file = "transposed.csv",
  header = TRUE,
  skip = 1,
  na.strings = ""
)
fixed_data
#> gene weight A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13
#> 1 S1 1,34175933 0 0 0,047059 0 0 0 0 0 0 0 0 0 0
#> 2 S2 <NA> 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 3 S3 0,506664615 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 4 S4 2,404181093 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 5 S5 0,853749494 0 0 0,055744 0 0 0 0 0 0 0 0 0 0
#> 6 S6 0,931450603 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 7 S7 2,666384344 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 8 S8 1,483623026 0 0 0 0 0 0 0 0 0 0 0 0 0
#> 9 S9 1,908323207 0 0 0 0 0 0 0 0 0 0 0 0 0
# better? it's now by subject not gene
# After the transpose the first column (still headed "gene") holds the
# subject IDs; keep a copy under a clearer name.
fixed_data$subject <- fixed_data$gene
# Convert EVERY measurement column to numeric, not only the ones that happen
# to contain a decimal comma in the sample data. Columns read as text
# (e.g. "0,047059") get their comma replaced by a dot first; columns that are
# already numeric pass through with unchanged values.
value_cols <- setdiff(names(fixed_data), c("gene", "subject"))
fixed_data[value_cols] <- lapply(
  fixed_data[value_cols],
  function(col) {
    # as.character() first so a factor column is converted by its labels,
    # not by its internal integer codes
    as.numeric(sub(",", ".", as.character(col), fixed = TRUE))
  }
)
# much better
fixed_data
#> gene weight A1 A2 A3 A4 A5 A6 A7 A8 A9 A10 A11 A12 A13 subject
#> 1 S1 1.3417593 0 0 0.047059 0 0 0 0 0 0 0 0 0 0 S1
#> 2 S2 NA 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S2
#> 3 S3 0.5066646 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S3
#> 4 S4 2.4041811 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S4
#> 5 S5 0.8537495 0 0 0.055744 0 0 0 0 0 0 0 0 0 0 S5
#> 6 S6 0.9314506 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S6
#> 7 S7 2.6663843 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S7
#> 8 S8 1.4836230 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S8
#> 9 S9 1.9083232 0 0 0.000000 0 0 0 0 0 0 0 0 0 0 S9
# Work on a separate copy so fixed_data keeps its identifier columns,
# then drop those identifiers in one go: they are labels, not regressors.
model_me <- fixed_data
model_me[c("gene", "subject")] <- NULL
# per your question in the comments remove all columns
# where TPM is zero for all subjects
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Column predicate for select_if(): returns TRUE when the column should be
# KEPT, i.e. when it holds at least one non-zero, non-missing value.
# NOTE: despite its name, all_zero() returns FALSE for a column that is all
# zero (or all NA); the name is kept because the code below refers to it.
# any(x != 0) is used rather than sum(x) != 0 so that values of opposite sign
# which cancel out do not make a column look empty.
all_zero <- function(x) {
  # fail loudly on text columns (e.g. decimal commas that were not converted)
  stopifnot(is.numeric(x) || is.logical(x))
  any(x != 0, na.rm = TRUE)
}
# Keep only the columns for which the all_zero() predicate returns TRUE,
# then regress weight on every remaining column.
model_me <- select_if(model_me, all_zero)
lm(formula = weight ~ ., data = model_me)
#>
#> Call:
#> lm(formula = weight ~ ., data = model_me)
#>
#> Coefficients:
#> (Intercept) A3
#> 1.656 -11.174
# Refit the same model and print the full summary (coefficient table,
# residual quantiles, R-squared and F-statistic).
summary(
  lm(formula = weight ~ ., data = model_me)
)
#>
#> Call:
#> lm(formula = weight ~ ., data = model_me)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.1489 -0.3153 0.0200 0.3767 1.0108
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1.6556 0.3157 5.244 0.00193 **
#> A3 -11.1742 12.2409 -0.913 0.39651
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.7743 on 6 degrees of freedom
#> (1 observation deleted due to missingness)
#> Multiple R-squared: 0.1219, Adjusted R-squared: -0.02439
#> F-statistic: 0.8333 on 1 and 6 DF, p-value: 0.3965
Создано 05.05.2020 пакетом reprex (v0.3.0)