Поврежденный data.frame содержит списки - как конвертировать в столбцы? - PullRequest
1 голос
/ 19 января 2012

У меня есть data.frame, называемый "so_data".

Столбцы 13:23 — это списки (list), которые содержат пустые ячейки и отдельные значения. Как мне преобразовать их в числовые векторы?

Я пробовал:

# Attempt 1: flatten the list columns 13:23 into an 11-column matrix,
# filled column by column.
# unlist() silently drops zero-length elements, so every empty cell
# disappears and the remaining values no longer line up with their rows.
# NOTE(review): `rang` is presumably the same data frame as `so_data` - confirm.
matrix(unlist(rang[13:23]), ncol = 11, byrow = FALSE)

Но, видимо, при этом теряются все пустые ячейки, что все портит.

# Attempt 2: column-bind every column and convert the result back to a
# data frame (reported by the asker to leave the data essentially unchanged).
# NOTE(review): `rang` is presumably the same data frame as `so_data` - confirm.
as.data.frame(do.call(cbind, rang))

По существу не меняет данные.

Вот данные:

so_data <- structure(list(id = c(83L, 84L, 85L, 86L, 88L, 89L, 90L, 91L, 
92L, 93L, 94L, 95L, 97L, 98L, 99L, 100L, 101L, 102L, 104L, 105L
), motivation..1. = structure(c(7L, 3L, 5L, 5L, 5L, 10L, 5L, 
5L, 5L, 5L, 2L, 12L, 5L, 2L, 8L, 5L, 5L, 9L, 3L, 5L), .Label = c("", 
"Kald", "Udviklingspotentiale", "Alsidigt arbejdsliv", "Noget jeg kan lide", 
"egen", "godti", "indko", "inter", "jobsi", "samfn", "statu"), class = "factor"), 
    motivation..2. = structure(c(5L, 11L, 9L, 4L, 3L, 9L, 4L, 
    4L, 7L, 4L, 5L, 3L, 7L, 5L, 7L, 10L, 7L, 6L, 5L, 7L), .Label = c("", 
    "Kald", "Udviklingspotentiale", "Alsidigt arbejdsliv", "Noget jeg kan lide", 
    "egen", "godti", "indko", "inter", "jobsi", "samfn", "statu"
    ), class = "factor"), motivation..3. = structure(c(4L, 9L, 
    11L, 7L, 4L, 6L, 3L, 7L, 9L, 9L, 7L, 2L, 4L, 9L, 10L, 8L, 
    9L, 2L, 4L, 3L), .Label = c("", "Kald", "Udviklingspotentiale", 
    "Alsidigt arbejdsliv", "Noget jeg kan lide", "egen", "godti", 
    "indko", "inter", "jobsi", "samfn", "statu"), class = "factor"), 
    motivation..4. = structure(c(11L, 5L, 2L, 10L, 9L, 3L, 8L, 
    11L, 2L, 10L, 10L, 7L, 3L, 7L, 5L, 9L, 3L, 5L, 2L, 8L), .Label = c("", 
    "Kald", "Udviklingspotentiale", "Alsidigt arbejdsliv", "Noget jeg kan lide", 
    "egen", "godti", "indko", "inter", "jobsi", "samfn", "statu"
    ), class = "factor"), motivation..5. = structure(c(3L, 7L, 
    10L, 3L, 11L, 5L, 11L, 3L, 4L, 8L, 9L, 5L, 8L, 11L, 3L, 3L, 
    4L, 7L, 6L, 10L), .Label = c("", "Kald", "Udviklingspotentiale", 
    "Alsidigt arbejdsliv", "Noget jeg kan lide", "egen", "godti", 
    "indko", "inter", "jobsi", "samfn", "statu"), class = "factor"), 
    motivation..6. = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), motivation..7. = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), motivation..8. = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    motivation..9. = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), motivation..10. = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), motivation..11. = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), godti = structure(list(`3` = structure(2L, .Names = "motivation..1."), 
        `4` = structure(6L, .Names = "motivation..5."), `5` = structure(integer(0), .Names = character(0)), 
        `6` = structure(4L, .Names = "motivation..3."), `8` = structure(integer(0), .Names = character(0)), 
        `9` = structure(integer(0), .Names = character(0)), `10` = structure(integer(0), .Names = character(0)), 
        `11` = structure(4L, .Names = "motivation..3."), `12` = structure(3L, .Names = "motivation..2."), 
        `13` = structure(integer(0), .Names = character(0)), 
        `14` = structure(4L, .Names = "motivation..3."), `15` = structure(5L, .Names = "motivation..4."), 
        `17` = structure(3L, .Names = "motivation..2."), `18` = structure(5L, .Names = "motivation..4."), 
        `19` = structure(3L, .Names = "motivation..2."), `20` = structure(integer(0), .Names = character(0)), 
        `21` = structure(3L, .Names = "motivation..2."), `22` = structure(6L, .Names = "motivation..5."), 
        `24` = structure(integer(0), .Names = character(0)), 
        `25` = structure(3L, .Names = "motivation..2.")), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), Udviklingspotentiale = structure(list(
        `3` = structure(6L, .Names = "motivation..5."), `4` = structure(2L, .Names = "motivation..1."), 
        `5` = structure(integer(0), .Names = character(0)), `6` = structure(6L, .Names = "motivation..5."), 
        `8` = structure(3L, .Names = "motivation..2."), `9` = structure(5L, .Names = "motivation..4."), 
        `10` = structure(4L, .Names = "motivation..3."), `11` = structure(6L, .Names = "motivation..5."), 
        `12` = structure(integer(0), .Names = character(0)), 
        `13` = structure(integer(0), .Names = character(0)), 
        `14` = structure(integer(0), .Names = character(0)), 
        `15` = structure(3L, .Names = "motivation..2."), `17` = structure(5L, .Names = "motivation..4."), 
        `18` = structure(integer(0), .Names = character(0)), 
        `19` = structure(6L, .Names = "motivation..5."), `20` = structure(6L, .Names = "motivation..5."), 
        `21` = structure(5L, .Names = "motivation..4."), `22` = structure(integer(0), .Names = character(0)), 
        `24` = structure(2L, .Names = "motivation..1."), `25` = structure(4L, .Names = "motivation..3.")), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), `Noget jeg kan lide` = structure(list(
        `3` = structure(3L, .Names = "motivation..2."), `4` = structure(5L, .Names = "motivation..4."), 
        `5` = structure(2L, .Names = "motivation..1."), `6` = structure(2L, .Names = "motivation..1."), 
        `8` = structure(2L, .Names = "motivation..1."), `9` = structure(6L, .Names = "motivation..5."), 
        `10` = structure(2L, .Names = "motivation..1."), `11` = structure(2L, .Names = "motivation..1."), 
        `12` = structure(2L, .Names = "motivation..1."), `13` = structure(2L, .Names = "motivation..1."), 
        `14` = structure(3L, .Names = "motivation..2."), `15` = structure(6L, .Names = "motivation..5."), 
        `17` = structure(2L, .Names = "motivation..1."), `18` = structure(3L, .Names = "motivation..2."), 
        `19` = structure(5L, .Names = "motivation..4."), `20` = structure(2L, .Names = "motivation..1."), 
        `21` = structure(2L, .Names = "motivation..1."), `22` = structure(5L, .Names = "motivation..4."), 
        `24` = structure(3L, .Names = "motivation..2."), `25` = structure(2L, .Names = "motivation..1.")), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), jobsi = structure(list(
        `3` = structure(integer(0), .Names = character(0)), `4` = structure(integer(0), .Names = character(0)), 
        `5` = structure(6L, .Names = "motivation..5."), `6` = structure(5L, .Names = "motivation..4."), 
        `8` = structure(integer(0), .Names = character(0)), `9` = structure(2L, .Names = "motivation..1."), 
        `10` = structure(integer(0), .Names = character(0)), 
        `11` = structure(integer(0), .Names = character(0)), 
        `12` = structure(integer(0), .Names = character(0)), 
        `13` = structure(5L, .Names = "motivation..4."), `14` = structure(5L, .Names = "motivation..4."), 
        `15` = structure(integer(0), .Names = character(0)), 
        `17` = structure(integer(0), .Names = character(0)), 
        `18` = structure(integer(0), .Names = character(0)), 
        `19` = structure(4L, .Names = "motivation..3."), `20` = structure(3L, .Names = "motivation..2."), 
        `21` = structure(integer(0), .Names = character(0)), 
        `22` = structure(integer(0), .Names = character(0)), 
        `24` = structure(integer(0), .Names = character(0)), 
        `25` = structure(6L, .Names = "motivation..5.")), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), Kald = structure(list(
        `3` = structure(integer(0), .Names = character(0)), `4` = structure(integer(0), .Names = character(0)), 
        `5` = structure(5L, .Names = "motivation..4."), `6` = structure(integer(0), .Names = character(0)), 
        `8` = structure(integer(0), .Names = character(0)), `9` = structure(integer(0), .Names = character(0)), 
        `10` = structure(integer(0), .Names = character(0)), 
        `11` = structure(integer(0), .Names = character(0)), 
        `12` = structure(5L, .Names = "motivation..4."), `13` = structure(integer(0), .Names = character(0)), 
        `14` = structure(2L, .Names = "motivation..1."), `15` = structure(4L, .Names = "motivation..3."), 
        `17` = structure(integer(0), .Names = character(0)), 
        `18` = structure(2L, .Names = "motivation..1."), `19` = structure(integer(0), .Names = character(0)), 
        `20` = structure(integer(0), .Names = character(0)), 
        `21` = structure(integer(0), .Names = character(0)), 
        `22` = structure(4L, .Names = "motivation..3."), `24` = structure(5L, .Names = "motivation..4."), 
        `25` = structure(integer(0), .Names = character(0))), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), statu = structure(list(
        `3` = structure(integer(0), .Names = character(0)), `4` = structure(integer(0), .Names = character(0)), 
        `5` = structure(integer(0), .Names = character(0)), `6` = structure(integer(0), .Names = character(0)), 
        `8` = structure(integer(0), .Names = character(0)), `9` = structure(integer(0), .Names = character(0)), 
        `10` = structure(integer(0), .Names = character(0)), 
        `11` = structure(integer(0), .Names = character(0)), 
        `12` = structure(integer(0), .Names = character(0)), 
        `13` = structure(integer(0), .Names = character(0)), 
        `14` = structure(integer(0), .Names = character(0)), 
        `15` = structure(2L, .Names = "motivation..1."), `17` = structure(integer(0), .Names = character(0)), 
        `18` = structure(integer(0), .Names = character(0)), 
        `19` = structure(integer(0), .Names = character(0)), 
        `20` = structure(integer(0), .Names = character(0)), 
        `21` = structure(integer(0), .Names = character(0)), 
        `22` = structure(integer(0), .Names = character(0)), 
        `24` = structure(integer(0), .Names = character(0)), 
        `25` = structure(integer(0), .Names = character(0))), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), indko = structure(list(
        `3` = structure(integer(0), .Names = character(0)), `4` = structure(integer(0), .Names = character(0)), 
        `5` = structure(integer(0), .Names = character(0)), `6` = structure(integer(0), .Names = character(0)), 
        `8` = structure(integer(0), .Names = character(0)), `9` = structure(integer(0), .Names = character(0)), 
        `10` = structure(5L, .Names = "motivation..4."), `11` = structure(integer(0), .Names = character(0)), 
        `12` = structure(integer(0), .Names = character(0)), 
        `13` = structure(6L, .Names = "motivation..5."), `14` = structure(integer(0), .Names = character(0)), 
        `15` = structure(integer(0), .Names = character(0)), 
        `17` = structure(6L, .Names = "motivation..5."), `18` = structure(integer(0), .Names = character(0)), 
        `19` = structure(2L, .Names = "motivation..1."), `20` = structure(4L, .Names = "motivation..3."), 
        `21` = structure(integer(0), .Names = character(0)), 
        `22` = structure(integer(0), .Names = character(0)), 
        `24` = structure(integer(0), .Names = character(0)), 
        `25` = structure(5L, .Names = "motivation..4.")), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), inter = structure(list(
        `3` = structure(integer(0), .Names = character(0)), `4` = structure(4L, .Names = "motivation..3."), 
        `5` = structure(3L, .Names = "motivation..2."), `6` = structure(integer(0), .Names = character(0)), 
        `8` = structure(5L, .Names = "motivation..4."), `9` = structure(3L, .Names = "motivation..2."), 
        `10` = structure(integer(0), .Names = character(0)), 
        `11` = structure(integer(0), .Names = character(0)), 
        `12` = structure(4L, .Names = "motivation..3."), `13` = structure(4L, .Names = "motivation..3."), 
        `14` = structure(6L, .Names = "motivation..5."), `15` = structure(integer(0), .Names = character(0)), 
        `17` = structure(integer(0), .Names = character(0)), 
        `18` = structure(4L, .Names = "motivation..3."), `19` = structure(integer(0), .Names = character(0)), 
        `20` = structure(5L, .Names = "motivation..4."), `21` = structure(4L, .Names = "motivation..3."), 
        `22` = structure(2L, .Names = "motivation..1."), `24` = structure(integer(0), .Names = character(0)), 
        `25` = structure(integer(0), .Names = character(0))), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), `Alsidigt arbejdsliv` = structure(list(
        `3` = structure(4L, .Names = "motivation..3."), `4` = structure(integer(0), .Names = character(0)), 
        `5` = structure(integer(0), .Names = character(0)), `6` = structure(3L, .Names = "motivation..2."), 
        `8` = structure(4L, .Names = "motivation..3."), `9` = structure(integer(0), .Names = character(0)), 
        `10` = structure(3L, .Names = "motivation..2."), `11` = structure(3L, .Names = "motivation..2."), 
        `12` = structure(6L, .Names = "motivation..5."), `13` = structure(3L, .Names = "motivation..2."), 
        `14` = structure(integer(0), .Names = character(0)), 
        `15` = structure(integer(0), .Names = character(0)), 
        `17` = structure(4L, .Names = "motivation..3."), `18` = structure(integer(0), .Names = character(0)), 
        `19` = structure(integer(0), .Names = character(0)), 
        `20` = structure(integer(0), .Names = character(0)), 
        `21` = structure(6L, .Names = "motivation..5."), `22` = structure(integer(0), .Names = character(0)), 
        `24` = structure(4L, .Names = "motivation..3."), `25` = structure(integer(0), .Names = character(0))), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), samfn = structure(list(
        `3` = structure(5L, .Names = "motivation..4."), `4` = structure(3L, .Names = "motivation..2."), 
        `5` = structure(4L, .Names = "motivation..3."), `6` = structure(integer(0), .Names = character(0)), 
        `8` = structure(6L, .Names = "motivation..5."), `9` = structure(integer(0), .Names = character(0)), 
        `10` = structure(6L, .Names = "motivation..5."), `11` = structure(5L, .Names = "motivation..4."), 
        `12` = structure(integer(0), .Names = character(0)), 
        `13` = structure(integer(0), .Names = character(0)), 
        `14` = structure(integer(0), .Names = character(0)), 
        `15` = structure(integer(0), .Names = character(0)), 
        `17` = structure(integer(0), .Names = character(0)), 
        `18` = structure(6L, .Names = "motivation..5."), `19` = structure(integer(0), .Names = character(0)), 
        `20` = structure(integer(0), .Names = character(0)), 
        `21` = structure(integer(0), .Names = character(0)), 
        `22` = structure(integer(0), .Names = character(0)), 
        `24` = structure(integer(0), .Names = character(0)), 
        `25` = structure(integer(0), .Names = character(0))), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25")), egen = structure(list(
        `3` = structure(integer(0), .Names = character(0)), `4` = structure(integer(0), .Names = character(0)), 
        `5` = structure(integer(0), .Names = character(0)), `6` = structure(integer(0), .Names = character(0)), 
        `8` = structure(integer(0), .Names = character(0)), `9` = structure(4L, .Names = "motivation..3."), 
        `10` = structure(integer(0), .Names = character(0)), 
        `11` = structure(integer(0), .Names = character(0)), 
        `12` = structure(integer(0), .Names = character(0)), 
        `13` = structure(integer(0), .Names = character(0)), 
        `14` = structure(integer(0), .Names = character(0)), 
        `15` = structure(integer(0), .Names = character(0)), 
        `17` = structure(integer(0), .Names = character(0)), 
        `18` = structure(integer(0), .Names = character(0)), 
        `19` = structure(integer(0), .Names = character(0)), 
        `20` = structure(integer(0), .Names = character(0)), 
        `21` = structure(integer(0), .Names = character(0)), 
        `22` = structure(3L, .Names = "motivation..2."), `24` = structure(6L, .Names = "motivation..5."), 
        `25` = structure(integer(0), .Names = character(0))), .Names = c("3", 
    "4", "5", "6", "8", "9", "10", "11", "12", "13", "14", "15", 
    "17", "18", "19", "20", "21", "22", "24", "25"))), .Names = c("id", 
"motivation..1.", "motivation..2.", "motivation..3.", "motivation..4.", 
"motivation..5.", "motivation..6.", "motivation..7.", "motivation..8.", 
"motivation..9.", "motivation..10.", "motivation..11.", "godti", 
"Udviklingspotentiale", "Noget jeg kan lide", "jobsi", "Kald", 
"statu", "indko", "inter", "Alsidigt arbejdsliv", "samfn", "egen"
), row.names = c(3L, 4L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 24L, 25L), class = "data.frame")

Редактировать: Что создало этот беспорядок?

Приведенный выше data.frame изначально состоял из 12 столбцов: id и одиннадцати факторов. Это so_data[1:12].

Одиннадцать факторов получены из вопроса на ранжирование в опросе. Что-то вроде: «Из следующих 11 причин выбора университета, пожалуйста, расположите по важности пять наиболее важных для вас».

Каждый из одиннадцати столбцов соответствует одному выбору. Поскольку допускается только пять вариантов выбора, релевантные данные содержат только первые пять столбцов (so_data[2:6]).

Теперь — для выполнения некоторых вычислений и построения графиков мне нужно было немного преобразовать данные. Вместо 5 столбцов, представляющих «приоритет № 1», «приоритет № 2» и т. д., я хотел получить 11 столбцов, каждый из которых представляет причину. Значения в каждом столбце показывают, какой приоритет респондент присвоил этой причине.

Например,

Вместо:

-------------|Priority 1------- | Priority 2---| ... | Priority 5 |
respondent1  | Reason X         | Reason Y     | ... | Reason Z    | 
...

Я хочу:

-------------| Reason 1 -------  | Reason 2    ---| ... | Reason 11 |
respondent1  | Priority z        | Priority 2     | ... | Priority 5
...

Вот код, с которым мне помог один умный человек:

# Positions of the first and last ranked-choice columns
# (presumably motivation..1. through motivation..5. - confirm against `rang`).
startcolidx = 2
endcolidx = 6
# Every distinct reason that occurs anywhere in the ranked-choice columns.
factors = unique(unlist(rang[,startcolidx:endcolidx]))


# For each reason, add a column named after it that holds, per row, the
# position(s) within that row whose value equals the reason.
# which() returns integer(0) for rows where the reason was not chosen; since
# the per-row results then differ in length, apply() returns a list instead
# of a vector, and the new column becomes a list column.
# Positions are counted over the whole row, so the `id` column is included:
# a match in the first choice column is recorded as 2, not 1.
# `rang` grows inside the loop, so columns added by earlier iterations are
# part of the rows scanned by later iterations.
for(f in as.character(factors)) {
   rang[[f]] = apply(rang, 1, function(arow) which(arow == f))
}

1 Ответ

3 голосов
/ 19 января 2012

Ваш data.frame явно поврежден: некоторые столбцы представляют собой списки векторов из одного или нуля элементов — такого в data.frame быть не должно. Вместо того чтобы пытаться это исправить, возможно, разумнее и проще исправить код, который его сгенерировал.

Если вы все же хотите исправить сами данные, а не исходную проблему, может сработать следующее:

# Rebuild the data frame, collapsing every list column to an atomic vector:
# each cell keeps its first element and empty cells become NA.
# Non-list columns are passed through unchanged.
do.call(
  data.frame,
  c(
    lapply(so_data, function(u) {
      if (!is.list(u)) {
        return(u)
      }
      # c(v, NA)[[1L]] is v's first element, or NA when v is empty.
      # use.names = FALSE stops unlist() from building names such as
      # "3.motivation..1.", which data.frame() would otherwise adopt as
      # row names.
      unlist(lapply(u, function(v) c(v, NA)[[1L]]), use.names = FALSE)
    }),
    # Carry the original row names over explicitly.
    list(row.names = row.names(so_data))
  )
)

Редактировать: Вы можете преобразовать свои исходные данные с помощью melt и dcast.

library(reshape2)

# Step 1 - long format: take the id plus the five ranked-choice columns and
# stack them, giving one row per (id, choice column, chosen reason).
# Long data is much easier to process than wide data.
long <- melt(so_data[1:6], id.vars = "id")

# Step 2 - wide format: rows go on the left of ~ and columns on the right,
# so this yields one row per id and one column per reason. Each cell holds
# the name of the choice column in which that reason was picked.
# (Specifying value.var is rarely necessary; here it selects `variable`.)
d <- dcast(long, id ~ value, value.var = "variable")
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...