Question

df1=data.frame(c("male","female","male"),c("1","2","3","4","5","6"),seq(141,170))
names(df1) = c("gender","age","height")
df1$age <- factor(df1$age,
levels=c(1,2,3,4,5,6),
labels=c("16-24","25-34","35-44","45-54","55-64","65+"))
q1a=c(1,0,1,0,0,1);q1b=c(0,0,2,2,2,0);q1c=c(0,0,3,3,0,3) #1,2 and 3 used to be compatible with existing datasets. Could change all to 1 if necessary.
df2=data.frame(q1a=q1a,q1b=q1b,q1c=q1c); df1 <- cbind(df1,df2)

rm(q1a,q1b,q1c,df2)

Я хочу повторить анализ вопросов с множественными ответами из SPSS в R.

В настоящее время я использую этот код:

#creating function for analysing questions with grouped data 
multfreqtable <- function(a, b, c){

# number of respondents (for percent of cases)
totrep=sum(a==1|b==2|c==3) 

#creating frequency table
table_a=data.frame("a",sum(a==1))
names(table_a)=c("question","freq")
table_b=data.frame("b",sum(b==2))
names(table_b)=c("question","freq") 
table_c=data.frame("c",sum(c==3))
names(table_c)=c("question","freq")
table_question <-rbind(table_a,table_b,table_c)

#remove individual question tables
rm(table_a,table_b,table_c)

#adding total
total=as.data.frame("Total")
totalsum=(sum(table_question$freq,na.rm=TRUE))
totalrow=cbind(total,totalsum)
names(totalrow)=c("question","freq")
table_question=rbind(table_question,totalrow)

#adding percentage column to frequency table
percentcalc=as.numeric(table_question$freq)
percent=(percentcalc/totalsum)*100
table_question<-cbind(table_question,percent)

#adding percent of cases column to frequency table
poccalc=as.numeric(table_question$freq)
percentofcases=(poccalc/totrep)*100
table_question<-cbind(table_question,percentofcases)

#print percent of cases value
total_respondents <<- data.frame(totrep)

#remove all unnecessary data and values
rm(total,totalsum,totalrow,b,c,percent,percentcalc,percentofcases,totrep,poccalc)

return(table_question)
}

#calling function - must tie to data.frame using $ !!!
q1_frequency<-multfreqtable(df1$q1a,df1$q1b,df1$q1c)

#renaming percent of cases - This is very important while using current method
total_respondents_q1 <- total_respondents
rm(total_respondents)

Производим эту таблицу какрезультат:

Output table

Я ищу более эффективный способ сделать это, в идеале не требующий редактирования функции, если бы было больше или меньше вопросов с несколькими вариантами ответов.

A5C1D2H2I1M1N2O1R2T1 · Answer 1 · 23 апреля 2012

Ваша функция на самом деле слишком сложна для того, что вам нужно сделать. Я думаю, что такая функция должна работать и быть более гибкой.

multfreqtable = function(data, question.prefix) {
  # Find the columns with the questions
  a = grep(question.prefix, names(data))
  # Find the total number of responses
  b = sum(data[, a] != 0)
  # Find the totals for each question
  d = colSums(data[, a] != 0)
  # Find the number of respondents
  e = sum(rowSums(data[,a]) !=0)
  # d + b as a vector. This is your overfall frequency 
  f = as.numeric(c(d, b))
  data.frame(question = c(names(d), "Total"),
             freq = f,
             percent = (f/b)*100,
             percentofcases = (f/e)*100 )
}

Добавьте еще один вопрос к вашему набору данных:

set.seed(1); df1$q2a = sample(c(0, 1), 30, replace=T)
set.seed(2); df1$q2b = sample(c(0, 2), 30, replace=T)
set.seed(3); df1$q2c = sample(c(0, 3), 30, replace=T)

Составьте таблицу для ответов "q1":

> multfreqtable(df1, "q1")
  question freq   percent percentofcases
1      q1a   15  33.33333             60
2      q1b   15  33.33333             60
3      q1c   15  33.33333             60
4    Total   45 100.00000            180

Составьте таблицу для ответов "q2":

> multfreqtable(df1, "q2")
  question freq   percent percentofcases
1      q2a   14  31.11111       53.84615
2      q2b   13  28.88889       50.00000
3      q2c   18  40.00000       69.23077
4    Total   45 100.00000      173.07692

Таблицы для нескольких вопросов

Вот модифицированная версия функции, которая позволяет создавать список таблиц для нескольких вопросов одновременно:

multfreqtable = function(data, question.prefix) {
  z = length(question.prefix)
  temp = vector("list", z)

  for (i in 1:z) {
    a = grep(question.prefix[i], names(data))
    b = sum(data[, a] != 0)
    d = colSums(data[, a] != 0)
    e = sum(rowSums(data[,a]) !=0)
    f = as.numeric(c(d, b))
    temp[[i]] = data.frame(question = c(sub(question.prefix[i], 
                                            "", names(d)), "Total"),
                           freq = f,
                           percent = (f/b)*100,
                           percentofcases = (f/e)*100 )
    names(temp)[i] = question.prefix[i]
  }
  temp
}

Примеры:

> multfreqtable(df1, "q1")
$q1
  question freq   percent percentofcases
1        a   15  33.33333             60
2        b   15  33.33333             60
3        c   15  33.33333             60
4    Total   45 100.00000            180

> test1 = multfreqtable(df1, c("q1", "q2"))
> test1
$q1
  question freq   percent percentofcases
1        a   15  33.33333             60
2        b   15  33.33333             60
3        c   15  33.33333             60
4    Total   45 100.00000            180

$q2
  question freq   percent percentofcases
1        a   14  31.11111       53.84615
2        b   13  28.88889       50.00000
3        c   18  40.00000       69.23077
4    Total   45 100.00000      173.07692

> test1$q1
  question freq   percent percentofcases
1        a   15  33.33333             60
2        b   15  33.33333             60
3        c   15  33.33333             60
4    Total   45 100.00000            180

Evgeny · Answer 2 · 07 июля 2019

Я заметил, что это сообщение довольно старое, однако я не смог найти более актуальное решение.Вот моя версия, основанная на подходе dplyr / tidyverse.

mult_resp = function(df1, mv_q = c("q1a", "q1b", "q1c")){

  df2 = df1 %>%
    mutate(id = rownames(.)) %>%  #row id for counting n_cases
    select(id, everything()) %>% 
    mutate_at(mv_q, ~ ifelse(. != 0, 1, 0)) %>%
    gather(question, resp,-id, -gender,-age,-height) 

  #count number of cases excluding "all zeros" cases
  n_cases = df2 %>% group_by(id) %>%
    summarise(n = sum(resp)) %>% 
    summarise(sum(n > 0))

  #output table
  res = df2 %>% 
    group_by(question) %>%
    summarise(freq = sum(resp)) %>%
    mutate(
      percent = freq/sum(freq) *100,
      percent_of_cases = freq/as.numeric(n_cases)*100
      ) %>% 
    rbind(., 
          data.frame(question ="Total", 
                     freq =sum(.$freq, na.rm=TRUE),
                     percent =sum(.$percent, na.rm=TRUE),
                     percent_of_cases = sum(.$percent_of_cases, na.rm=TRUE)
                     )
          )
    res
}

Пример:

> mult_resp(df1, mv_q = c("q1a", "q1b", "q1c"))
# A tibble: 4 x 4
  question  freq percent percent_of_cases
  <chr>    <dbl>   <dbl>            <dbl>
1 q1a         15    33.3               60
2 q1b         15    33.3               60
3 q1c         15    33.3               60
4 Total       45   100.               180

Анализ множественного ответа

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 2 ]

Таблицы для нескольких вопросов

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Анализ множественного ответа

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 2 ]

Таблицы для нескольких вопросов

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы