r вычислить составную оценку и надежность для нескольких столбцов в кадре данных - PullRequest
0 голосов
/ 16 февраля 2020

Я хотел бы рассчитать составной балл и альфу Кронбаха для нескольких шкал (групп элементов) в моём фрейме данных и сохранить результаты в новом фрейме данных.
Это (часть) моего текущего фрейма данных:

structure(list(T1PP_1 = c(6, 7, 4, 5, 4, 6, 5, 6, 6, 5), T1PP_2 = c(3, 
4, 4, 5, 2, 5, 5, 6, 6, 3), T1PP_3 = c(5, 7, 6, 7, 6, 7, 6, 5, 
6, 5), T1PP_4 = c(3, 6, 5, 5, 6, 5, 4, 6, 6, 4), T1PP_5 = c(4, 
6, 5, 6, 5, 3, 6, 3, 5, 4), T1PP_7 = c(4, 6, 5, 5, 4, 7, 4, 5, 
6, 2), T1PP_8 = c(5, 6, 4, 6, 4, 2, 4, 5, 5, 5), T1PP_9 = c(5, 
6, 5, 6, 4, 5, 3, 7, 5, 6), T1PP_10 = c(3, 6, 3, 4, 5, 2, 3, 
6, 6, 3), T1PP_11 = c(5, 6, 4, 5, 3, 1, 5, 3, 5, 2), t1se_1 = c(4, 
5, 4, 4, 4, 1, 5, 4, 4, 4), t1se_2 = c(3, 5, 4, 5, 4, 1, 5, 2, 
4, 4), t1se_3 = c(4, 4, 4, 4, 3, 4, 5, 4, 5, 4), t1se_4 = c(3, 
5, 4, 5, 4, 4, 5, 4, 5, 4), t1se_5 = c(4, 5, 4, 4, 4, 4, 5, 5, 
4, 4), t1se_6 = c(4, 5, 4, 5, 4, 4, 5, 5, 5, 4), t1se_7 = c(4, 
5, 3, 5, 4, 4, 5, 5, 5, 3), t1se_8 = c(3, 5, 3, 4, 4, 3, 5, 5, 
5, 4), t1ogoal_1 = c(4, 5, 4, 5, 5, 5, 5, 5, 2, 5), t1ogoal_2 = c(4, 
4, 4, 5, 5, 5, 5, 4, 4, 5), t1ogoal_3 = c(4, 5, 4, 5, 4, 3, 4, 
2, 4, 5), t1ogoal_4 = c(4, 5, 3, 4, 2, 3, 3, 1, 2, 4), t1ogoal_5 = c(4, 
5, 3, 5, 5, 5, 4, 2, 3, 5), t1ogoal_6 = c(4, 5, 5, 4, 5, 5, 3, 
5, 4, 5), t1ogoal_7 = c(4, 5, 5, 5, 5, 5, 5, 5, 4, 5)), row.names = c(NA, 
10L), class = "data.frame")

На основе примера фрейма данных новый фрейм данных должен выглядеть следующим образом:

structure(list(T1PP_comp = c(2.4, 5.4, 3.1, 4.9, 4.2, 4.6, 4.1, 
4.1, 4, 4.8), T1PP_alpha = c(2.4, 5.4, 3.1, 4.9, 4.2, 4.6, 4.1, 
4.1, 4, 4.8), t1se_comp = c(2.375, 2.75, 1.625, 3.875, 2.625, 
2.625, 3.5, 3.5, 2.375, 3.5), t1se_alpha = c(2.375, 2.75, 1.625, 
3.875, 2.625, 2.625, 3.5, 3.5, 2.375, 3.5), t1ogoal_comp = c(1.4, 
3.5, 2.6, 2.7, 2.5, 2.6, 3, 3, 2.6, 3.2)), row.names = c(NA, 
10L), class = "data.frame")

Итак, я хочу перебрать группы столбцов, которые относятся к одной шкале, т. е. образуют одну переменную (например, от T1PP_1 до T1PP_11), и для каждой группы получить составной балл и альфу Кронбаха. Это была начальная попытка:

#' Composite score and psych::alpha() scores for one scale
#'
#' Selects the columns of `data` whose names match `variable_name` and
#' appends two columns computed from those item columns only:
#'   * `<prefix>_comp`  - `multicon::composite(items, nomiss = 0.8)`
#'   * `<prefix>_alpha` - the `scores` element of `psych::alpha(items, ...)`
#'
#' NOTE(review): `$scores` appears to be a per-row scale score, not the
#' reliability coefficient itself; confirm that this is what is wanted in
#' the `_alpha` column.
#'
#' @param data A data frame holding the item columns. The default refers to
#'   a global object `my_dat`, so pass `data` explicitly where possible.
#' @param variable_name Regular expression handed to `dplyr::matches()` to
#'   pick the item columns, e.g. `"T1PP_"`.
#' @param ... Further arguments forwarded to `psych::alpha()`.
#' @return The selected item columns plus the two new columns.
comp_and_alph <- function(data = my_dat, variable_name, ...) {
  items <- dplyr::select(data, dplyr::matches(variable_name))

  # Compute both results from the untouched item columns before adding
  # anything, so neither new column leaks into the other calculation.
  comp <- multicon::composite(items, nomiss = 0.8)
  scores <- psych::alpha(items, ...)$scores

  # Drop trailing underscores so "T1PP_" yields "T1PP_comp" rather than
  # "T1PP__comp".
  prefix <- sub("_+$", "", variable_name)
  items[[paste(prefix, "comp", sep = "_")]] <- comp
  items[[paste(prefix, "alpha", sep = "_")]] <- scores
  items
}

#' Add composite/alpha columns for several scales to a data frame
#'
#' Calls `comp_and_alph()` once per entry of `variables` and binds the
#' columns it creates onto `data`.
#'
#' The results are combined with a column bind rather than `merge()`:
#' every result has the same rows in the same order as `data`, so no join
#' is needed. A `merge()` on the shared item columns multiplies rows
#' whenever two respondents have identical answers, and it reorders rows.
#'
#' @param data A data frame holding all item columns.
#' @param variables Character vector of patterns, one per scale; each is
#'   passed to `comp_and_alph()` as `variable_name`.
#' @param ... Further arguments forwarded to `comp_and_alph()`.
#' @return `data` with the newly computed columns appended; row count and
#'   row order are unchanged.
comp_and_alph_all <- function(data, variables, ...) {
  new_cols <- lapply(variables, function(v) {
    scored <- comp_and_alph(data, v, ...)
    # Keep only the columns that were added; the item columns are already
    # present in `data`.
    scored[setdiff(names(scored), names(data))]
  })
  do.call(cbind, c(list(data), new_cols))
}

Проблема в том, что мой фрейм данных содержит около 350 строк и более 200 столбцов (элементов), которые образуют примерно 40 переменных. Когда я запускаю приведённый выше код более чем для первых трёх переменных, у меня заканчивается память:

# Score five scales of `my_dat` in one call; each string is a column-name
# pattern for one scale. This is the call that ran out of memory.
comp_and_alph_all(my_dat, c("T1PP_", "t1se_", "t1ogoal_", "t1TFPa_", "t1TFPr_"))

Ошибка: векторная память исчерпана (предел достигнут?)

Теперь мне стало интересно, есть ли более эффективное решение? Спасибо!

1 Ответ

0 голосов
/ 22 февраля 2020

Итак, я нашел простое решение:
Вот первые 20 строк моего реального фрейма данных, чтобы вы понимали следующий код:

structure(list(durationt1 = 511, t1date_diff = 811, t1pa_1 = 4, 
    t1pa_2 = 5, t1pa_3 = 5, t1pa_4 = 2, t1pa_5 = 3, t1pa_6 = 4, 
    t1pa_7 = 4, t1pa_8 = 3, t1pa_9 = 4, t1pa_10 = 4, t1na_1 = 1, 
    t1na_2 = 1, t1na_3 = 3, t1na_4 = 1, t1na_5 = 1, t1na_6 = 1, 
    t1na_7_fa_2 = 3, t1na_8 = 1, t1na_9 = 1, t1na_10 = 1, t1fa_1 = 4, 
    t1fa_3 = 1, t1pp_1 = 5, t1pp_2 = 4, t1pp_3 = 6, t1pp_4 = 5, 
    t1pp_5 = 4, t1pp_7 = 5, t1pp_8 = 5, t1pp_9 = 5, t1pp_10 = 4, 
    t1pp_11 = 4, t1se_1 = 3, t1se_2 = 3, t1se_3 = 4, t1se_4 = 4, 
    t1se_5 = 4, t1se_6 = 4, t1se_7 = 3, t1se_8 = 4, t1ogoal_1 = 4, 
    t1ogoal_2 = 3, t1ogoal_3 = 3, t1ogoal_4 = 2, t1ogoal_5 = 3, 
    t1ogoal_6 = 4, t1ogoal_7 = 4, t1ogoal_9 = 3, t1ogoal_10 = 4, 
    t1ogoal_11 = 4, t1tfpa_1 = 4, t1tfpa_2 = 4, t1tfpa_3 = 4, 
    t1tfpa_4 = 4, t1tfpr_1 = 5, t1tfpr_2 = 4, t1tfpr_3 = 5, t1tfpr_4 = 5, 
    t1tffu_1 = 5, t1tffu_2 = 5, t1tffu_3 = 5, t1tffu_4 = 5, t1cpl_1 = 3, 
    t1cpl_2 = 3, t1cpl_3 = 3, t1cpl_4 = 4, t1cpl_5 = 3, t1cpl_6 = 3, 
    t1eff = 4, t1search_1 = 5, t1search_2 = 5, t1search_3 = 6, 
    t1wor_1 = 3, t1wor_2 = 2, t1wor_3 = 1, t1wor_4 = 1, t1scom_1 = 3, 
    t1scom_2 = 3, t1scom_3 = 3, t1angra = 9, t1anful = 9, t1anune = 70, 
    t1anpar = 10, t1ansel = 10, t1anint = 0, t1gaemp_1 = 5, t1gaemp_2 = 5, 
    t1gaemp_3 = 5, t1gaemp_4 = 5, t1gaemn_1 = 2, t1gaemn_2 = 2, 
    t1gaemn_3 = 1, t1gaemn_4 = 1, t1jaemp_1 = 5, t1jaemp_2 = 5, 
    t1jaemp_3 = 5, t1jaemp_4 = 5, t1jaemn_1 = 5, t1jaemn_2 = 3, 
    t1jaemn_3 = 1, t1jaemn_4 = 3, t1chjf_1 = 4, t1chjf_2 = 4, 
    t1chjf_3 = 4, t1hajf_1 = 2, t1hajf_2 = 3, t1hajf_3 = 2, t1chjs_1 = 5, 
    t1chjs_2 = 5, t1chjs_3 = 5, t1hajs_1 = 1, t1hajs_2 = 2, t1hajs_3 = 3, 
    t1heal1 = 3, t1sex = 1, t1age = 51, t1lang = 1, t1preint = 0, 
    t1presu = 0, t1prevo = 0, t1prept = 1, t1preft = 1, t1prese = 0, 
    t1preot = 0, t1stime = 2, t2job = NA_real_, t2pa_1 = NA_real_, 
    t2pa_2 = NA_real_, t2pa_3 = NA_real_, t2pa_4 = NA_real_, 
    t2pa_5 = NA_real_, t2pa_6 = NA_real_, t2pa_7 = NA_real_, 
    t2pa_8 = NA_real_, t2pa_9 = NA_real_, t2pa_10 = NA_real_, 
    t2na_1 = NA_real_, t2na_2 = NA_real_, t2na_3 = NA_real_, 
    t2na_4 = NA_real_, t2na_5 = NA_real_, t2na_6 = NA_real_, 
    t2na_7_fa_2 = NA_real_, t2na_8 = NA_real_, t2na_9 = NA_real_, 
    t2na_10 = NA_real_, t2fa_1 = NA_real_, t2fa_3 = NA_real_, 
    t2search_1 = NA_real_, t2search_2 = NA_real_, t2search_3 = NA_real_, 
    t2eff = NA_real_, t2empse_1 = NA_real_, t2empse_2 = NA_real_, 
    t2empse_3 = NA_real_, t2se_1 = NA_real_, t2se_2 = NA_real_, 
    t2se_3 = NA_real_, t2se_4 = NA_real_, t2se_5 = NA_real_, 
    t2se_6 = NA_real_, t2se_7 = NA_real_, t2se_8 = NA_real_, 
    t2wor_1 = NA_real_, t2wor_2 = NA_real_, t2wor_3 = NA_real_, 
    t2wor_4 = NA_real_, t2scom_1 = NA_real_, t2scom_2 = NA_real_, 
    t2scom_3 = NA_real_, t2cpl_1 = NA_real_, t2cpl_2 = NA_real_, 
    t2cpl_3 = NA_real_, t2cpl_4 = NA_real_, t2cpl_5 = NA_real_, 
    t2cpl_6 = NA_real_, t2angra = NA_real_, t2anful = NA_real_, 
    t2anune = NA_real_, t2anpar = NA_real_, t2ansel = NA_real_, 
    t2anint = NA_real_, t2gaemp_1 = NA_real_, t2gaemp_2 = NA_real_, 
    t2gaemp_3 = NA_real_, t2gaemp_4 = NA_real_, t2gaemn_1 = NA_real_, 
    t2gaemn_2 = NA_real_, t2gaemn_3 = NA_real_, t2gaemn_4 = NA_real_, 
    t2jaemp_1 = NA_real_, t2jaemp_2 = NA_real_, t2jaemp_3 = NA_real_, 
    t2jaemp_4 = NA_real_, t2jaemn_1 = NA_real_, t2jaemn_2 = NA_real_, 
    t2jaemn_3 = NA_real_, t2jaemn_4 = NA_real_, t2chjf_1 = NA_real_, 
    t2chjf_2 = NA_real_, t2chjf_3 = NA_real_, t2hajf_1 = NA_real_, 
    t2hajf_2 = NA_real_, t2hajf_3 = NA_real_, t2chjs_1 = NA_real_, 
    t2chjs_2 = NA_real_, t2chjs_3 = NA_real_, t2hajs_1 = NA_real_, 
    t2hajs_2 = NA_real_, t2hajs_3 = NA_real_, t2heal1 = NA_real_, 
    j3job = NA_real_, t3job_1 = NA_real_, t3pa_1 = NA_real_, 
    t3pa_2 = NA_real_, t3pa_3 = NA_real_, t3pa_4 = NA_real_, 
    t3pa_5 = NA_real_, t3pa_6 = NA_real_, t3pa_7 = NA_real_, 
    t3pa_8 = NA_real_, t3pa_9 = NA_real_, t3pa_10 = NA_real_, 
    t3na_1 = NA_real_, t3na_2 = NA_real_, t3na_3 = NA_real_, 
    t3na_4 = NA_real_, t3na_5 = NA_real_, t3na_6 = NA_real_, 
    t3na_7_fa_2 = NA_real_, t3na_8 = NA_real_, t3na_9 = NA_real_, 
    t3na_10 = NA_real_, t3fa_1 = NA_real_, t3fa_3 = NA_real_, 
    t3empse_1 = NA_real_, t3empse_2 = NA_real_, t3empse_3 = NA_real_, 
    t3tfpa_1 = NA_real_, t3tfpa_2 = NA_real_, t3tfpa_3 = NA_real_, 
    t3tfpa_4 = NA_real_, t3tfpr_1 = NA_real_, t3tfpr_2 = NA_real_, 
    t3tfpr_3 = NA_real_, t3tfpr_4 = NA_real_, t3tffu_1 = NA_real_, 
    t3tffu_2 = NA_real_, t3tffu_3 = NA_real_, t3tffu_4 = NA_real_, 
    t3se_1 = NA_real_, t3se_2 = NA_real_, t3se_3 = NA_real_, 
    t3se_4 = NA_real_, t3se_5 = NA_real_, t3se_6 = NA_real_, 
    t3se_7 = NA_real_, t3se_8 = NA_real_, t3pofit_1 = NA_real_, 
    t3pofit_2 = NA_real_, t3pofit_3 = NA_real_, t3nsfit_1 = NA_real_, 
    t3nsfit_2 = NA_real_, t3nsfit_3 = NA_real_, t3dafit_1 = NA_real_, 
    t3dafit_2 = NA_real_, t3dafit_3 = NA_real_, t3jobsa_1 = NA_real_, 
    t3jobsa_2 = NA_real_, t3jobsa_3 = NA_real_, t3mean_1 = NA_real_, 
    t3mean_2 = NA_real_, t3mean_3 = NA_real_, t3mean_4 = NA_real_, 
    t3mean_5 = NA_real_, t3mean_7 = NA_real_, t3angra = NA_real_, 
    t3anful = NA_real_, t3anpar = NA_real_, t3ansel = NA_real_, 
    t3anint = NA_real_, t3anune = NA_real_, t3heal1 = NA_real_, 
    j4job = NA_real_, t4job_1 = NA_real_, t4pa_1 = NA_real_, 
    t4pa_2 = NA_real_, t4pa_3 = NA_real_, t4pa_4 = NA_real_, 
    t4pa_5 = NA_real_, t4pa_6 = NA_real_, t4pa_7 = NA_real_, 
    t4pa_8 = NA_real_, t4pa_9 = NA_real_, t4pa_10 = NA_real_, 
    t4na_1 = NA_real_, t4na_2 = NA_real_, t4na_3 = NA_real_, 
    t4na_4 = NA_real_, t4na_5 = NA_real_, t4na_6 = NA_real_, 
    t4na_7_fa_2 = NA_real_, t4na_8 = NA_real_, t4na_9 = NA_real_, 
    t4na_10 = NA_real_, t4fa_1 = NA_real_, t4fa_3 = NA_real_, 
    t4tfpa_1 = NA_real_, t4tfpa_2 = NA_real_, t4tfpa_3 = NA_real_, 
    t4tfpa_4 = NA_real_, t4tfpr_1 = NA_real_, t4tfpr_2 = NA_real_, 
    t4tfpr_3 = NA_real_, t4tfpr_4 = NA_real_, t4tffu_1 = NA_real_, 
    t4tffu_2 = NA_real_, t4tffu_3 = NA_real_, t4tffu_4 = NA_real_, 
    t4se_1 = NA_real_, t4se_2 = NA_real_, t4se_3 = NA_real_, 
    t4se_4 = NA_real_, t4se_6 = NA_real_, t4se_7 = NA_real_, 
    t4se_8 = NA_real_, t4se_9 = NA_real_, t4pofit_1 = NA_real_, 
    t4pofit_2 = NA_real_, t4pofit_4 = NA_real_, t4nsfit_1 = NA_real_, 
    t4nsfit_2 = NA_real_, t4nsfit_4 = NA_real_, t4dafit_1 = NA_real_, 
    t4dafit_2 = NA_real_, t4dafit_4 = NA_real_, t4jobsa_1 = NA_real_, 
    t4jobsa_2 = NA_real_, t4jobsa_3 = NA_real_, t4mean_1 = NA_real_, 
    t4mean_2 = NA_real_, t4mean_3 = NA_real_, t4mean_4 = NA_real_, 
    t4mean_5 = NA_real_, t4mean_7 = NA_real_, t4angra = NA_real_, 
    t4anful = NA_real_, t4anpar = NA_real_, t4ansel = NA_real_, 
    t4anint = NA_real_, t4anune = NA_real_, t4heal1 = NA_real_), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))

В итоге я немного разобрался с функциями семейства map и пришёл к следующему решению.
Во-первых, я выбираю только те столбцы, для которых хочу вычислить составной балл и альфу Кронбаха.

library(tidyverse)
library(multicon)
library(psych)

# Stems of the multi-item scales to score. Item columns are named
# t<wave><stem>_<item>, e.g. t1pa_1 or t3tfpr_4.
scale_stems <- c(
  "pa", "na", "pp", "se", "ogoal", "tfpa", "tfpr", "tffu", "cpl", "search",
  "wor", "scom", "gaemp", "gaemn", "jaemp", "jaemn", "chjf", "hajf", "chjs",
  "hajs", "empse", "pofit", "nsfit", "dafit", "jobsa", "mean"
)

# Build the regex programmatically instead of as one literal wrapped over
# several lines: a line break inside the string becomes part of the pattern,
# so the alternative that follows it can never match. The pattern is anchored
# so that a short stem such as "pa" cannot match inside "tfpa".
item_pattern <- paste0("^t.(", paste(scale_stems, collapse = "|"), ")_")

# Keep only the item columns of the scales listed above.
comp_dat <- mplus_dat %>%
  select(matches(item_pattern))

Я использую split.default(), чтобы разбить фрейм данных на группы столбцов по префиксу имени (части имени до первого «_»):

# Group the columns of `comp_dat` by scale: the key is the column name with
# everything from the first underscore onwards removed. split.default() is
# called directly so the data frame is split by column, as a list.
comp_split <- split.default(
  comp_dat,
  sub("_.*", "", names(comp_dat))
)

Наконец, я применяю map(), чтобы получить составной балл и альфу Кронбаха для каждой группы:

# Composite score per scale: one multicon::composite() result for each
# group of item columns, allowing missing items via nomiss = 0.8.
comp <- map(comp_split, ~ multicon::composite(.x, nomiss = 0.8))

# Reliability per scale: fit psych::alpha() on each group and keep only its
# `total` summary element.
alph <- map(comp_split, ~ psych::alpha(.x)$total)

# Bind the per-scale composites side by side, one column per element of
# `comp`.
# NOTE(review): if the elements are plain vectors, cbind() returns a matrix
# rather than a data frame despite the `_df` name - confirm.
comp_df <- do.call("cbind", comp)
# Stack the per-scale alpha summaries, one row per element of `alph`.
alph_df <- do.call("rbind", alph)
# Bare names so both results are auto-printed when run at top level.
comp_df
alph_df

В итоге я получаю два замечательных df, содержащих информацию, которую я хотел.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...