ifelse меняет результаты вычисления коэффициента Жаккара в столбцах - PullRequest
0 голосов
/ 30 января 2019

Вот мой df:

df <- structure(list(keysData_userId = c("28875270", "28875270", "28875270", 
"28875270", "02745556", "02745556"), userData_ssids = list(list(
    "pw+eOP8BWvqpqwqsq+4uEw==", "2vqQgSqaNZzVefr57EAFnA==", "DIIwsI+SsRtz38608KdyIQ==", 
    "orAdYUICQi/xv6zgbhRYmw==", "OtYQYwavQCERlsf6HH0ZsQ==", "hn5XvQYscWmZXcA8wFQcGQ==", 
    "WqE5ldD9e5Sa+nqvS80h3A==", "rJ1N3ONwDBK+jwFf+7xeHg==", "dqJbhRGl85wb3HYfZWNuSw==", 
    "Qzt91nP7itZQB7vbnaNUtQ==", "CYZ8YrYvqSERhKh7OexKow==", "koM4I2a8XxM172iIv3/0lA==", 
    "kT+XdHxXLJU9EtsaRphM+Q==", "z/2vDhOHZhefkno4Stxv3A==", "gK5/tDWk+rD5g884+rhKcA==", 
    "EyqU9u4YO0nioGsus/+xNg==", "BJwVezrL9i1fS22jJJxGgg==", "sSMJY9haVzMAN2JuFov38w==", 
    "Zcfle7YX/6xu7FfFa8gQew==", "XK/GpG4qVrLD+MTZLwPzbw==", "vIlSMBFAUI6leYuLQI/cWw==", 
    "pLUNsUTL2ZqdWJzXUisjJg==", "5Y+BgDeYp5g5A+aIPOVrqA==", "6mgxyargOKonpJCN6alnZA==", 
    "1qJm0U/HDURNetB6uN1VGw==", "eZUxEkMd346RfYbiSws0hg==", "stbnri6PGyn7WhGXt2hEUw==", 
    "uha+mGXPsA6a3by3aKco5g==", "i9elAFSnODHaTpETZXKJkQ==", "3h9q1Gk+Ip/IjrsNhz5a4A==", 
    "/gMqkZ9iGQWqDr+17XrnUg==", "CSXPN444X8B3rCTHMFZRsA==", "JYrOYqYZWoiuvEaP8cytiA==", 
    "PXVkKpy/Zw8vwU0mgbjUug==", "y8GbB2YkGNXxTMVWVylZJA=="), 
    list("pw+eOP8BWvqpqwqsq+4uEw==", "2vqQgSqaNZzVefr57EAFnA==", 
        "DIIwsI+SsRtz38608KdyIQ==", "orAdYUICQi/xv6zgbhRYmw==", 
        "OtYQYwavQCERlsf6HH0ZsQ==", "hn5XvQYscWmZXcA8wFQcGQ==", 
        "WqE5ldD9e5Sa+nqvS80h3A==", "rJ1N3ONwDBK+jwFf+7xeHg==", 
        "dqJbhRGl85wb3HYfZWNuSw==", "Qzt91nP7itZQB7vbnaNUtQ==", 
        "CYZ8YrYvqSERhKh7OexKow==", "koM4I2a8XxM172iIv3/0lA==", 
        "kT+XdHxXLJU9EtsaRphM+Q==", "z/2vDhOHZhefkno4Stxv3A==", 
        "gK5/tDWk+rD5g884+rhKcA==", "EyqU9u4YO0nioGsus/+xNg==", 
        "BJwVezrL9i1fS22jJJxGgg==", "sSMJY9haVzMAN2JuFov38w==", 
        "Zcfle7YX/6xu7FfFa8gQew==", "XK/GpG4qVrLD+MTZLwPzbw==", 
        "vIlSMBFAUI6leYuLQI/cWw==", "pLUNsUTL2ZqdWJzXUisjJg==", 
        "5Y+BgDeYp5g5A+aIPOVrqA==", "6mgxyargOKonpJCN6alnZA==", 
        "1qJm0U/HDURNetB6uN1VGw==", "eZUxEkMd346RfYbiSws0hg==", 
        "stbnri6PGyn7WhGXt2hEUw==", "uha+mGXPsA6a3by3aKco5g==", 
        "i9elAFSnODHaTpETZXKJkQ==", "3h9q1Gk+Ip/IjrsNhz5a4A==", 
        "/gMqkZ9iGQWqDr+17XrnUg==", "CSXPN444X8B3rCTHMFZRsA==", 
        "JYrOYqYZWoiuvEaP8cytiA==", "PXVkKpy/Zw8vwU0mgbjUug==", 
        "y8GbB2YkGNXxTMVWVylZJA=="), list("pw+eOP8BWvqpqwqsq+4uEw==", 
        "2vqQgSqaNZzVefr57EAFnA==", "DIIwsI+SsRtz38608KdyIQ==", 
        "orAdYUICQi/xv6zgbhRYmw==", "OtYQYwavQCERlsf6HH0ZsQ==", 
        "hn5XvQYscWmZXcA8wFQcGQ==", "WqE5ldD9e5Sa+nqvS80h3A==", 
        "rJ1N3ONwDBK+jwFf+7xeHg==", "dqJbhRGl85wb3HYfZWNuSw==", 
        "Qzt91nP7itZQB7vbnaNUtQ==", "CYZ8YrYvqSERhKh7OexKow==", 
        "koM4I2a8XxM172iIv3/0lA==", "kT+XdHxXLJU9EtsaRphM+Q==", 
        "z/2vDhOHZhefkno4Stxv3A==", "gK5/tDWk+rD5g884+rhKcA==", 
        "EyqU9u4YO0nioGsus/+xNg==", "BJwVezrL9i1fS22jJJxGgg==", 
        "sSMJY9haVzMAN2JuFov38w==", "Zcfle7YX/6xu7FfFa8gQew==", 
        "XK/GpG4qVrLD+MTZLwPzbw==", "vIlSMBFAUI6leYuLQI/cWw==", 
        "pLUNsUTL2ZqdWJzXUisjJg==", "5Y+BgDeYp5g5A+aIPOVrqA==", 
        "6mgxyargOKonpJCN6alnZA==", "1qJm0U/HDURNetB6uN1VGw==", 
        "eZUxEkMd346RfYbiSws0hg==", "stbnri6PGyn7WhGXt2hEUw==", 
        "uha+mGXPsA6a3by3aKco5g==", "i9elAFSnODHaTpETZXKJkQ==", 
        "3h9q1Gk+Ip/IjrsNhz5a4A==", "/gMqkZ9iGQWqDr+17XrnUg==", 
        "CSXPN444X8B3rCTHMFZRsA==", "JYrOYqYZWoiuvEaP8cytiA==", 
        "PXVkKpy/Zw8vwU0mgbjUug==", "y8GbB2YkGNXxTMVWVylZJA=="), 
    list("pw+eOP8BWvqpqwqsq+4uEw==", "2vqQgSqaNZzVefr57EAFnA==", 
        "DIIwsI+SsRtz38608KdyIQ==", "orAdYUICQi/xv6zgbhRYmw==", 
        "OtYQYwavQCERlsf6HH0ZsQ==", "hn5XvQYscWmZXcA8wFQcGQ==", 
        "WqE5ldD9e5Sa+nqvS80h3A==", "rJ1N3ONwDBK+jwFf+7xeHg==", 
        "dqJbhRGl85wb3HYfZWNuSw==", "Qzt91nP7itZQB7vbnaNUtQ==", 
        "CYZ8YrYvqSERhKh7OexKow==", "koM4I2a8XxM172iIv3/0lA==", 
        "kT+XdHxXLJU9EtsaRphM+Q==", "z/2vDhOHZhefkno4Stxv3A==", 
        "gK5/tDWk+rD5g884+rhKcA==", "EyqU9u4YO0nioGsus/+xNg==", 
        "BJwVezrL9i1fS22jJJxGgg==", "sSMJY9haVzMAN2JuFov38w==", 
        "Zcfle7YX/6xu7FfFa8gQew==", "XK/GpG4qVrLD+MTZLwPzbw==", 
        "vIlSMBFAUI6leYuLQI/cWw==", "pLUNsUTL2ZqdWJzXUisjJg==", 
        "5Y+BgDeYp5g5A+aIPOVrqA==", "6mgxyargOKonpJCN6alnZA==", 
        "1qJm0U/HDURNetB6uN1VGw==", "eZUxEkMd346RfYbiSws0hg==", 
        "stbnri6PGyn7WhGXt2hEUw==", "uha+mGXPsA6a3by3aKco5g==", 
        "i9elAFSnODHaTpETZXKJkQ==", "3h9q1Gk+Ip/IjrsNhz5a4A==", 
        "/gMqkZ9iGQWqDr+17XrnUg==", "CSXPN444X8B3rCTHMFZRsA==", 
        "JYrOYqYZWoiuvEaP8cytiA==", "PXVkKpy/Zw8vwU0mgbjUug==", 
        "y8GbB2YkGNXxTMVWVylZJA=="), list("VIO6sRJJVSYP8/3kR/dOaA==", 
        "6yoNmnP0VG/8xBrKXtiWZA==", "5gNFR+G2GA0S2l6he7mDSg==", 
        "WjQXugClKE6ZVHMUg1UgDQ==", "KMILQryyVK5hhVmgbBXCzQ==", 
        "CBSUUr+UqZs5deQDWwDrQA==", "tUeB98n1w4y5005hsxCo0w==", 
        "wI5txmZwjvTx2GiLIZV5kw==", "y2WhAfb97+EM7blhzUjzYA==", 
        "+g2IRaeTsrjvZjXGGY9ATQ==", "VEDNhwW28IOAi1wJ7sEpow==", 
        "HxSOan5L8TtDqpvPkoo5tg==", "++iveCai1fyiYao34MvT/w==", 
        "8RMTl9S2jLIJYcG1+RTuvA==", "OmKQRPOjBTzIwFQFEhmmXA==", 
        "M/XgtX1gjkLm9/f2cP479g==", "/9aIGY2pUNGCXQiFux+ZBw==", 
        "fgn0vl3x5a0QjQ4QvLNl5w==", "sZavLq+/4vGKhSTNyTb/YA==", 
        "EvRtCq6qvnkmALPdqJ6LWA==", "D9FiHLb8N1dgzhOVCKgYNA==", 
        "Bg/4LpOkLerFJdlHbb+5eQ==", "rJ1N3ONwDBK+jwFf+7xeHg==", 
        "wh+Wm18D0z1D4E+PE252gg==", "asLFduD+9JZg7KCivdZ/eA==", 
        "e5XD6pO6bYd7HObNZTki5w==", "ee4QCZ6vY9JrrNqC5ToHTw==", 
        "DxNG9o0QKfGDe61Hj4lJQw==", "0pjEOPlfRvlvycy7mBKiyA==", 
        "ExfdF0qNBV063kWpRck8Sw==", "rCZsOoSm5ZoEG0QgewGtIA==", 
        "PkftdDZdI19LkL1WL0gYZg==", "xic9q23xTUjGgrSLa44z5g==", 
        "JA9JNxsFq4kK0eJfwYGNOQ==", "8F8h7uSXQSrHEugoPiJFMw==", 
        "MxfoaAY0AsBPJ2SztNrhvA==", "0Vq7etGPlgM65+5NcRoFWQ==", 
        "rBUhe9+q+Z7huu/ZdAG8kg==", "unsDylT2GsY103getAp7/A==", 
        "upu8gH//1HwS4L43hirDEA==", "N3XD119fYm8IddeyqlxqGg==", 
        "mXS7xkLmrdw7L8SckwWV1w==", "gWsRLGEFs+vVN4KKOa9IGA==", 
        "mXS7xkLmrdw7L8SckwWV1w==", "gWsRLGEFs+vVN4KKOa9IGA==", 
        "QF5NuR6URsmB25X63j/6vQ==", "0zWoVCTBXceiQirqPcKkmQ==", 
        "lsGg8adgmWvtvj1k2tz9Kg==", "sc9tewHLtpdcnPAAu2BKLg==", 
        "qG8D4WacfI7R46ClAdToFw==", "Bzdv8hgZS0MIPUTjjI3LQg==", 
        "6JijJ5PmwxGmoADVupg6aQ==", "HKVpzqHRnAJdKRChxzcOPw==", 
        "VF5X9BMA+3xP6IK52mtqbw==", "c17fWUJp4oZsFqvcSdby2A==", 
        "T4cGZmFe3pOhtkrUr/OqbA==", "ujcmLCu/Z3rmc1bxo1kteg==", 
        "DExDwKlPw9IhD6WNym4J2g==", "5ufWc1U4Q7JTFBl6nEGSHg==", 
        "9+caByDrw2iwezlX1pnETQ==", "mB0zR497KpQ2LLJGc4Hdag==", 
        "CeSftgyVtat7ND1attLHww==", "1bn043f4hFrS/Y5D1BUJGg==", 
        "UswTICFT2mMi0JKGVGPakw==", "CMYLe7ol2x3ntjtR4ohOlA==", 
        "TjK1XQB1mOZLWTgk8BJ3qQ==", "PIbknuAVa75XH8S18Fkvfg==", 
        "vljRuCwUQcxd0RWD2RVuww==", "Gv/Erp7ep3YenC26GIYz4w==", 
        "E9n7Kzad7lYVDgHiL7FDJA==", "qc/TVhwg6t1j2t3Fee5ojg==", 
        "j5p+N0E/ZHptWTURmZyRsw==", "eRa/zWPBcBQyHSyzr3+ZFw==", 
        "tipG1C0f1iwiSXuDwEBJ+A==", "WWngfBu3SQQq/ymNhenMZQ==", 
        "Ipmw6tbv7PZ+93TuyEcX8w==", "+wDlOqw7GTkAbMLhpfiQSw==", 
        "xKTw0hQEesjQ0XbiQ39eDA==", "g64FMbjK2CwNsJkcRR/BTQ==", 
        "5+Jo1ET7h9sWedg3F8pu2Q==", "wvzT4Y7n3VC43CfDvW5A9Q==", 
        "of+rgxTCjc0izEfb+j+fqg==", "UcfjGetRjRI0ZN/1Sw4IGQ==", 
        "gQ1kDjhVHMgOEYJjadu2kQ==", "Yv8rjwHT9BBhQ+D9AEUENA==", 
        "W3xjgagfHV7dpcinNjR6gg==", "8x0DThye4rkOcGe2JE4i/g==", 
        "0NOGI2A/F3d9aOWUjq3MPA==", "9lEKLdxnLMuTadtT9KEvgA==", 
        "CBpFtzDI84UrGighyFNdQA=="), list("VIO6sRJJVSYP8/3kR/dOaA==", 
        "6yoNmnP0VG/8xBrKXtiWZA==", "5gNFR+G2GA0S2l6he7mDSg==", 
        "WjQXugClKE6ZVHMUg1UgDQ==", "KMILQryyVK5hhVmgbBXCzQ==", 
        "CBSUUr+UqZs5deQDWwDrQA==", "tUeB98n1w4y5005hsxCo0w==", 
        "wI5txmZwjvTx2GiLIZV5kw==", "y2WhAfb97+EM7blhzUjzYA==", 
        "+g2IRaeTsrjvZjXGGY9ATQ==", "VEDNhwW28IOAi1wJ7sEpow==", 
        "HxSOan5L8TtDqpvPkoo5tg==", "++iveCai1fyiYao34MvT/w==", 
        "8RMTl9S2jLIJYcG1+RTuvA==", "OmKQRPOjBTzIwFQFEhmmXA==", 
        "M/XgtX1gjkLm9/f2cP479g==", "/9aIGY2pUNGCXQiFux+ZBw==", 
        "fgn0vl3x5a0QjQ4QvLNl5w==", "sZavLq+/4vGKhSTNyTb/YA==", 
        "EvRtCq6qvnkmALPdqJ6LWA==", "D9FiHLb8N1dgzhOVCKgYNA==", 
        "Bg/4LpOkLerFJdlHbb+5eQ==", "rJ1N3ONwDBK+jwFf+7xeHg==", 
        "wh+Wm18D0z1D4E+PE252gg==", "asLFduD+9JZg7KCivdZ/eA==", 
        "e5XD6pO6bYd7HObNZTki5w==", "ee4QCZ6vY9JrrNqC5ToHTw==", 
        "DxNG9o0QKfGDe61Hj4lJQw==", "0pjEOPlfRvlvycy7mBKiyA==", 
        "ExfdF0qNBV063kWpRck8Sw==", "rCZsOoSm5ZoEG0QgewGtIA==", 
        "PkftdDZdI19LkL1WL0gYZg==", "xic9q23xTUjGgrSLa44z5g==", 
        "JA9JNxsFq4kK0eJfwYGNOQ==", "8F8h7uSXQSrHEugoPiJFMw==", 
        "MxfoaAY0AsBPJ2SztNrhvA==", "0Vq7etGPlgM65+5NcRoFWQ==", 
        "rBUhe9+q+Z7huu/ZdAG8kg==", "unsDylT2GsY103getAp7/A==", 
        "upu8gH//1HwS4L43hirDEA==", "N3XD119fYm8IddeyqlxqGg==", 
        "mXS7xkLmrdw7L8SckwWV1w==", "gWsRLGEFs+vVN4KKOa9IGA==", 
        "mXS7xkLmrdw7L8SckwWV1w==", "gWsRLGEFs+vVN4KKOa9IGA==", 
        "QF5NuR6URsmB25X63j/6vQ==", "0zWoVCTBXceiQirqPcKkmQ==", 
        "lsGg8adgmWvtvj1k2tz9Kg==", "sc9tewHLtpdcnPAAu2BKLg==", 
        "qG8D4WacfI7R46ClAdToFw==", "Bzdv8hgZS0MIPUTjjI3LQg==", 
        "6JijJ5PmwxGmoADVupg6aQ==", "HKVpzqHRnAJdKRChxzcOPw==", 
        "VF5X9BMA+3xP6IK52mtqbw==", "c17fWUJp4oZsFqvcSdby2A==", 
        "T4cGZmFe3pOhtkrUr/OqbA==", "ujcmLCu/Z3rmc1bxo1kteg==", 
        "DExDwKlPw9IhD6WNym4J2g==", "5ufWc1U4Q7JTFBl6nEGSHg==", 
        "9+caByDrw2iwezlX1pnETQ==", "mB0zR497KpQ2LLJGc4Hdag==", 
        "CeSftgyVtat7ND1attLHww==", "1bn043f4hFrS/Y5D1BUJGg==", 
        "UswTICFT2mMi0JKGVGPakw==", "CMYLe7ol2x3ntjtR4ohOlA==", 
        "TjK1XQB1mOZLWTgk8BJ3qQ==", "PIbknuAVa75XH8S18Fkvfg==", 
        "vljRuCwUQcxd0RWD2RVuww==", "Gv/Erp7ep3YenC26GIYz4w==", 
        "E9n7Kzad7lYVDgHiL7FDJA==", "qc/TVhwg6t1j2t3Fee5ojg==", 
        "j5p+N0E/ZHptWTURmZyRsw==", "eRa/zWPBcBQyHSyzr3+ZFw==", 
        "tipG1C0f1iwiSXuDwEBJ+A==", "WWngfBu3SQQq/ymNhenMZQ==", 
        "Ipmw6tbv7PZ+93TuyEcX8w==", "+wDlOqw7GTkAbMLhpfiQSw==", 
        "xKTw0hQEesjQ0XbiQ39eDA==", "g64FMbjK2CwNsJkcRR/BTQ==", 
        "5+Jo1ET7h9sWedg3F8pu2Q==", "wvzT4Y7n3VC43CfDvW5A9Q==", 
        "of+rgxTCjc0izEfb+j+fqg==", "UcfjGetRjRI0ZN/1Sw4IGQ==", 
        "gQ1kDjhVHMgOEYJjadu2kQ==", "Yv8rjwHT9BBhQ+D9AEUENA==", 
        "W3xjgagfHV7dpcinNjR6gg==", "8x0DThye4rkOcGe2JE4i/g==", 
        "0NOGI2A/F3d9aOWUjq3MPA==", "9lEKLdxnLMuTadtT9KEvgA==", 
        "CBpFtzDI84UrGighyFNdQA==")), checkPointAdditionalData_permissions = list(
    list("ACCESS_WIFI_STATE", "ACCESS_FINE_LOCATION", "ACCESS_COARSE_LOCATION"), 
    list("ACCESS_WIFI_STATE", "ACCESS_FINE_LOCATION", "ACCESS_COARSE_LOCATION"), 
    list("ACCESS_WIFI_STATE", "ACCESS_FINE_LOCATION", "ACCESS_COARSE_LOCATION"), 
    list("ACCESS_WIFI_STATE", "ACCESS_FINE_LOCATION", "ACCESS_COARSE_LOCATION"), 
    list("ACCESS_WIFI_STATE"), list("ACCESS_WIFI_STATE"))), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -6L))

Я хочу рассчитать коэффициент Жаккара с помощью следующей функции:

# Jaccard similarity of two collections treated as sets: the number of
# distinct elements they share divided by the number of distinct elements
# found in either of them.
#
# vector1, vector2: vectors (or lists) whose elements are compared.
# Returns a single number; when both inputs are empty the ratio is 0 / 0,
# i.e. NaN.
jaccard <- function(vector1, vector2) {
  shared <- intersect(vector1, vector2)
  combined <- union(vector1, vector2)
  length(shared) / length(combined)
}

# Element-wise version of jaccard(): pairs up the i-th entries of its two
# arguments (e.g. two list columns) and returns one similarity per pair.
jaccardV <- Vectorize(jaccard, vectorize.args = c("vector1", "vector2"))

Но по неизвестной причине я получаю 0, когда использую ifelse, и 1, когда его не использую:

# Attempt that produces 0 in j_test (the behavior being asked about).
# Goal: Jaccard similarity between each row's SSID list and the previous
# row's, or -1 when the row lacks the "ACCESS_WIFI_STATE" permission.
df %>% 
  head() %>%
  as_tibble() %>%
  dplyr::select(keysData_userId, userData_ssids, checkPointAdditionalData_permissions) %>%
  dplyr::mutate(lag_userData_ssids = lag(userData_ssids),
                # NOTE: the left-hand side of %in% has length 1 and the
                # right-hand side is the unlisted permissions column, so this
                # test is a single TRUE/FALSE, not one value per row.
                # ifelse() returns a result shaped like its test, so only the
                # first element of `yes` (or `no`) is used.
                j_test = ifelse("ACCESS_WIFI_STATE" %in% unlist(checkPointAdditionalData_permissions),
                                yes = jaccardV(userData_ssids, lag_userData_ssids), no = -1))

Результат: в столбце с коэффициентом Жаккара получается 0.

Но когда я использую:

# Reference computation without ifelse(): Jaccard similarity between each
# row's SSID list and the previous row's.
df %>%
  head() %>%
  as_tibble() %>%
  dplyr::select(
    keysData_userId,
    userData_ssids,
    checkPointAdditionalData_permissions
  ) %>%
  # Step 1: previous row's SSIDs (the first row has no predecessor).
  dplyr::mutate(lag_userData_ssids = lag(userData_ssids)) %>%
  # Step 2: per-row similarity to the previous row.
  dplyr::mutate(j_test = jaccardV(userData_ssids, lag_userData_ssids))

В этом случае получаются нужные мне значения.

Пожалуйста, подскажите, почему ifelse искажает мои результаты.

1 Ответ

0 голосов
/ 30 января 2019

Проблема в том, что условие в ifelse проверяется только один раз: оно лишь определяет, присутствует ли «ACCESS_WIFI_STATE» хотя бы в одной из строк. Поскольку проверка выполняется только один раз, ifelse возвращает одно значение. В данном случае это результат для первой строки, равный 0.

Вам нужно изменить вызов ifelse так, чтобы условие проверялось для каждой строки отдельно. Здесь я просто использую sapply, чтобы проверить каждую строку и вернуть TRUE / FALSE для каждой из них:

# Per-row Jaccard similarity between each row's SSID list and the previous
# row's, computed only for rows whose permissions include
# "ACCESS_WIFI_STATE"; every other row gets the sentinel value -1.
df %>%
  head() %>%
  as_tibble() %>%
  dplyr::select(
    keysData_userId, userData_ssids, checkPointAdditionalData_permissions
  ) %>%
  dplyr::mutate(
    # Namespace-qualified like the other dplyr verbs here, so stats::lag is
    # never picked up when dplyr is not attached. The first row has no
    # predecessor and gets NA.
    lag_userData_ssids = dplyr::lag(userData_ssids),
    j_test = ifelse(
      # The test must hold one logical per row so that ifelse() returns one
      # value per row. vapply() is the type-stable form of sapply(): it always
      # yields a logical vector, even when there are zero rows.
      vapply(
        checkPointAdditionalData_permissions,
        function(x) "ACCESS_WIFI_STATE" %in% x,
        logical(1)
      ),
      yes = jaccardV(userData_ssids, lag_userData_ssids),
      no = -1
    )
  )

возвращает:

# A tibble: 6 x 5
  keysData_userId userData_ssids checkPointAdditionalData_permissions lag_userData_ssids  j_test
  <chr>           <list>         <list>                               <list>               <dbl>
1 28875270        <list [35]>    <list [3]>                           <lgl [1]>          0      
2 28875270        <list [35]>    <list [3]>                           <list [35]>        1      
3 28875270        <list [35]>    <list [3]>                           <list [35]>        1      
4 28875270        <list [35]>    <list [3]>                           <list [35]>        1      
5 02745556        <list [90]>    <list [1]>                           <list [35]>        0.00820
6 02745556        <list [90]>    <list [1]>                           <list [90]>        1      

Это должно быть именно то, что вам нужно.

...