dplyr неправильно выполняет соединение по нескольким столбцам - PullRequest
0 голосов
/ 23 февраля 2019

Я пытаюсь выполнить левое соединение (left join) двух фреймов данных по нескольким совпадающим столбцам - все столбцы имеют числовой класс:

Первый - test_trips:

> test_trips
  start_station_longitude start_station_latitude end_station_longitude end_station_latitude
1               -122.4264               37.76142             -122.4312             37.78352
2               -122.3942               37.79539             -122.3942             37.79539
3               -121.8948               37.34876             -121.8771             37.32600
4               -122.4212               37.77379             -122.3997             37.78095
5               -122.4262               37.77643             -122.4035             37.78898
6               -122.2825               37.80369             -122.2948             37.80532

Второй - test_distance:

> test_distance
  start_station_longitude start_station_latitude end_station_longitude end_station_latitude distance
1               -122.4264               37.76142             -122.4312             37.78352     3162
2               -122.3942               37.79539             -122.3942             37.79539        0
3               -121.8948               37.34876             -121.8771             37.32600     3591
4               -122.4212               37.77379             -122.3997             37.78095     2843
5               -122.4262               37.77643             -122.4035             37.78898     3088
6               -122.2825               37.80369             -122.2948             37.80532     1198

Обратите внимание, что значения координат в каждом из 4 столбцов обеих таблиц абсолютно одинаковы. Так почему же этот left join не работает?

output <- dplyr::left_join(test_trips, test_distance, by=c( "start_station_longitude", "start_station_latitude",  "end_station_longitude", "end_station_latitude"))

На выходе получаются 4 значения NA, чего я не понимаю - что идёт не так? Я предполагаю, что это связано со структурой test_distance, но не уверен, как её изменить, поскольку class() сообщает мне, что это обычный data.frame.

> output
  start_station_longitude start_station_latitude end_station_longitude end_station_latitude distance
1               -122.4264               37.76142             -122.4312             37.78352       NA
2               -122.3942               37.79539             -122.3942             37.79539        0
3               -121.8948               37.34876             -121.8771             37.32600       NA
4               -122.4212               37.77379             -122.3997             37.78095       NA
5               -122.4262               37.77643             -122.4035             37.78898       NA
6               -122.2825               37.80369             -122.2948             37.80532     1198

Вот образец данных:

> dput(test_trips)
structure(list(start_station_longitude = c(-122.4264353, -122.394203, 
-121.894797831774, -122.42123901844, -122.426244020462, -122.282497
), start_station_latitude = c(37.7614205, 37.795392, 37.3487586867448, 
37.7737932060887, 37.7764348192047, 37.8036865), end_station_longitude = c(-122.431157827377, 
-122.394203, -121.87712, -122.399749159813, -122.403452, -122.2948365
), end_station_latitude = c(37.783520835261, 37.795392, 37.3259984, 
37.7809545996075, 37.788975, 37.8053183)), row.names = c(NA, 
6L), class = "data.frame")

dput(test_distance)
structure(list(start_station_longitude = c(-122.4264353, -122.394203, 
-121.894797831774, -122.42123901844, -122.426244020462, -122.282497
), start_station_latitude = c(37.7614205, 37.795392, 37.3487586867448, 
37.7737932060887, 37.7764348192047, 37.8036865), end_station_longitude = c(-122.431157827377, 
-122.394203, -121.87712, -122.399749159813, -122.403452, -122.2948365
), end_station_latitude = c(37.783520835261, 37.795392, 37.3259984, 
37.7809545996075, 37.788975, 37.8053183), distance = c(3162, 
0, 3591, 2843, 3088, 1198)), class = "data.frame", row.names = c(NA, 
6L))


> sessionInfo()
R version 3.5.2 (2018-12-20)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.3

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dplyr_0.7.8 readr_1.3.1

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.0       crayon_1.3.4     assertthat_0.2.0 R6_2.3.0         jsonlite_1.6    
 [6] magrittr_1.5     pillar_1.3.1     rlang_0.3.1      rstudioapi_0.9.0 bindrcpp_0.2.2  
[11] tools_3.5.2      glue_1.3.0       purrr_0.3.0      hms_0.4.2        yaml_2.2.0      
[16] compiler_3.5.2   pkgconfig_2.0.2  tidyselect_0.2.5 bindr_0.1.1      tibble_2.0.1  
...