Слияние с данными в зависимости от их широты - PullRequest
1 голос
/ 02 мая 2020

Я просто пытаюсь объединить (merge / join) два набора данных по широте. Всякий раз, когда я запускаю приведённый ниже код, он возвращает кадр данных, в котором совпадает только часть широт. Я попытался преобразовать столбец "Lat" из числового типа в символьный, но это ничего не изменило. Я также безуспешно пытался использовать функцию join из пакета plyr. Я не знаю, что делать дальше. Спасибо.

    df1<-dput(head(psa.fall))
    structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 21L), .Label = c("Barren Island Mud 1", 
    "BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3", 
    "BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)", 
    "BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3", 
    "BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E", 
    "BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1", 
    "BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1", 
    "BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5", 
    "BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2", 
    "EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2", 
    "ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud", 
    "ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh", 
    "MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1", 
    "MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4", 
    "MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4", 
    "WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC", 
    "WP30 IT MARE"), class = "factor"), Season = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L), .Label = c("", "Fall", "Spring", "Spring?"
    ), class = "factor"), Refuge = structure(c(5L, 7L, 2L, 3L, 2L, 
    2L), .Label = c("", "Barren Island", "Bishop's Head", "Bishops Head", 
    "Blackwater", "Eastern Neck", "Martin", "PWRC"), class = "factor"), 
        HType = structure(c(6L, 4L, 5L, 4L, 3L, 3L), .Label = c("", 
        "Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), 
        Long = c(-76.109109, -75.99733, -76.261634, -76.038959, -76.256617, 
        -76.256617), Lat = c(38.441089, 37.99369, 38.336058, 38.224469, 
        38.326234, 38.326234), Prey.Group = structure(c(1L, 1L, 1L, 
        1L, 1L, 1L), .Label = c("Melampus", "Ruppia", "Scirpus", 
        "Zannichellia"), class = "factor"), IntakeEnergy = c(1125780.01353144, 
        296957.72010475, 228258.546050666, 642669.69276401, 3563714.25149588, 
        89135338.9701911), flycost = c(1.0957759890896, 1.2968676, 
        1.0957759890896, 1.2968676, 1.2968676, 1.2968676), foragcost = c(114.46318005888, 
        190.22407366464, 114.46318005888, 190.22407366464, 190.22407366464, 
        190.22407366464)), row.names = c(1L, 5L, 6L, 7L, 8L, 9L), class = "data.frame")
    > dput(df2)
    structure(list(Long = c(-76.00713, -75.99354, -75.99358, -75.9906, 
    -75.99733, -76.01407, -76.00528, -76.00521, -76.03746, -76.03896, 
    -76.04884, -76.03757, -76.05656, -76.03869, -76.25662, -76.26163, 
    -76.26205, -76.2589, -76.0235, -76.05671, -76.06332, -76.10363, 
    -76.05714, -76.22003, -76.14641, -76.01762, -76.02586, -76.23522, 
    -76.23491, -76.10911, -76.09617, -76.21124, -76.21531, -76.23986, 
    -76.20995, -76.21661, -76.2181, -76.21547, -76.22519, -76.23172, 
    -76.2195), Lat = c(37.98227, 37.98833, 37.98837, 37.99139, 37.99369, 
    38.01108, 38.01231, 38.01232, 38.22194, 38.22447, 38.22694, 38.22842, 
    38.22987, 38.23255, 38.32623, 38.33606, 38.33905, 38.34116, 38.39138, 
    38.3923, 38.39708, 38.40351, 38.40959, 38.41026, 38.41795, 38.41913, 
    38.42648, 38.43055, 38.43141, 38.44109, 38.44402, 39.00996, 39.01725, 
    39.02677, 39.03028, 39.03264, 39.03887, 39.04036, 39.04065, 39.04537, 
    39.05421), Closest_Disturbance_Distance_meters = c(171.9037327, 
    1482.459447, 1479.654612, 1805.389171, 1368.18442, 530.3428881, 
    1125.319912, 1130.976935, 24.38768214, 25.72719709, 96.13002701, 
    425.557066, 115.7363179, 792.6797843, 1821.373094, 1610.666562, 
    1303.502221, 1114.045544, 1896.217297, 812.0873918, 55.86925543, 
    416.1371901, 100.3389446, 669.345459, 489.3282703, 698.6258905, 
    137.350969, 32.27241966, 17.7924804, 204.5568224, 56.99328478, 
    8.060487078, 324.2993513, 499.7276705, 235.5774131, 321.684027, 
    256.9772101, 483.788136, 356.2340047, 222.045667, 172.7219362
    )), row.names = c(NA, -41L), class = "data.frame")

    # Left join on Lat: keeps all df1 rows; df2 columns are filled in only
    # where the Lat values of both frames are exactly equal (NA otherwise).
    Merged = merge(x = df1, y = df2, by = "Lat", all.x = TRUE)

Ответы [ 3 ]

1 голос
/ 02 мая 2020

Значения Lat в двух наборах данных записаны с разной точностью. Похоже, значения в df2 округлены до пяти знаков после запятой, поэтому вы можете округлить (функцией `round`) значения df1 также до пяти знаков.

# Round df1's Lat to five decimals (the precision used in df2), then left-join
# so that every df1 row is kept.
merge(
  x = transform(df1, Lat = round(Lat, digits = 5)),
  y = df2,
  by = "Lat",
  all.x = TRUE
)
#        Lat           Id Season        Refuge HType     Long.x Prey.Group       IntakeEnergy
# 1 37.99369 MWP 34 Low 4   Fall        Martin   Low -75.997330   Melampus   296957.720104750
# 2 38.22447     BH Low 2   Fall Bishop's Head   Low -76.038959   Melampus   642669.692764010
# 3 38.32623    BI High 1   Fall Barren Island  High -76.256617   Melampus  3563714.251495880
# 4 38.32623  BIHI High B   Fall Barren Island  High -76.256617   Melampus 89135338.970191106
# 5 38.33606       BI Mud   Fall Barren Island   Mud -76.261634   Melampus   228258.546050666
# 6 38.44109     BW SAV 1   Fall    Blackwater   SAV -76.109109   Melampus  1125780.013531440
#           flycost       foragcost    Long.y Closest_Disturbance_Distance_meters
# 1 1.2968676000000 190.22407366464 -75.99733                       1368.18442000
# 2 1.2968676000000 190.22407366464 -76.03896                         25.72719709
# 3 1.2968676000000 190.22407366464 -76.25662                       1821.37309400
# 4 1.2968676000000 190.22407366464 -76.25662                       1821.37309400
# 5 1.0957759890896 114.46318005888 -76.26163                       1610.66656200
# 6 1.0957759890896 114.46318005888 -76.10911                        204.55682240
1 голос
/ 02 мая 2020

С dplyr

library(dplyr)

# Round df1's Lat to five decimals so it lines up with df2, then keep every
# df1 row and attach the matching df2 columns.
df1 %>%
  mutate(Lat = round(Lat, digits = 5)) %>%
  left_join(y = df2, by = "Lat")
1 голос
/ 02 мая 2020

Если вы объедините данные по широте и долготе, предварительно округлив значения до 5 знаков после запятой, вы получите полный набор совпадений для df1.

# Bring both coordinate columns of both frames to the same five-decimal
# precision so they can serve as exact join keys.
df1[c("Lat", "Long")] <- lapply(df1[c("Lat", "Long")], round, digits = 5)
df2[c("Lat", "Long")] <- lapply(df2[c("Lat", "Long")], round, digits = 5)

# Left join on the (Lat, Long) pair: every df1 row is kept.
Merged <- merge(df1, df2, by = c("Lat", "Long"), all.x = TRUE)

Merged

... и вывод:

    > Merged
       Lat      Long           Id Season        Refuge HType Prey.Group
1 37.99369 -75.99733 MWP 34 Low 4   Fall        Martin   Low   Melampus
2 38.22447 -76.03896     BH Low 2   Fall Bishop's Head   Low   Melampus
3 38.32623 -76.25662    BI High 1   Fall Barren Island  High   Melampus
4 38.32623 -76.25662  BIHI High B   Fall Barren Island  High   Melampus
5 38.33606 -76.26163       BI Mud   Fall Barren Island   Mud   Melampus
6 38.44109 -76.10911     BW SAV 1   Fall    Blackwater   SAV   Melampus
  IntakeEnergy  flycost foragcost Closest_Disturbance_Distance_meters
1     296957.7 1.296868  190.2241                           1368.1844
2     642669.7 1.296868  190.2241                             25.7272
3    3563714.3 1.296868  190.2241                           1821.3731
4   89135339.0 1.296868  190.2241                           1821.3731
5     228258.5 1.095776  114.4632                           1610.6666
6    1125780.0 1.095776  114.4632                            204.5568
> 
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...