Вот решение в стиле tidyverse.
Я использовал dplyr::full_join()
для объединения df1 и df2, преобразовал столбец с датой в тип Date с помощью lubridate::dmy(),
а затем применил dplyr::filter(),
чтобы оставить только 2015 год и категории S7 и S14:
library(dplyr)
library(lubridate)
# Expected output, copied from the comment on the original question.
# The leading number on each data row has no matching header field, so
# read.table() takes it as the row name rather than as a data column.
new_dataframe <- read.table(
  text = "
Client Ville Category Qte Montant Date_achat
1 Cl1 Marseille S7 28 2750 16/05/2015
2 Cl1 Marseille S7 27 2570 03/06/2015
3 Cl3 Marseille S14 25 1240 21/11/2015
4 Cl3 Marseille S14 18 1560 21/10/2016
5 Cl3 Marseille S14 15 1460 30/11/2016
6 Cl5 Grenoble S15 30 1980 19/03/2016
7 Cl9 Marseille S10 22 2030 19/07/2015",
  header = TRUE,            # spelled out: T/F are ordinary, reassignable variables
  stringsAsFactors = FALSE  # keep Client/Ville/Category/Date_achat as character
) %>%
  as_tibble()               # replaces the deprecated tbl_df()
# Rebuild the two input tables from the expected output.
# df1: one row per distinct Client / Ville / Category combination.
df1 <- unique(select(new_dataframe, Client, Ville, Category))
# df2: the purchase columns (Qte, Montant, Date_achat) together with Client.
df2 <- select(new_dataframe, Client, Qte, Montant, Date_achat)
# Join the two tables on Client and keep the result: a bare full_join() call
# is only printed at the console and is discarded when the script is sourced.
# The joined table has the same columns as new_dataframe, so it is stored
# under that name and the steps that follow operate on the joined data.
new_dataframe <- full_join(df1, df2, by = "Client")
# Parse the day/month/year strings in Date_achat into Date values.
new_dataframe <- new_dataframe %>%
  mutate(Date_achat = dmy(Date_achat))
# Keep only purchases made in 2015 whose category is S7 or S14.
df <- new_dataframe %>%
  filter(year(Date_achat) == 2015) %>%
  filter(Category %in% c("S7", "S14"))
# # A tibble: 3 x 6
# Client Ville Category Qte Montant Date_achat
# <chr> <chr> <chr> <int> <int> <date>
# 1 Cl1 Marseille S7 28 2750 2015-05-16
# 2 Cl1 Marseille S7 27 2570 2015-06-03
# 3 Cl3 Marseille S14 25 1240 2015-11-21