Apriori — неожиданные значения достоверности (confidence) - PullRequest
0 голосов
/ 30 октября 2018

У меня есть два набора данных (приведены ниже): ItemListFiltered и ItemListAll. ItemListFiltered является лишь подмножеством ItemListAll. Мне не удаётся вручную подтвердить результаты функции apriori.

ItemListAll возвращает правило ...

{V2=C15OMB3CH, V3=C15KRLMRO} => {V1=C15MORL} with a confidence of 37%

ItemListFiltered возвращает правило ...

{V1=C15KRLMRO, V3=C15OMB3CH} => {V2=C15MORL} with a confidence of 80%

Я ожидал, что достоверность (confidence) рассчитывается так:

Confidence = Count of Orders from X&Y / Count of Orders from X

Я предполагаю, что X — левая часть правила (до =>), а Y — правая часть (после =>).

C15OMB3CH, C15KRLMRO и C15MORL встречаются вместе в 4 заказах, а C15OMB3CH и C15KRLMRO — в 11 заказах: 4/11 ≈ 36,4 %, что после округления вверх (ceiling в коде ниже) даёт 37 %.

Почему эти два набора данных дают разную достоверность?

R 3.5.1 код

library(Matrix)
library(arules)
library(plyr)
library(dplyr)
library(gridExtra)

# Load the order lines (one row per SALESID/ITEMID pair); swap the two
# read.csv lines to run against the filtered subset instead.
dataset <- read.csv("ItemListAll.csv", header=TRUE, sep=",")
#dataset <- read.csv("ItemListFiltered.csv", header=TRUE, sep=",")

# Collapse each order into a single row: ddply splits by SALESID and the
# function returns that order's ITEMID values, which end up in the columns
# V1, V2, V3 used below.
# NOTE(review): the column an item lands in presumably depends on its row
# position within the order, so the same item can sit in different columns
# for different orders — confirm against the CSV row order.
df_itemList <- ddply(dataset,c("SALESID"),function(df1)paste(df1$ITEMID));
# Convert each position column to a factor before mining.
df_itemList$V1 <- as.factor(df_itemList$V1);
df_itemList$V2 <- as.factor(df_itemList$V2);
df_itemList$V3 <- as.factor(df_itemList$V3);

# Mine rules over the three position columns. The rules quoted in the
# question carry labels such as "V2=C15OMB3CH", i.e. an item here is a
# column/value pair rather than a bare ITEMID.
# Thresholds: support >= 0.001, confidence >= 0.1, at least 3 items per rule.
basket_rules <- apriori(select(df_itemList,c("V1","V2","V3")), parameter = list(sup=0.001,conf=0.1,target="rules", minlen=3));

# Flatten the rules to a data frame, express support and confidence as whole
# percentages rounded UP (ceiling), round lift to two decimals, sort by
# support in descending order and open the result in the viewer.
df_basket <- as(basket_rules,"data.frame");
df_basket$support <- ceiling(df_basket$support * 100);
df_basket$confidence<- ceiling(df_basket$confidence * 100);
df_basket$lift<- round(df_basket$lift, digits = 2);
df_basket <- df_basket[rev(order(df_basket$support)),];
View(df_basket);

ItemListFiltered.csv

SALESID     ITEMID
SO0973259-1 C15ABPRRO
SO0972779-1 C15LGS5M
SO0971629-1 C15MORL
SO0972734-1 C15MORL
SO0973561-3 C15MORL
SO0973561-4 C15MORL
SO0974783-1 C15PECHRO
SO0974783-2 C15PECHRO
SO0973561-1 C15RPDERO
SO0973561-2 C15RPDERO
SO0974466-1 C15RPDERO
SO0973259-1 C15OMB3CH
SO0972779-1 C15OMB3CH
SO0971629-1 C15OMB3CH
SO0972734-1 C15OMB3CH
SO0973561-3 C15OMB3CH
SO0973561-4 C15OMB3CH
SO0974783-1 C15OMB3CH
SO0974783-2 C15OMB3CH
SO0973561-1 C15OMB3CH
SO0973561-2 C15OMB3CH
SO0974466-1 C15OMB3CH
SO0973259-1 C15KRLMRO
SO0972779-1 C15KRLMRO
SO0971629-1 C15KRLMRO
SO0972734-1 C15KRLMRO
SO0973561-3 C15KRLMRO
SO0973561-4 C15KRLMRO
SO0974783-1 C15KRLMRO
SO0974783-2 C15KRLMRO
SO0973561-1 C15KRLMRO
SO0973561-2 C15KRLMRO
SO0974466-1 C15KRLMRO

ItemListAll.csv

SALESID     ITEMID
SO0971629-1 C15KRLMRO
SO0971629-1 C15MORL
SO0971629-1 C15OMB3CH
SO0971841-2 C15ARBSIM
SO0971841-2 C15LADCRO
SO0971841-2 C15MORL
SO0972404-1 C15ABPRRO
SO0972404-1 C15CAOBG
SO0972404-1 C15PV20
SO0972564-3 C15ABPRRO
SO0972564-3 C15CAINM550
SO0972564-3 C15DLMT
SO0972710-1 C15DLMT
SO0972710-1 C15LGS5M
SO0972710-1 C15OMB3CH
SO0972734-1 C15KRLMRO
SO0972734-1 C15MORL
SO0972734-1 C15OMB3CH
SO0972744-1 C15CAINM550
SO0972744-1 C15KRLMRO
SO0972744-1 C15LGS5M
SO0972779-1 C15KRLMRO
SO0972779-1 C15LGS5M
SO0972779-1 C15OMB3CH
SO0972882-1 C15CAOBG
SO0972882-1 C15LADCRO
SO0972882-1 C15RPDERO
SO0972914-2 C15KRLMRO
SO0972914-2 C15LADCRO
SO0972914-2 C15LGS5M
SO0972948-1 C15ARBSIM
SO0972948-1 C15EPCNWCB
SO0972948-1 C15LGS5M
SO0972981-1 C15CAOBG
SO0972981-1 C15LADCRO
SO0972981-1 C15PECHRO
SO0972992-1 C15CAOBG
SO0972992-1 C15OMB3CH
SO0972992-1 C15PECHRO
SO0972995-2 C15CAOBG
SO0972995-2 C15MORL
SO0972995-2 C15OMB3CH
SO0973002-1 C15KRLMRO
SO0973002-1 C15LADCRO
SO0973002-1 C15MORL
SO0973010-1 C15ABPRRO
SO0973010-1 C15CAOBG
SO0973010-1 C15LADCRO
SO0973065-3 C15ABPRRO
SO0973065-3 C15CAOBG
SO0973065-3 C15LADCRO
SO0973127-1 C15ARBSIM
SO0973127-1 C15LADCRO
SO0973127-1 C15PECHRO
SO0973195-1 C15CAOBG
SO0973195-1 C15LADCRO
SO0973195-1 C15LGS5M
SO0973207-1 C15ARBSIM
SO0973207-1 C15OMB3CH
SO0973207-1 C15RPDERO
SO0973208-1 C15ABPRRO
SO0973208-1 C15NUBCA
SO0973208-1 C15OMB3CH
SO0973209-1 C15LADCRO
SO0973209-1 C15NUBCA
SO0973209-1 C15RPDERO
SO0973223-1 C15CAOBG
SO0973223-1 C15LGS5M
SO0973223-1 C15OMB3CH
SO0973251-1 C15ARBSIM
SO0973251-1 C15PV20
SO0973251-1 C15RPDERO
SO0973259-1 C15ABPRRO
SO0973259-1 C15KRLMRO
SO0973259-1 C15OMB3CH
SO0973266-1 C15CAOBG
SO0973266-1 C15MORL
SO0973266-1 C15PV20
SO0973277-1 C15EPCNWCB
SO0973277-1 C15MORL
SO0973277-1 C15NUBCA
SO0973307-2 C15CAOBG
SO0973307-2 C15OMB3CH
SO0973307-2 C15RPDERO
SO0973311-1 C15ABPRRO
SO0973311-1 C15CAOBG
SO0973311-1 C15OMB3CH
SO0973414-1 C15CAOBG
SO0973414-1 C15MORL
SO0973414-1 C15OMB3CH
SO0973457-1 C15ABPRRO
SO0973457-1 C15KRLMRO
SO0973457-1 C15LADCRO
SO0973460-1 C15DLMT
SO0973460-1 C15MORL
SO0973460-1 C15OMB3CH
SO0973466-1 C15ABPRRO
SO0973466-1 C15KRLMRO
SO0973466-1 C15LADCRO
SO0973516-1 C15DLMT
SO0973516-1 C15MORL
SO0973516-1 C15OMB3CH
SO0973541-1 C15CAOBG
SO0973541-1 C15PV20
SO0973541-1 C15RPDERO
SO0973547-1 C15CAOBG
SO0973547-1 C15MORL
SO0973547-1 C15OMB3CH
SO0973555-1 C15KRLMRO
SO0973555-1 C15LADCRO
SO0973555-1 C15MORL
SO0973561-1 C15KRLMRO
SO0973561-1 C15OMB3CH
SO0973561-1 C15RPDERO
SO0973561-2 C15KRLMRO
SO0973561-2 C15OMB3CH
SO0973561-2 C15RPDERO
SO0973561-3 C15KRLMRO
SO0973561-3 C15MORL
SO0973561-3 C15OMB3CH
SO0973561-4 C15KRLMRO
SO0973561-4 C15MORL
SO0973561-4 C15OMB3CH
SO0973739-1 C15ABPRRO
SO0973739-1 C15CAOBG
SO0973739-1 C15PV20
SO0974027-2 C15CAINM550
SO0974027-2 C15CAOBG
SO0974027-2 C15LGS5M
SO0974027-3 C15CAINM550
SO0974027-3 C15CAOBG
SO0974027-3 C15LGS5M
SO0974093-1 C15CAOBG
SO0974093-1 C15LGS5M
SO0974093-1 C15OMB3CH
SO0974104-1 C15CAOBG
SO0974104-1 C15LADCRO
SO0974104-1 C15MORL
SO0974114-1 C15KRLMRO
SO0974114-1 C15LADCRO
SO0974114-1 C15LGS5M
SO0974233-3 C15NUBCA
SO0974233-3 C15OMB3CH
SO0974233-3 C15PECHRO
SO0974437-3 C15DLMT
SO0974437-3 C15LADCRO
SO0974437-3 C15LGS5M
SO0974466-1 C15KRLMRO
SO0974466-1 C15OMB3CH
SO0974466-1 C15RPDERO
SO0974471-1 C15DLMT
SO0974471-1 C15MORL
SO0974471-1 C15OMB3CH
SO0974538-8 C15DLMT
SO0974538-8 C15MORL
SO0974538-8 C15OMB3CH
SO0974539-1 C15CAOBG
SO0974539-1 C15EPCNWCB
SO0974539-1 C15PECHRO
SO0974558-1 C15ABPRRO
SO0974558-1 C15CAOBG
SO0974558-1 C15PV20
SO0974581-1 C15MORL
SO0974581-1 C15NUBCA
SO0974581-1 C15PV20
SO0974649-1 C15CAOBG
SO0974649-1 C15LADCRO
SO0974649-1 C15PECHRO
SO0974663-1 C15CAINM550
SO0974663-1 C15KRLMRO
SO0974663-1 C15RPDERO
SO0974702-1 C15CAOBG
SO0974702-1 C15PECHRO
SO0974702-1 C15PV20
SO0974715-1 C15DLMT
SO0974715-1 C15OMB3CH
SO0974715-1 C15RPDERO
SO0974753-1 C15DLMT
SO0974753-1 C15OMB3CH
SO0974753-1 C15RPDERO
SO0974783-1 C15KRLMRO
SO0974783-1 C15OMB3CH
SO0974783-1 C15PECHRO
SO0974783-2 C15KRLMRO
SO0974783-2 C15OMB3CH
SO0974783-2 C15PECHRO
SO0974923-2 C15DLMT
SO0974923-2 C15LGS5M
SO0974923-2 C15OMB3CH

1 Ответ

0 голосов
/ 01 ноября 2018

Вот обновлённый код, который я в итоге использовал; он основан на материалах по ссылкам здесь и здесь.

#Main
library(Matrix)
library(arules)
library(plyr)
library(dplyr)
library(gridExtra)

# Load the order lines (one row per SALESID/ITEMID pair).
# Swap the two read.csv lines to run against the complete data set instead.
#itemList <- read.csv("ItemListAll.csv", header=TRUE, sep=",")
itemList <- read.csv("ItemListFiltered.csv", header = TRUE, sep = ",")

# Converting to a Matrix ####
# Marker column: after the wide reshape it becomes one TRUE/NA column per item.
itemList$const <- TRUE

# Remove duplicates
# The de-duplicated rows are stored back into itemList so that the reshape
# below actually works on them; the two dim() calls show the row count
# before and after.
dim(itemList)
itemList <- unique(itemList)
dim(itemList)

# Need to reshape the matrix: one row per SALESID, one "const.<ITEMID>"
# column per distinct item.
itemList_max_prep <- reshape(data = itemList,
                             idvar = "SALESID",
                             timevar = "ITEMID",
                             direction = "wide")

# Drop the SALESID (by name, and keep the matrix shape even if only a single
# item column remains).
item_columns <- names(itemList_max_prep) != "SALESID"
itemList_matrix <- as.matrix(itemList_max_prep[, item_columns, drop = FALSE])

# Clean up the missing values to be FALSE (item not present in the order)
itemList_matrix[is.na(itemList_matrix)] <- FALSE

# Clean up names: strip the "const." prefix added by reshape()
colnames(itemList_matrix) <- gsub(x = colnames(itemList_matrix),
                                  pattern = "const\\.", replacement = "")

# Each row of the logical matrix becomes one transaction whose items are the
# bare ITEMID values (no column-position prefix).
itemList_trans <- as(itemList_matrix, "transactions")

inspect(itemList_trans)

# Mine rules with support >= 0.01, confidence >= 0.5 and at least 3 items.
basket_rules <- apriori(itemList_trans,
                        parameter = list(support = 0.01,
                                         confidence = 0.5,
                                         target = "rules",
                                         minlen = 3))

# Express support and confidence as whole percentages rounded UP (ceiling),
# round lift to two decimals and sort by support in descending order.
df_basket <- as(basket_rules, "data.frame")
df_basket$support <- ceiling(df_basket$support * 100)
df_basket$confidence <- ceiling(df_basket$confidence * 100)
df_basket$lift <- round(df_basket$lift, digits = 2)
df_basket <- df_basket[rev(order(df_basket$support)), ]

# NOTE(review): write.xlsx is not provided by any package attached at the top
# of this script; presumably it comes from the xlsx package (the col.names /
# row.names arguments match it) — attach that package before running.
write.xlsx(df_basket, "Basket_Analysis.xlsx", col.names = TRUE, row.names = TRUE)
#grid.table(head(df_basket));
...