Какая функция в R даст мне количество строк, которые имеют только X столбцов - PullRequest
0 голосов
/ 26 февраля 2020

У меня есть набор данных, который выглядит следующим образом (набор данных Groceries из пакета CRAN arules)

библиотеки:

# Load arules; per the question text, the Groceries dataset comes from this package.
library(arules);
# Attach the base example datasets package.
library(datasets);
# Load the Groceries dataset into the workspace.
data(Groceries);

Пример набора данных:

transactions\items   

1           item1      item2      item3
2           item1      item2      item4     item5    item7
3           item1      item2      item4     item5    item6    item7
4           item1      item2      item3     item7    item8

Я хотел бы знать:

1 - какая функция в R выдала бы мне транзакции, содержащие ровно 5 элементов (столбцов)?

Ожидаемый результат - строки 2 и 4, поскольку в них ровно 5 элементов (items).

2 - какая функция в R выдала бы мне транзакции как минимум с 5 элементами (столбцами)?

Ожидаемый результат - строки 2 и 3 и 4, так как они содержат не менее 5 items

Спасибо. Вот вывод inspect(Groceries[1:3]):

   items                
[1] {citrus fruit,       
     semi-finished bread,
     margarine,          
     ready soups}        
[2] {tropical fruit,     
     yogurt,             
     coffee}             
[3] {whole milk}  

Вот вывод dput(head(Groceries)):

new("transactions", data = new("ngCMatrix", i = c(13L, 60L, 69L, 
78L, 14L, 29L, 98L, 24L, 15L, 29L, 38L, 92L, 22L, 24L, 33L, 123L, 
24L, 25L, 29L, 65L, 138L), p = c(0L, 4L, 7L, 8L, 12L, 16L, 21L
), Dim = c(169L, 6L), Dimnames = list(NULL, NULL), factors = list()), 
    itemInfo = structure(list(labels = c("frankfurter", "sausage", 
    "liver loaf", "ham", "meat", "finished products", "organic sausage", 
    "chicken", "turkey", "pork", "beef", "hamburger meat", "fish", 
    "citrus fruit", "tropical fruit", "pip fruit", "grapes", 
    "berries", "nuts/prunes", "root vegetables", "onions", "herbs", 
    "other vegetables", "packaged fruit/vegetables", "whole milk", 
    "butter", "curd", "dessert", "butter milk", "yogurt", "whipped/sour cream", 
    "beverages", "UHT-milk", "condensed milk", "cream", "soft cheese", 
    "sliced cheese", "hard cheese", "cream cheese ", "processed cheese", 
    "spread cheese", "curd cheese", "specialty cheese", "mayonnaise", 
    "salad dressing", "tidbits", "frozen vegetables", "frozen fruits", 
    "frozen meals", "frozen fish", "frozen chicken", "ice cream", 
    "frozen dessert", "frozen potato products", "domestic eggs", 
    "rolls/buns", "white bread", "brown bread", "pastry", "roll products ", 
    "semi-finished bread", "zwieback", "potato products", "flour", 
    "salt", "rice", "pasta", "vinegar", "oil", "margarine", "specialty fat", 
    "sugar", "artif. sweetener", "honey", "mustard", "ketchup", 
    "spices", "soups", "ready soups", "Instant food products", 
    "sauces", "cereals", "organic products", "baking powder", 
    "preservation products", "pudding powder", "canned vegetables", 
    "canned fruit", "pickled vegetables", "specialty vegetables", 
    "jam", "sweet spreads", "meat spreads", "canned fish", "dog food", 
    "cat food", "pet care", "baby food", "coffee", "instant coffee", 
    "tea", "cocoa drinks", "bottled water", "soda", "misc. beverages", 
    "fruit/vegetable juice", "syrup", "bottled beer", "canned beer", 
    "brandy", "whisky", "liquor", "rum", "liqueur", "liquor (appetizer)", 
    "white wine", "red/blush wine", "prosecco", "sparkling wine", 
    "salty snack", "popcorn", "nut snack", "snack products", 
    "long life bakery product", "waffles", "cake bar", "chewing gum", 
    "chocolate", "cooking chocolate", "specialty chocolate", 
    "specialty bar", "chocolate marshmallow", "candy", "seasonal products", 
    "detergent", "softener", "decalcifier", "dish cleaner", "abrasive cleaner", 
    "cleaner", "toilet cleaner", "bathroom cleaner", "hair spray", 
    "dental care", "male cosmetics", "make up remover", "skin care", 
    "female sanitary products", "baby cosmetics", "soap", "rubbing alcohol", 
    "hygiene articles", "napkins", "dishes", "cookware", "kitchen utensil", 
    "cling film/bags", "kitchen towels", "house keeping products", 
    "candles", "light bulbs", "sound storage medium", "newspapers", 
    "photo/film", "pot plants", "flower soil/fertilizer", "flower (seeds)", 
    "shopping bags", "bags"), level2 = structure(c(44L, 44L, 
    44L, 44L, 44L, 44L, 44L, 42L, 42L, 41L, 5L, 5L, 23L, 25L, 
    25L, 25L, 25L, 25L, 25L, 53L, 53L, 53L, 53L, 37L, 18L, 18L, 
    18L, 18L, 18L, 18L, 18L, 18L, 46L, 46L, 46L, 11L, 11L, 11L, 
    11L, 11L, 11L, 11L, 11L, 19L, 19L, 19L, 24L, 24L, 24L, 24L, 
    24L, 24L, 24L, 24L, 22L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 50L, 
    50L, 50L, 50L, 50L, 54L, 54L, 54L, 54L, 51L, 51L, 51L, 16L, 
    16L, 16L, 49L, 49L, 49L, 49L, 30L, 30L, 3L, 3L, 43L, 10L, 
    10L, 10L, 10L, 31L, 31L, 33L, 9L, 40L, 40L, 40L, 1L, 15L, 
    15L, 52L, 52L, 34L, 34L, 34L, 34L, 34L, 6L, 6L, 29L, 29L, 
    29L, 29L, 29L, 29L, 55L, 55L, 55L, 55L, 47L, 47L, 47L, 47L, 
    32L, 32L, 32L, 12L, 13L, 13L, 13L, 13L, 13L, 8L, 45L, 21L, 
    21L, 21L, 14L, 14L, 14L, 4L, 4L, 28L, 20L, 17L, 17L, 17L, 
    17L, 17L, 48L, 39L, 38L, 38L, 36L, 36L, 36L, 36L, 35L, 35L, 
    35L, 35L, 26L, 26L, 26L, 27L, 27L, 27L, 2L, 2L), .Label = c("baby food", 
    "bags", "bakery improver", "bathroom cleaner", "beef", "beer", 
    "bread and backed goods", "candy", "canned fish", "canned fruit/vegetables", 
    "cheese", "chewing gum", "chocolate", "cleaner", "coffee", 
    "condiments", "cosmetics", "dairy produce", "delicatessen", 
    "dental care", "detergent/softener", "eggs", "fish", "frozen foods", 
    "fruit", "games/books/hobby", "garden", "hair care", "hard drinks", 
    "health food", "jam/sweet spreads", "long-life bakery products", 
    "meat spreads", "non-alc. drinks", "non-food house keeping products", 
    "non-food kitchen", "packaged fruit/vegetables", "perfumery", 
    "personal hygiene", "pet food/care", "pork", "poultry", "pudding powder", 
    "sausage", "seasonal products", "shelf-stable dairy", "snacks", 
    "soap", "soups/sauces", "staple foods", "sweetener", "tea/cocoa drinks", 
    "vegetables", "vinegar/oils", "wine"), class = "factor"), 
        level1 = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
        5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
        4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
        4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
        3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 10L, 
        10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
        10L, 10L, 10L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 8L, 8L, 
        8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 
        7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("canned food", 
        "detergent", "drinks", "fresh products", "fruit and vegetables", 
        "meat and sausage", "non-food", "perfumery", "processed food", 
        "snacks and candies"), class = "factor")), row.names = c(NA, 
    -169L), class = "data.frame"), itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame"))

1 Ответ

1 голос
/ 26 февраля 2020

Похоже, это работает:

library(arules)
data(Groceries)

data <- as(Groceries, "data.frame")

library(stringr)
library(dplyr)

# Keep only the transactions that contain exactly 5 items.
# arules::size() returns the number of items in each transaction, in the same
# order as the rows of `data` (one row per transaction), so it is more robust
# than counting commas in the printed label, which would miscount any item
# whose label itself contains a comma.
# For "at least 5 items" (question 2), use `>= 5` instead of `== 5`.
Groceries5 <- data %>%
  filter(arules::size(Groceries) == 5)

head(Groceries5)
                                                                   items
1                       {whole milk,butter,yogurt,rice,abrasive cleaner}
2 {other vegetables,UHT-milk,rolls/buns,bottled beer,liquor (appetizer)}
3  {tropical fruit,other vegetables,white bread,bottled water,chocolate}
4   {tropical fruit,cream cheese ,processed cheese,detergent,newspapers}
5          {root vegetables,other vegetables,whole milk,beverages,sugar}
6           {whole milk,yogurt,processed cheese,pickled vegetables,soda}
...