Добро пожаловать на Stack Overflow. Когда я запустил ваш код, никакой ошибки не возникло, но, поскольку во входных данных не было дубликатов, для обоих наборов данных счётчик дубликатов оказался равен 0. Поэтому я изменил ваши входные данные:
# Sample iOS input: a list of rows, each row a list of strings.
# Several rows are repeated so that a duplicate check on a column has
# something to report.
iOS_list = [
["Village", "did", "removed", "enjoyed", "explain", "nor", "ham", "saw", "calling", "talking."],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["feelings", "own", "shy.", "Request", "norland", "neither", "mistake", "for", "yet.", "Between"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["unpacked", "delicate.", "Why", "sir", "end", "believe", "uncivil", "respect.", "Always", "get"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["adieus", "nature", "day", "course", "for", "common.", "My", "little", "garret", "repair"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["For", "who", "thoroughly", "her", "boy", "estimating", "conviction.", "Removed", "demands", "expense"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["admire", "in", "giving.", "See", "resolved", "goodness", "felicity", "shy", "civility", "domestic"],
["had", "but.", "Drawings", "offended", "yet", "answered", "jennings", "perceive", "laughing", "six"]
]
# Sample Google Play input: a list of rows, each row a list of strings.
# Some rows are repeated so that a duplicate check on a column has
# something to report.
play_apps = [
["We", "diminution", "preference", "thoroughly", "if.", "Joy", "deal", "pain", "view", "much"],
["her", "time.", "Led", "young", "gay", "would", "now", "state.", "Pronounce", "we"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["attention", "admitting", "on", "assurance", "of", "suspicion", "conveying.", "That", "his", "west"],
["quit", "had", "met", "till.", "Of", "advantage", "he", "attending", "household", "at"],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["dining", "so", "he", "my", "around", "to.", "Increasing", "impression", "interested", "expression"],
["he", "my", "at.", "Respect", "invited", "request", "charmed", "me", "warrant", "to."],
["Expect", "no", "pretty", "as", "do", "though", "so", "genius", "afraid", "cousin."],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["Girl", "when", "of", "ye", "snug", "poor", "draw.", "Mistake", "totally", "of"],
["in", "chiefly.", "Justice", "visitor", "him", "entered", "for.", "Continue", "delicate", "as"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["being", "style", "house.", "An", "whom", "down", "kept", "lain", "name", "so"],
]
def find_duplicates(listy, index):
    """Print how many repeated names a data set has, plus a short sample.

    Every occurrence of a name after its first one is recorded, so a name
    that appears five times contributes four entries to the duplicates.

    Args:
        listy: Iterable of rows; each row is indexed with ``index``.
        index: Position of the name field inside each row.

    Raises:
        IndexError: If a list/tuple row has no element at ``index``.
    """
    unique_apps = []  # each distinct name, in first-seen order
    duplicate_apps = []  # one entry per repeat occurrence of a name
    for app in listy:
        name = app[index]
        if name in unique_apps:
            duplicate_apps.append(name)
        else:
            unique_apps.append(name)
    print('Count of duplicate apps in data set:', len(duplicate_apps))
    print('\n')
    # Show at most the first three repeats, in the order they were found.
    print('Sample of duplicate apps in data set:', duplicate_apps[:3])
    print('\n')
# Run the duplicate check: play_apps rows are compared on column 0,
# iOS_list rows on column 2.
find_duplicates(play_apps, 0)
find_duplicates(iOS_list, 2)
Код по-прежнему работал без проблем, поэтому, если у вас возникает ошибка, о которой вы сообщили, вам, вероятно, стоит проверить свои входные данные. Возможно, в одном из вложенных списков меньше трёх элементов (ведь вы обращаетесь к элементу с индексом 2).
Но я также заметил то, что может оказаться ошибкой: в вашем коде вы отделяете повторяющиеся имена приложений от уникальных. Однако если имя встречается, например, 5 раз, одна его копия попадёт в unique_apps, а четыре копии — в duplicate_apps. Я могу ошибаться, но, думаю, вы хотели, чтобы такое имя присутствовало в duplicate_apps только один раз. Если это так, достаточно внести в код минимальное изменение: использовать для duplicate_apps множество (set) вместо списка, чтобы в нём не было повторов.
# Sample iOS input: a list of rows, each row a list of strings.
# Several rows are repeated so that a duplicate check on a column has
# something to report.
iOS_list = [
["Village", "did", "removed", "enjoyed", "explain", "nor", "ham", "saw", "calling", "talking."],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["feelings", "own", "shy.", "Request", "norland", "neither", "mistake", "for", "yet.", "Between"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["unpacked", "delicate.", "Why", "sir", "end", "believe", "uncivil", "respect.", "Always", "get"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["adieus", "nature", "day", "course", "for", "common.", "My", "little", "garret", "repair"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["For", "who", "thoroughly", "her", "boy", "estimating", "conviction.", "Removed", "demands", "expense"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["admire", "in", "giving.", "See", "resolved", "goodness", "felicity", "shy", "civility", "domestic"],
["had", "but.", "Drawings", "offended", "yet", "answered", "jennings", "perceive", "laughing", "six"]
]
# Sample Google Play input: a list of rows, each row a list of strings.
# Some rows are repeated so that a duplicate check on a column has
# something to report.
play_apps = [
["We", "diminution", "preference", "thoroughly", "if.", "Joy", "deal", "pain", "view", "much"],
["her", "time.", "Led", "young", "gay", "would", "now", "state.", "Pronounce", "we"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["attention", "admitting", "on", "assurance", "of", "suspicion", "conveying.", "That", "his", "west"],
["quit", "had", "met", "till.", "Of", "advantage", "he", "attending", "household", "at"],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["dining", "so", "he", "my", "around", "to.", "Increasing", "impression", "interested", "expression"],
["he", "my", "at.", "Respect", "invited", "request", "charmed", "me", "warrant", "to."],
["Expect", "no", "pretty", "as", "do", "though", "so", "genius", "afraid", "cousin."],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["Girl", "when", "of", "ye", "snug", "poor", "draw.", "Mistake", "totally", "of"],
["in", "chiefly.", "Justice", "visitor", "him", "entered", "for.", "Continue", "delicate", "as"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["being", "style", "house.", "An", "whom", "down", "kept", "lain", "name", "so"],
]
def find_duplicates(listy, index):
    """Print how many distinct names are repeated, plus a short sample.

    A name is reported once no matter how many times it recurs, because
    the repeats are collected in a set.

    Args:
        listy: Iterable of rows; each row is indexed with ``index``.
        index: Position of the name field inside each row.

    Raises:
        IndexError: If a list/tuple row has no element at ``index``.
        TypeError: If a repeated name is unhashable (it is added to a set).
    """
    unique_apps = []  # each distinct name, in first-seen order
    duplicate_apps = set()  # names that occur more than once
    for app in listy:
        name = app[index]
        if name in unique_apps:
            duplicate_apps.add(name)
        else:
            unique_apps.append(name)
    print('Count of duplicate apps in data set:', len(duplicate_apps))
    print('\n')
    # A set has no defined order, so which three names appear may vary.
    print('Sample of duplicate apps in data set:', list(duplicate_apps)[:3])
    print('\n')
# Run the duplicate check: play_apps rows are compared on column 0,
# iOS_list rows on column 2.
find_duplicates(play_apps, 0)
find_duplicates(iOS_list, 2)
Но ваш код можно упростить, используя генераторное выражение, collections.Counter, списковое включение (list comprehension) и функцию random.sample().
import collections
import random
# Sample iOS input: a list of rows, each row a list of strings.
# Several rows are repeated so that a duplicate check on a column has
# something to report.
iOS_list = [
["Village", "did", "removed", "enjoyed", "explain", "nor", "ham", "saw", "calling", "talking."],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["feelings", "own", "shy.", "Request", "norland", "neither", "mistake", "for", "yet.", "Between"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["unpacked", "delicate.", "Why", "sir", "end", "believe", "uncivil", "respect.", "Always", "get"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["adieus", "nature", "day", "course", "for", "common.", "My", "little", "garret", "repair"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["have", "an", "no", "at.", "Relation", "so", "in", "confined", "smallest", "children"],
["Securing", "as", "informed", "declared", "or", "margaret.", "Joy", "horrible", "moreover", "man"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["the", "for", "morning", "assured", "country", "believe.", "On", "even", "feet", "time"],
["For", "who", "thoroughly", "her", "boy", "estimating", "conviction.", "Removed", "demands", "expense"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["account", "in", "outward", "tedious", "do.", "Particular", "way", "thoroughly", "unaffected", "projection"],
["favourable", "mrs", "can", "projecting", "own.", "Thirty", "it", "matter", "enable", "become"],
["admire", "in", "giving.", "See", "resolved", "goodness", "felicity", "shy", "civility", "domestic"],
["had", "but.", "Drawings", "offended", "yet", "answered", "jennings", "perceive", "laughing", "six"]
]
# Sample Google Play input: a list of rows, each row a list of strings.
# Some rows are repeated so that a duplicate check on a column has
# something to report.
play_apps = [
["We", "diminution", "preference", "thoroughly", "if.", "Joy", "deal", "pain", "view", "much"],
["her", "time.", "Led", "young", "gay", "would", "now", "state.", "Pronounce", "we"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["attention", "admitting", "on", "assurance", "of", "suspicion", "conveying.", "That", "his", "west"],
["quit", "had", "met", "till.", "Of", "advantage", "he", "attending", "household", "at"],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["dining", "so", "he", "my", "around", "to.", "Increasing", "impression", "interested", "expression"],
["he", "my", "at.", "Respect", "invited", "request", "charmed", "me", "warrant", "to."],
["Expect", "no", "pretty", "as", "do", "though", "so", "genius", "afraid", "cousin."],
["do", "perceived.", "Middleton", "in", "objection", "discovery", "as", "agreeable.", "Edward", "thrown"],
["Girl", "when", "of", "ye", "snug", "poor", "draw.", "Mistake", "totally", "of"],
["in", "chiefly.", "Justice", "visitor", "him", "entered", "for.", "Continue", "delicate", "as"],
["unlocked", "entirely", "mr", "relation", "diverted", "in.", "Known", "not", "end", "fully"],
["being", "style", "house.", "An", "whom", "down", "kept", "lain", "name", "so"],
]
def find_duplicates(source_list, name_location):
    """Print how many distinct names are repeated, plus a random sample.

    Args:
        source_list: Iterable of rows; each row is indexed with
            ``name_location``.
        name_location: Position of the name field inside each row.

    Raises:
        IndexError: If a list/tuple row has no element at ``name_location``.
        TypeError: If a name is unhashable (names are used as Counter keys).
    """
    # Generator expression: names are fed to Counter without an interim list.
    names = (current_app[name_location] for current_app in source_list)
    counts = collections.Counter(names)
    duplicates = [name for name, count in counts.items() if count > 1]
    duplicates_count = len(duplicates)
    # Cap the sample size: random.sample raises ValueError when asked for
    # more items than the population holds.
    sample = random.sample(duplicates, min(3, duplicates_count))
    print('Count of duplicate apps in data set:', duplicates_count, '\n')
    print('Sample of duplicate apps in data set:', sample, '\n')
# Run the duplicate check: play_apps rows are compared on column 0,
# iOS_list rows on column 2.
find_duplicates(play_apps, 0)
find_duplicates(iOS_list, 2)
Вас это устраивает?