Этот код вводит взвешенный крайний список, начальные слова положительных и отрицательных настроений и целевое слово. Программа вычисляет сумму весов по кратчайшим путям от начальных слов до цели и от целевых до начальных слов, поскольку она генерирует 9 выходных значений.
Программа очень медленная. Запуск больших файлов Edgelist занимает дни, а не минуты или секунды. Требуется увеличить скорость в 100 и более раз.
Как ускорить эту программу?
from tkinter import Tk, X, Y, TOP, BOTTOM, LEFT, RIGHT, BOTH, END
from tkinter import filedialog, messagebox
from tkinter.ttk import Frame, Button, Entry, Label, Progressbar
import os, glob, time
import pandas as pd
root = Tk()
root.geometry("600x400+300+300")
def read_edge_list(filename):
edges = {}
words = set()
with open(filename) as fp:
lines = fp.readlines()
for line in lines:
token = line.split()
if len(token) != 3:
continue
word1 = token[0]
word2 = token[1]
freq = token[2]
words = words | {word1, word2}
if not word1 in edges.keys():
edges[word1] = {}
if not word2 in edges[word1]:
edges[word1][word2] = {}
edges[word1][word2] = freq
return edges, words
def read_sentiment(filename):
with open(filename, encoding='utf-8-sig') as fp:
lines = fp.readlines()
words = {line.strip() for line in lines}
return words
def read_target_word():
word = input("Please input target word: ")
return word
def run_shortest_path_algorithm(edges, positive, negative, target):
positivedict = {}
negativedict = {}
for source in positive:
dist1 = dijkstra(edges, source, target)
dist2 = dijkstra(edges, target, source)
if dist1 and dist2:
positivedict[source] = dist1 + dist2
for source in negative:
dist1 = dijkstra(edges, source, target)
dist2 = dijkstra(edges, target, source)
if dist1 and dist2:
negativedict[source] = dist1 + dist2
return positivedict, negativedict
def calculate_statistics_summary(positivedict, negativedict, positivewords, negativewords):
numpositive = len(positivedict)
numnegative = len(negativedict)
actualnumpositive = len(positivewords)
actualnumnegative = len(negativewords)
sumpositive = sum(positivedict.values())
sumnegative = sum(negativedict.values())
if actualnumpositive == 0:
s1 = 0
else:
s1 = sumpositive / actualnumpositive
if actualnumnegative == 0:
s2 = 0
else:
s2 = sumnegative / actualnumnegative
if numnegative == 0:
s3 = 0
else:
s3 = s1 * numpositive / numnegative
if s2 == 0:
s4 = 0
else:
s4 = s3 / s2
if numpositive == 0:
s5 = 0
else:
s5 = sumpositive / numpositive
if numnegative == 0:
s6 = 0
else:
s6 = sumnegative / numnegative
if numnegative == 0:
s7 = 0
else:
s7 = s5 * numpositive / numnegative
if s6 == 0:
s8 = 0
else:
s8 = s7 / s6
s9 = s3 - s2
return [s1, s2, s3, s4, s5, s6, s7, s8, s9]
def write_output_file():
pass
def dijkstra(graph, start, end):
shortest_paths = {start: (None, 0)}
current_node = start
visited = set()
while current_node != end:
visited.add(current_node)
if current_node not in graph:
destinations = []
else:
destinations = graph[current_node].keys()
weight_to_current_node = shortest_paths[current_node][1]
for next_node in destinations:
weight = int(graph[current_node][next_node]) + weight_to_current_node
if next_node not in shortest_paths:
shortest_paths[next_node] = (current_node, weight)
else:
current_shortest_weight = shortest_paths[next_node][1]
if current_shortest_weight > weight:
shortest_paths[next_node] = (current_node, weight)
next_destinations = {node: shortest_paths[node] for node in
shortest_paths if node not in visited}
if not next_destinations:
return None
current_node = min(next_destinations, key=lambda k: next_destinations[k][1])
#path = []
#while current_node is not None:
#path.append(current_node)
#next_node = shortest_paths[current_node][0]
#current_node = next_node
#path = path[::-1]
#return path
return shortest_paths[end][1]
class SentimentWindow(Frame):
def __init__(self):
super().__init__()
self.initUI()
self.initPositiveDir = None
self.initNegativeDir = None
self.initSaveDir = None
self.summary = pd.DataFrame(columns=['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9'])
def initUI(self):
self.master.title("Sentiment")
self.pack(fill=BOTH, expand=True, padx=15, pady=15)
frmEdges = Frame(self)
frmEdges.pack(fill=X, expand=True)
lblEdges = Label(frmEdges, text="Select the directory of edge list.")
lblEdges.pack(expand=True, fill=X, side=TOP, pady=2)
frmEdgesPath = Frame(frmEdges)
frmEdgesPath.pack(expand=True, fill=X, side=BOTTOM, pady=2)
self.entEdgesPath = Entry(frmEdgesPath, width=60)
self.entEdgesPath.pack(expand=True, fill=X, side=LEFT)
btnEdgesPath = Button(frmEdgesPath, width=20, text="Load Edges", command=self.loadEdges)
btnEdgesPath.pack(expand=True, side=RIGHT)
frmPositive = Frame(self)
frmPositive.pack(fill=X, expand=True)
lblPositive = Label(frmPositive, text="Select the positive file.")
lblPositive.pack(expand=True, fill=X, side=TOP, pady=2)
frmPositivePath = Frame(frmPositive)
frmPositivePath.pack(expand=True, fill=X, side=BOTTOM, pady=2)
self.entPositivePath = Entry(frmPositivePath, width=60)
self.entPositivePath.pack(expand=True, fill=X, side=LEFT)
btnPositivePath = Button(frmPositivePath, width=20, text="Load Positive", command=self.loadPositive)
btnPositivePath.pack(expand=True, side=RIGHT)
frmNegative = Frame(self)
frmNegative.pack(fill=X, expand=True)
lblNegative = Label(frmNegative, text="Select the negative file.")
lblNegative.pack(expand=True, fill=X, side=TOP, pady=2)
frmNegativePath = Frame(frmNegative)
frmNegativePath.pack(expand=True, fill=X, side=BOTTOM, pady=2)
self.entNegativePath = Entry(frmNegativePath, width=60)
self.entNegativePath.pack(expand=True, fill=X, side=LEFT)
btnNegativePath = Button(frmNegativePath, width=20, text="Load Negative", command=self.loadNegative)
btnNegativePath.pack(expand=True, side=RIGHT)
frmTarget = Frame(self)
frmTarget.pack(fill=X, expand=True)
lblTarget = Label(frmTarget, text="Input the target word.")
lblTarget.pack(expand=True, fill=X, side=TOP, pady=2)
self.entTarget = Entry(frmTarget)
self.entTarget.pack(fill=X, expand=True, pady=2)
frmRun = Frame(self)
frmRun.pack(fill=X, expand=True, pady=20)
self.proRun = Progressbar(frmRun, value=0)
self.proRun.pack(fill=X, expand=True, side=LEFT)
btnRun = Button(frmRun, text = "Run", width=20, command=self.run)
btnRun.pack(side=RIGHT, padx=20)
def loadEdges(self):
edgesFolderName = filedialog.askdirectory()
if edgesFolderName:
self.entEdgesPath.delete(0, END)
self.entEdgesPath.insert(0, edgesFolderName)
def loadPositive(self):
if self.initPositiveDir is None:
self.initPositiveDir = "/"
positiveFileName = filedialog.askopenfilename(initialdir=self.initPositiveDir,
title="Open Positive File", filetypes=(("Text file", "*.txt"),))
if positiveFileName:
self.initPositiveDir = positiveFileName
self.entPositivePath.delete(0, END)
self.entPositivePath.insert(0, positiveFileName)
def loadNegative(self):
if self.initNegativeDir is None:
self.initNegativeDir = "/"
negativeFileName = filedialog.askopenfilename(initialdir=self.initNegativeDir,
title="Open Positive File", filetypes=(("Text file", "*.txt"),))
if negativeFileName:
self.initNegativeDir = negativeFileName
self.entNegativePath.delete(0, END)
self.entNegativePath.insert(0, negativeFileName)
def run(self):
edgesFolderName = self.entEdgesPath.get()
if not os.path.isdir(edgesFolderName):
messagebox.showerror("Invalid Path", "The directory of edge list is invalid.")
return
positiveFileName = self.entPositivePath.get()
if not os.path.isfile(positiveFileName):
messagebox.showerror("Invalid Path", "The positive filename is invalid.")
return
negativeFileName = self.entNegativePath.get()
if not os.path.isfile(negativeFileName):
messagebox.showerror("Invalid Path", "The negative filename is invalid.")
return
targetWord = self.entTarget.get()
if targetWord is None or len(targetWord) <= 0:
messagebox.showerror("No Target", "Please input the target word.")
os.chdir(edgesFolderName)
edgefiles = glob.glob("*.pr")
if len(edgefiles) <= 0:
messagebox.showerror("No Edge File", "Cannot find the edge files.")
positivewords = read_sentiment(positiveFileName)
negativewords = read_sentiment(negativeFileName)
self.summary.drop(self.summary.index, inplace=True)
self.proRun["value"] = 0.0
self.proRun.update()
root.config(cursor="wait")
root.update()
time.sleep(0.300)
for index, edgefile in enumerate(edgefiles):
edges, words = read_edge_list(edgefile)
if targetWord not in words:
messagebox.showerror("Invalid Target", "Target does not exist in " + edgefile)
else:
possiblepositive = positivewords & words
possiblenegative = negativewords & words
positivedict, negativedict = \
run_shortest_path_algorithm(edges, possiblepositive, possiblenegative, targetWord)
statistics_summary = calculate_statistics_summary(positivedict, negativedict,
positivewords, negativewords)
self.summary.loc[edgefile] = statistics_summary
self.proRun["value"] = 100 * (index + 1) / len(edgefiles)
self.proRun.update()
root.config(cursor="")
if self.summary.shape[0] > 0:
self.summary.loc['mean'] = self.summary.mean()
self.summary.loc['std'] = self.summary.std()
if self.initSaveDir is None:
self.initSaveDir = "/"
outputFile = filedialog.asksaveasfilename(initialdir=self.initSaveDir,
title="Save Summary File", filetypes=(("Text file", "*.txt"),))
self.initSaveDir = outputFile
if outputFile:
with open(outputFile, 'w') as outfp:
self.summary.to_string(outfp)
app = SentimentWindow()
root.mainloop()
Вот небольшие списки фронтов, которые запускаются в считанные минуты: https://drive.google.com/file/d/1zDOSMFz0AooXrs9WJ0noC3oD9cWg_562/view?usp=sharing
Вот большой файл, который будет работать в течение нескольких дней! https://drive.google.com/file/d/18NR_bPjb9OU03n7MO08GwELrK7gqXEKE/view?usp=sharing
Вот файл отрицательного начального числа: https://docs.google.com/document/d/1Y0eFolLWjqoHiFnHD7TOS-9z5h1xxmvUiENS1TEv9yU/edit?usp=sharing
Файл положительного начального значения: https://docs.google.com/document/d/1FAct8O-rRN6qsdTU3praW6hy2ckMf1s1mA9K2gy7WYI/edit?usp=sharing
Установите целевое слово на: bp
.
Вот код в файле: https://docs.google.com/document/d/1erSpyXxy3eMehBCiYJudf7tnQgIneT9H7Ot2_wGYBBI/edit?usp=sharing