Как сделать цикл Python быстрее для получения векторов, соответствующих словам и записи в CSV? - PullRequest
0 голосов
/ 30 апреля 2018

Мой скрипт на python выполняется часами, так как код не эффективен Могу ли я сделать этот код быстрее?

texts = pd.read_csv("texts.csv", sep='\t') # has each line (100k total lines) with strings.Eg: ABVDGSVHSJ
vectors = pd.read_csv("word_n_vectors.csv", sep='\t')   # has unique words (1000) in 1st column and corresponding...
                                                        # ... 100 dimension vector in next 100 columns
vectors_seq = open('output.csv', 'w')                   # save 100 dimension vector for each line by the following code
with vectors_seq:
    writer = csv.writer(vectors_seq, delimiter="\t")
    for sequence in texts["text"]: # "text" column contains strings: ABHDJHVD.... 
        vec = np.zeros([1,100]) # to use when "word_n_vectors.csv" doesn't have the selected word.
        i= 0
        data = np.array([sequence[i:i+3] for i in range(0, len(sequence), 3)]) # split the data to 3 letter words
        for tri in data:                    
            a = vectors['0']==tri           # get the location word in "vectors"
            if not any(a):
                a=np.zeros([1,100])         # if no word present in "vectors"
            else:
                a = np.where(a)[0]
                a = vectors.iloc[a,1:101]   # get the 100 D vectors from "vectors"
            a = np.array(a)

            vec = np.add(vec,a)             # add all the 100 D vectors in each line in loop
        writer.writerows(vec)               # and save new 100 D vectors in each new row
print("Writing complete")
...