Код работает очень медленно имя файла неизвестно - профилировщики Python - PullRequest
0 голосов
/ 24 декабря 2018

У меня есть следующий код:

def min_dist(aux):


""" Function to obtain the minimum distance in the 2 matching problem. 
    Input: the dataframe given to us. 
    Output: minimum distance and the matches (this will actually be written in a csv). 
    """

# Let's create an array that stores the distances.
dist = []

# We will also create an array with passengers compared so we don't double our information.
compared = []

# An array for us to store the overlaps
gain = []

# Store which overlaps
gain_loc =[]

# Finally one to obtain the tuple.
matches = []

test1 = 0
test2 = 0
for p1 in range(0,len(aux)):

    """ Since the maximum of passengers is 2, there are 4 different possible outcomes: it could be:
    1) start passenger 1, start passenger 2, end passanger 1, end passenger 2
    2) start passenger 1, start passenger 2, end passanger 2, end passenger 1 
    3) start passenger 2, start passenger 1, end passanger 1, end passenger 2
    4) start passenger 2, start passenger 1, end passanger 2, end passenger 1  
    """

    for p2 in range(0,len(aux)):

        if p1 != p2: # We evidently don't want to pickup the same passenger two times.

            if [p2,p1] not in compared: # [p1,p2] can't be repeated because of the construction of compared and the for loop.

                alone = M_dist(p1,p1,"start","end",aux) # We also want to check it alone, since there could be unmached rides.
                compared.append([p1])
                alone2 = M_dist(p2,p2,"start","end",aux)
                ind_drives = alone + alone2 # We get the independent distance for both rides.

                # Let's get now the 4 outcomes and take the minimum one.
                out1 = M_dist(p1,p2,"start","start",aux) + M_dist(p2,p1,"start","end",aux) + M_dist(p1,p2,"end","end",aux)
                out2 = M_dist(p1,p2,"start","start",aux) + M_dist(p2,p2,"start","end",aux) + M_dist(p2,p1,"end","end",aux)
                out3 = M_dist(p2,p1,"start","start",aux) + M_dist(p1,p1,"start","end",aux) + M_dist(p1,p2,"end","end",aux)
                out4 = M_dist(p2,p1,"start","start",aux) + M_dist(p1,p2,"start","end",aux) + M_dist(p2,p1,"end","end",aux)

                together = [out1,out2,out3,out4] # Lets put them together in order to get the minimum
                minimum = np.min(together) # We could merge the two lines but I believe it makes it easier to see if we separate them. 

                # Save the fact that the comparition was done.
                compared.append([p1,p2])
                overlap = ind_drives - minimum
                # Now we will get the overlap between the two rides. We do not care about negative overlaps since we can just get two rides instead.
                if overlap >= 0: # We made it equal because it there is no loss, we can still save energy :D !

                    dist.append(minimum)

                    # We will store them in order of start. 
                    if minimum == out1:
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)
                    elif minimum == out2:
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)

                    elif minimum == out3:
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)

                    elif minimum == out4: 
                        match = [p1,p2,p1,p2]
                        matches.append(match)
                        gain.append(overlap) # How much overlap we get between the two rides.
                        gain_loc.append(match)

                # In case we actually get negative overlaps, we will save the individual values. 
                else:

                    # We do not want them repeated, so we check if they are already there.
                    if alone not in dist and alone2 not in dist:
                        dist.append(alone)
                        dist.append(alone2)
                        matches.append([p1,p1])
                        matches.append([p2,p2])

                    elif alone not in dist:
                        dist.append(alone)
                        matches.append([p1,p1])

                    elif alone2 not in dist:
                        dist.append(alone2)
                        matches.append([p2,p2])

    # Now we have the the overlaps, the tuples that contain said overlaps and all the distances.
    # We want now to take the aproach said in the notebook and start with the biggest overlap and then go down.

f_match = []
indexes = np.argsort(gain)[::-1]
for index in indexes:
     if not bool(set(np.unique(f_match)) & set(gain_loc[index])):
        f_match.append(gain_loc[index])

used = []
# Get unique passengers already used
for j in np.unique(f_match): 
    for i in np.unique(j):
        used.append(i)

all_p = list(range(len(aux)))
for x in np.unique(used): all_p.remove(x) # We take all the passengers that do not have someone to share a ride
for p in all_p: f_match.append([p,p]) # We add them as un matched

# We now have the final order, we just want to get the minimum distance.

dist_aux = []
for f in f_match:
    for i in range(0,len(matches)):
        if f == matches[i]:
            dist_aux.append(dist[i])
f_dist = np.sum(dist_aux)


print('The minimum distance is: ', f_dist, '\n')

return None

Где находится внутренняя функция:

def M_dist(p1,p2,order1,order2,requests): 

    """  Function to calculate manhattan distance between 1 or 2 passangers based on lattitude and logitude.
         We will used the formula M(p1,p2) = 84.2|p1(lat) - p2(lat)| + 111.2|p1(lng) - p2(lng)|.
         The input of our function will be 2 ride id's and if the passangers will picked or left (start,end)
         Our output is the distance. """

    if order1 == "start":
        if order2 == "start":
            # We will take the lattitud difference.
            lat = requests[p1][1] - requests[p2][1]
            # And the longitude
            lng = requests[p1][2] - requests[p2][2]

        if order2 == "end":
            # We will take the lattitud difference.
            lat = requests[p1][1] - requests[p2][3]
            # And the longitude
            lng = requests[p1][2] - requests[p2][4]

    if order1 == "end":
        if order2 == "end":
            # We will take the lattitud difference.
            lat = requests[p1][3] - requests[p2][3]
            # And the longitude
            lng = requests[p1][4] - requests[p2][4]

    # We will use the values obtained to get the manhattan distance. 
    dist = 84.2*np.abs(lat) + 111.2*np.abs(lng)

    return float(dist) # We want it as a float.

Я запустил код с 100 элементами, и это заняло 0,008 секунды, я попытался запуститьэто с 200 элементами, а время составляет 8 секунд.Сумма, которую он вырастил, является сверхъестественной, она должна быть O (n ^ 2), и это не похоже на это.

Однако при попытке получить информацию с помощью профилировщиков происходит один вызов, который составляет 90% времени.

     234757 function calls in 10.308 seconds

Упорядочено по: стандартному названию

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    8.726    8.726   10.306   10.306 <ipython-input-41-436df7df7549>:1(min_dist)

Я понятия не имею, что это такое, и как сделать его более эффективным.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...