Я пытаюсь создать параллельную версию алгоритма Кана с использованием OpenMP. Потому что я новичок в OpenMP, я не знаю, правильно ли я распараллелил. Псевдокод, из которого я черпал вдохновение, выглядит так:
L ← Empty list that will contain the sorted elements
S ← Set of all nodes with no incoming edge
while S is non-empty do
remove a node n from S
add n to tail of L
for each node m with an edge e from n to m do
remove edge e from the graph
if m has no other incoming edges then
insert m into S
if graph has edges then
return error (graph has at least one cycle)
else
return L (a topologically sorted order)
Моя проблема в том, что моя последовательная версия, такая же, как и ниже, за исключением команд прагмы и команд, связанных с потоками, работает быстрее, чем моя параллельная версия. Я измеряю время с помощью функции gettimeofday (). Что-то не так с моим кодом? Я также скомпилировал код, используя -O0 и -O3 в обоих случаях.
void TopSort(int rows, int columns, int matrix[][columns], list graph[], list S[], int L[]){
int i, j, sum_S = 1, count = 0, k = 1;
int start, end, threads, elements, id;
while(sum_S != 0) //While S is non - empty do
{
printf("\n\n--------- Iteration no. %d ---------", k); //Print the iteration number
k++;
for(i = 1; i < rows; i++){ //With each iteration
int sum = 0; //we calculate the new degree of each node after deletion
for(j = 1; j < columns; j++)
sum = sum + matrix[j][i];
if(graph[i - 1].id != 0) //If the node hasn't been deleted
graph[i - 1].degree = sum; //We update the degree
}
printf("\nGraph: \n"); //and print the graph
print(graph, rows);
sum_S = 0; //We set the sum_S to 0 to calculate it with the remaining nodes
#pragma omp parallel num_threads(4) shared(rows, columns, S, L, graph, matrix, count) private(i, id, threads, elements, start, end)
{
id = omp_get_thread_num();
threads = omp_get_num_threads();
elements = (rows-1)/threads;
start = elements*id;
if(id != (threads - 1))
end = start + elements;
else
end = (rows-1);
for(i = start; i < end; i++)
#pragma omp critical
if(graph[i].degree == 0){ //If there is a node with no incoming edges
#pragma omp task
{
printf("thread %d of %d entering critical region\n", id, threads);
S[count].id = graph[i].id; //add it's id to S
S[count].degree = graph[i].degree;//and the appropriate degree
L[count] = S[count].id; //and add the node from S to L
graph[i].id = 0; //Delete the node from the graph list
graph[i].degree = INT_MAX; //and set it's degree to infinity(INT_MAX)
for(j = 1; j < columns; j++)
matrix[i+1][j] = 0; //Also delete the node from the graph matrix
count+=1;
printf("exiting critical region\n");
}
}
}
printf("\nS: \n"); //Print the S list
print(S, rows);
for(i = 0; i < (rows - 1); i++)//Recalcute the ID sum of nodes inside S list
sum_S = S[i].id + sum_S;
for(i = 0; i < (rows - 1); i++){ //Reset S
S[i].id = 0; //by setting the ID to zero
S[i].degree = INT_MAX; //and the degree to infinity
}
printf("\nTopological order: "); //Print the current topological order
for(i = 0; i < (rows - 1); i++)
if(L[i] != 0)//by printing the nodes with non-zero IDs
printf("%d --> ", L[i]);
}
for(i = 0; i < (rows - 1); i++)
if(L[i] == 0){ //If the L list has a node with a zero ID
printf("\n\nError! Circle detected! No topological order!\n"); //there is a circle inside the graph
break; //and so there isn't a topological order
}
else if(i == (rows - 2))
printf("END\n");
}