Я пытаюсь использовать Networkx 2.1 и Plotly 2.7.0 для генерации графика минимального остовного дерева с использованием матрицы расстояний в качестве входных данных. Я использую Python 2.7.
Проблема заключается в том, что ребра узлов, показанные на графике Plotly, не совпадают с реберным списком минимального связующего дерева Networkx.
Матрица расстояний (CSV-файл) имеет следующий формат:
ST0_1,0,0,1109,1109,1091,1091,1125,1126,1126,1125,1127,1126,1127,1127,1125,1125,1125,1126,1126,1127,1127,1127,1127,1122,1130,1126,1127
ST0_2,0,0,1109,1109,1091,1091,1125,1126,1126,1125,1127,1126,1127,1127,1125,1125,1125,1126,1126,1127,1127,1127,1127,1122,1130,1126,1127
ST1_3,1109,1109,0,0,1107,1107,1047,1048,1048,1047,1049,1048,1049,1049,1049,1049,1049,1047,1049,1049,1047,1047,1047,1050,1050,1049,1049
ST1_4,1109,1109,0,0,1107,1107,1047,1048,1048,1047,1049,1048,1049,1049,1049,1049,1049,1047,1049,1049,1047,1047,1047,1050,1050,1049,1049
ST22_5,1091,1091,1107,1107,0,0,1100,1101,1101,1100,1102,1101,1102,1102,1101,1101,1101,1101,1100,1102,1101,1101,1102,1099,1103,1102,1102
ST22_6,1091,1091,1107,1107,0,0,1100,1101,1101,1100,1102,1101,1102,1102,1101,1101,1101,1101,1100,1102,1101,1101,1102,1099,1103,1102,1102
ST2,1125,1125,1047,1047,1100,1100,0,5,6,8,8,9,8,10,7,7,8,11,16,12,8,7,11,72,43,11,17
ST7,1126,1126,1048,1048,1101,1101,5,0,1,7,3,4,3,5,6,6,7,10,15,13,7,6,10,72,43,6,16
ST9,1126,1126,1048,1048,1101,1101,6,1,0,8,2,3,2,4,7,7,8,11,16,14,8,7,11,73,44,5,17
ST5,1125,1125,1047,1047,1100,1100,8,7,8,0,10,11,10,12,7,7,8,7,16,16,10,9,13,73,44,13,17
ST13,1127,1127,1049,1049,1102,1102,8,3,2,10,0,3,2,4,9,9,10,13,18,16,10,9,13,75,46,7,19
ST11,1126,1126,1048,1048,1101,1101,9,4,3,11,3,0,3,1,10,10,11,14,19,17,11,10,14,76,47,8,20
ST10,1127,1127,1049,1049,1102,1102,8,3,2,10,2,3,0,4,9,9,10,13,18,16,10,9,13,75,46,7,19
ST12,1127,1127,1049,1049,1102,1102,10,5,4,12,4,1,4,0,11,11,12,15,20,18,12,11,15,77,48,9,21
ST16_7,1125,1125,1049,1049,1101,1101,7,6,7,7,9,10,9,11,0,0,3,10,15,15,9,8,12,72,43,12,16
ST16_8,1125,1125,1049,1049,1101,1101,7,6,7,7,9,10,9,11,0,0,3,10,15,15,9,8,12,72,43,12,16
ST15,1125,1125,1049,1049,1101,1101,8,7,8,8,10,11,10,12,3,3,0,11,16,16,10,9,13,73,44,13,17
ST6,1126,1126,1047,1047,1101,1101,11,10,11,7,13,14,13,15,10,10,11,0,17,15,11,12,12,70,43,14,18
ST18,1126,1126,1049,1049,1100,1100,16,15,16,16,18,19,18,20,15,15,16,17,0,22,16,17,18,77,49,21,7
ST17,1127,1127,1049,1049,1102,1102,12,13,14,16,16,17,16,18,15,15,16,15,22,0,14,15,15,75,50,15,23
ST4,1127,1127,1047,1047,1101,1101,8,7,8,10,10,11,10,12,9,9,10,11,16,14,0,1,3,73,44,13,17
ST3,1127,1127,1047,1047,1101,1101,7,6,7,9,9,10,9,11,8,8,9,12,17,15,1,0,4,74,45,12,18
ST8,1127,1127,1047,1047,1102,1102,11,10,11,13,13,14,13,15,12,12,13,12,18,15,3,4,0,72,47,14,20
ST21,1122,1122,1050,1050,1099,1099,72,72,73,73,75,76,75,77,72,72,73,70,77,75,73,74,72,0,83,74,79
ST20,1130,1130,1050,1050,1103,1103,43,43,44,44,46,47,46,48,43,43,44,43,49,50,44,45,47,83,0,49,50
ST14,1126,1126,1049,1049,1102,1102,11,6,5,13,7,8,7,9,12,12,13,14,21,15,13,12,14,74,49,0,22
ST19,1127,1127,1049,1049,1102,1102,17,16,17,17,19,20,19,21,16,16,17,18,7,23,17,18,20,79,50,22,0
Вот скрипт Python:
import csv
import numpy as np
import networkx as nx
from plotly.offline import plot
import plotly.graph_objs as go
from networkx.drawing.nx_agraph import graphviz_layout
def read_dist_matrix(file_name):
""" Reads a distance matrix in PHYLIP format.
Requires: file_name is the name of a file that has the distance matrix
in PHYLIP format.
Ensures: distance matrix as numpy array and a list with the names/ids of
each sequence.
"""
with open(file_name) as f:
#first_line = f.readline().strip()
reader = csv.reader(f, delimiter=',')
names = list(zip(*reader))[0]
cols = tuple(list(range(1,len(names)+1)))
dist_matrix = np.loadtxt(file_name, delimiter=',',
usecols=cols)
return [dist_matrix, names]
def dist_to_graph(matrix, names):
""" Converts numpy array representing distance matrix to graph.
Requires: matrix as a numpy array. names as a list with the names
of each row/column.
Ensures: networkx graph labeled with the right names.
"""
G = nx.from_numpy_matrix(matrix)
nodes = list(G.nodes())
names_dict = {}
for n in range(len(nodes)):
names_dict[nodes[n]] = names[n]
G = nx.relabel_nodes(G, names_dict)
return G
ola = read_dist_matrix("out.csv")
ola_g = dist_to_graph(ola[0], ola[1])
mst = nx.minimum_spanning_tree(ola_g, algorithm='prim')
pos = graphviz_layout(mst, prog='neato')
labels = list(mst.nodes.keys())
X = []
Y = []
for k in pos:
X.append(pos[k][0])
Y.append(pos[k][1])
tracer = go.Scatter(x=X, y=Y,
mode='lines',
line=go.Line(color='#888', width=2),
hoverinfo = 'none',
showlegend=False)
tracer_marker = go.Scatter(x=X, y=Y,
mode='markers+text',
text=labels,
textposition='top',
marker = go.Marker(size= 15,
line = dict(width = 2)),
hoverinfo = 'none',
showlegend=False)
layout = dict(title='Test', showlegend=False,
xaxis=dict(title='',
titlefont=dict(size=20),
showgrid=False,
zeroline=False,
showline=False,
ticks='',
showticklabels=False,
showspikes=False),
yaxis=dict(title='',
titlefont=dict(size=20),
showgrid=False,
zeroline=False,
showline=False,
ticks='',
showticklabels=False,
showspikes=False)
)
fig = dict(data=[tracer, tracer_marker], layout=layout)
plot(fig, filename='./test_plotly.html', auto_open=True, show_link=False)
Это скриншот выходного HTML:
Если мы выполним команду mst.edges(data=True)
, мы увидим, что узлы ST7 и ST5 связаны, но на графике Плотти они не являются.
Подобный вопрос был задан здесь , но он был на R:
Может кто-нибудь помочь мне с этим?