Создание подграфиков (subplots) для разных фреймов данных и использование фрейма данных в качестве значения x - PullRequest
1 голос
/ 01 августа 2020

У меня есть DataFrame, полученный из файла CSV: он содержит столбец времени и 18 столбцов с образцами, взятыми в эти моменты времени. Сначала я вычисляю среднее значение по репликам и создаю три разных фрейма данных с помощью следующего кода:

# load the measurements from the CSV file; header=0 takes the column names from its first row
data = pd.read_csv('growht.csv', delimiter=',', header=0)

file:

# To reproduce: copy the CSV text below to the clipboard, then run this line.
# read_clipboard forwards extra keyword arguments to read_csv, which has no
# `index` parameter, so the former `index=False` raised a TypeError.

data = pd.read_clipboard(sep=',')

Time,WT5,WT5,WT5,WT1,WT1,WT1,NF5,NF5,NF5,NF1,NF1,NF1,D5,D5,D5,D1,D1,D1
9.7e-05,0.113,0.11900000000000001,0.11699999999999999,0.081,0.086,0.076,0.102,0.111,0.111,0.086,0.087,0.084,0.1,0.105,0.106,0.085,0.087,0.086
0.041737,0.122,0.121,0.126,0.075,0.07400000000000001,0.07400000000000001,0.10400000000000001,0.105,0.10300000000000001,0.075,0.073,0.073,0.1,0.09699999999999999,0.09699999999999999,0.075,0.073,0.073
0.08340299999999999,0.161,0.163,0.174,0.076,0.075,0.075,0.126,0.129,0.13,0.076,0.07400000000000001,0.07400000000000001,0.12,0.11900000000000001,0.11900000000000001,0.076,0.07400000000000001,0.07400000000000001
0.12507200000000002,0.285,0.307,0.303,0.079,0.079,0.079,0.175,0.188,0.191,0.077,0.07400000000000001,0.075,0.165,0.17,0.172,0.079,0.077,0.077
0.166738,0.34600000000000003,0.368,0.369,0.09,0.091,0.091,0.273,0.28300000000000003,0.292,0.078,0.076,0.077,0.255,0.27,0.278,0.08800000000000001,0.085,0.085
0.208404,0.418,0.461,0.418,0.113,0.122,0.121,0.366,0.41200000000000003,0.38,0.08,0.078,0.079,0.368,0.376,0.382,0.113,0.10400000000000001,0.106
0.25007399999999996,0.48,0.513,0.508,0.18,0.2,0.196,0.418,0.42100000000000004,0.43,0.08800000000000001,0.087,0.08900000000000001,0.446,0.47700000000000004,0.475,0.17300000000000001,0.155,0.158
0.29173699999999997,0.551,0.589,0.5920000000000001,0.311,0.33399999999999996,0.336,0.46399999999999997,0.47600000000000003,0.47,0.10400000000000001,0.105,0.10800000000000001,0.5379999999999999,0.544,0.542,0.24,0.22699999999999998,0.22699999999999998
0.3334,0.612,0.603,0.617,0.436,0.48100000000000004,0.446,0.514,0.556,0.53,0.14,0.147,0.154,0.59,0.644,0.629,0.361,0.35100000000000003,0.341
0.375066,0.682,0.685,0.703,0.516,0.505,0.47600000000000003,0.5670000000000001,0.605,0.5760000000000001,0.215,0.247,0.259,0.6559999999999999,0.72,0.735,0.456,0.41200000000000003,0.409
0.416733,0.7340000000000001,0.741,0.755,0.735,0.624,0.605,0.609,0.614,0.588,0.335,0.355,0.365,0.708,0.746,0.7490000000000001,0.523,0.495,0.494
0.4584,0.763,0.799,0.8420000000000001,0.748,0.682,0.6659999999999999,0.653,0.6759999999999999,0.655,0.42200000000000004,0.442,0.45299999999999996,0.759,0.809,0.81,0.629,0.5870000000000001,0.59
0.500066,0.802,0.858,0.8740000000000001,0.831,0.767,0.757,0.6809999999999999,0.705,0.684,0.47100000000000003,0.47,0.47200000000000003,0.816,0.863,0.8690000000000001,0.645,0.632,0.645
0.541733,0.852,0.893,0.903,0.863,0.748,0.731,0.7170000000000001,0.741,0.722,0.562,0.579,0.5760000000000001,0.872,0.927,0.9279999999999999,0.7070000000000001,0.675,0.6729999999999999
0.583399,0.927,0.907,0.9840000000000001,0.889,0.773,0.742,0.74,0.763,0.741,0.614,0.66,0.64,0.914,0.975,0.975,0.7290000000000001,0.698,0.693
0.625066,0.9590000000000001,0.956,1.041,0.892,0.7829999999999999,0.746,0.762,0.78,0.767,0.647,0.711,0.693,0.95,1.02,1.016,0.76,0.745,0.742
0.666733,0.987,1.04,1.035,0.8909999999999999,0.7959999999999999,0.807,0.769,0.7959999999999999,0.7859999999999999,0.7,0.731,0.718,0.978,1.058,1.047,0.789,0.782,0.782
0.708399,1.042,1.056,1.032,0.848,0.802,0.833,0.777,0.81,0.7979999999999999,0.737,0.782,0.775,0.9790000000000001,1.083,1.075,0.807,0.818,0.8170000000000001
0.750067,1.062,1.0979999999999999,1.0590000000000002,0.8540000000000001,0.8590000000000001,0.8490000000000001,0.785,0.815,0.8079999999999999,0.7929999999999999,0.828,0.804,0.973,1.102,1.091,0.831,0.851,0.85
0.791732,1.0959999999999999,1.102,1.069,0.8590000000000001,0.941,0.889,0.7709999999999999,0.802,0.797,0.809,0.853,0.825,0.956,1.0979999999999999,1.0859999999999999,0.836,0.875,0.872
0.8334,1.125,1.133,1.1,0.8690000000000001,0.9790000000000001,0.932,0.757,0.795,0.7909999999999999,0.835,0.884,0.8440000000000001,0.945,1.103,1.085,0.843,0.8859999999999999,0.889
0.875065,1.133,1.166,1.121,0.89,0.9990000000000001,0.975,0.7440000000000001,0.7829999999999999,0.7809999999999999,0.843,0.898,0.855,0.938,1.097,1.074,0.836,0.8959999999999999,0.8959999999999999
0.916733,1.136,1.198,1.119,0.92,1.056,0.9540000000000001,0.727,0.777,0.773,0.853,0.905,0.858,0.917,1.088,1.07,0.8220000000000001,0.8959999999999999,0.898
0.9584,1.119,1.202,1.115,0.9179999999999999,1.071,1.026,0.7140000000000001,0.7609999999999999,0.76,0.851,0.907,0.8490000000000001,0.904,1.075,1.055,0.812,0.8859999999999999,0.8909999999999999
1.000065,1.167,1.199,1.099,0.9079999999999999,1.093,1.006,0.6970000000000001,0.748,0.7509999999999999,0.835,0.902,0.843,0.889,1.069,1.0490000000000002,0.8009999999999999,0.885,0.892
# Strip the '.N' suffix pandas appends to duplicated column names.
# A raw string avoids invalid-escape warnings, and regex=True is required
# because pandas >= 2.0 treats the pattern as a literal string by default.
data.columns = data.columns.str.replace(r'(\.\d+)$', '', regex=True)

# Row-wise mean and standard deviation over the columns that share a name.
# mean()/std() no longer accept `level=` (removed in pandas 2.0), so group the
# transposed frame by label instead; sort=False keeps the file's column order.
replicates = data.T.groupby(level=0, sort=False)
data_mean = replicates.mean().T
data_std = replicates.std().T

# split into one frame per group of column names
data_time = data.filter(like='Time')
data_WT = data_mean.filter(like='WT')
data_NF = data_mean.filter(like='NF')
data_D = data_mean.filter(like='D')

Теперь с помощью приведенного выше кода я создаю новые фреймы данных, которые содержат только столбцы с определенными заголовками. Итак, у меня есть три разных фрейма данных с 2 столбцами и 24 строками в каждом. Мне удалось изобразить их на одном рисунке, используя следующий код:

fig, axes = plt.subplots(nrows=1, ncols=3, squeeze=False, figsize=(10, 5))

# squeeze=False returns a 2-D array of axes; flatten it so axes[i] is one panel
axes = axes.flatten()

# one panel per frame: mean curves with the std values as error bars
for ax, frame in zip(axes, (data_WT, data_NF, data_D)):
    frame.plot(ax=ax, yerr=data_std, fontsize=6, grid=True)

результат выглядит так:

график с тремя подграфиками

Затем я хотел добавить на каждый график диаграмму рассеяния для каждой точки. Для этого я добавил фрейм данных data_time, который содержит значения времени, чтобы использовать его в качестве значения x на диаграмме рассеяния. Однако, когда я делаю это для первого подграфика, например:

# NOTE(review): x and y are given whole DataFrames here; the traceback below shows
# pandas using x as a column key (data[x]) and raising KeyError
data_WT.plot.scatter(ax=axes[0],x=data_time,y=data_WT)

, у меня появляется следующая ошибка:

   Traceback (most recent call last):
  File "c:/Users/Nico/Desktop/bioscreen.py", line 60, in <module>
    data_WT.plot.scatter(ax=axes[0],x=data_time,y=data_WT)
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\plotting\_core.py", line 1499, in scatter
    return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\plotting\_core.py", line 792, in __call__
    return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\plotting\_matplotlib\__init__.py", line 61, in plot
    plot_obj.generate()
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\plotting\_matplotlib\core.py", line 263, in generate
    self._make_plot()
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\plotting\_matplotlib\core.py", line 970, in _make_plot
    data[x].values,
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\core\frame.py", line 2806, in __getitem__
    indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\core\indexing.py", line 1551, in _get_listlike_indexer
    self._validate_read_indexer(
  File "C:\Users\Nico\AppData\Local\Programs\Python\Python38\lib\site-packages\pandas\core\indexing.py", line 1639, in _validate_read_indexer
    raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Float64Index([           9.73e-05,         0.041736991,         0.083402986,\n                      0.125072396,         0.166737708, 0.20840449100000003,\n                      0.250073843, 0.29173736100000003,         0.333400081,\n                      0.375066481, 0.41673263899999996,         0.458399595,\n                      0.500066227,      
   0.541732743,         0.583399375,\n                      0.625065949,         0.666732685,  0.7083994790000001,\n        
               0.75006728,          0.79173228,         0.833399606,\n                      0.875064988,         0.916732766,         0.958400093,\n                      1.000065417],\n             dtype='float64')] are in the [columns]"

Буду благодарен за любые предложения о том, как преодолеть эту ошибку: я читал об этом, но не нашёл ответа, который бы мне помог.

Спасибо.

1 Ответ

2 голосов
/ 01 августа 2020
  • Проще всего решить проблему, установив Time в качестве индекса.
  • Также с помощью seaborn.scatterplot проще добавить диаграмму рассеяния.
    • Seaborn — это библиотека визуализации данных для Python, основанная на matplotlib. Она предоставляет высокоуровневый интерфейс для рисования привлекательной и информативной статистической графики.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# read the file in with Time as the index, so it becomes the x axis of every plot
data = pd.read_csv('growth.csv', delimiter=',', header=0, index_col='Time')

# Strip the '.N' suffix pandas appends to duplicated column names.
# A raw string avoids invalid-escape warnings, and regex=True is required
# because pandas >= 2.0 treats the pattern as a literal string by default.
data.columns = data.columns.str.replace(r'(\.\d+)$', '', regex=True)

# Same per-row mean/std over same-named columns as in the question (data_time
# isn't needed). mean()/std() no longer accept `level=` (removed in pandas 2.0),
# so group the transposed frame by label; sort=False keeps the column order.
replicates = data.T.groupby(level=0, sort=False)
data_mean = replicates.mean().T
data_std = replicates.std().T
data_WT = data_mean.filter(like='WT')
data_NF = data_mean.filter(like='NF')
data_D = data_mean.filter(like='D')

# plot
fig, axes = plt.subplots(nrows=1, ncols=3, squeeze=False, figsize=(16, 8))

# squeeze=False returns a 2-D array of axes; flatten it to iterate panel by panel
axes = axes.flatten()

# each panel: mean lines with std error bars, then the same means as scatter points
for ax, frame in zip(axes, (data_WT, data_NF, data_D)):
    frame.plot(ax=ax, yerr=data_std, fontsize=6, grid=True)
    sns.scatterplot(data=frame, ax=ax)

enter image description here

  • Если с помощью диаграммы рассеяния вы хотите лишь добавить маркеры на график, то можно использовать параметр marker при построении каждого графика
fig, axes = plt.subplots(nrows=1, ncols=3, squeeze=False, figsize=(16, 8))

# squeeze=False returns a 2-D array of axes; flatten it to iterate panel by panel
axes = axes.flatten()

# marker='o' draws a point at every sample, so no separate scatter call is needed
for ax, frame in zip(axes, (data_WT, data_NF, data_D)):
    frame.plot(ax=ax, yerr=data_std, fontsize=6, grid=True, marker='o')

введите описание изображения здесь

...