У меня есть датафрейм со ссылкой на коммуну, количество голосов в этом городе и результаты нескольких партий.
Comm Votes LPC CPC BQ
0 comm1 1315.0 2.0 424.0 572.0
1 comm2 4682.0 117.0 2053.0 1584.0
2 comm3 2397.0 2.0 40.0 192.0
3 comm4 931.0 2.0 12.0 345.0
4 comm5 842.0 47.0 209.0 76.0
... ... ... ... ... ...
1522 comm1523 23808.0 1588.0 4458.0 13147.0
1523 comm1524 639.0 40.0 126.0 40.0
1524 comm1525 10477.0 13.0 673.0 333.0
1525 comm1526 2674.0 1.0 55.0 194.0
1526 comm1527 1691.0 331.0 29.0 78.0
У меня есть код, который создает случайные столбцы для всех партийных столбцов, кроме исходного, чтобы притворяться, что у них есть предпочтения, а не голоса:
def fill_cells(cell):
votes_max = cell['Votes']
all_dict = {}
parties_temp = parties.copy()
#iterate over parties
for p in parties_temp:
preferences = ['1','2','3']
# iterate over preferences
for preference in preferences:
preferences.remove(preference)
# sample new data with equal choices
sampled = np.random.choice(preferences, int(votes_max-cell[p]))
# transform into dictionary
c_sampled = dict(collections.Counter(sampled))
c_sampled.update({p:cell[p]})
c_sampled['1'] = c_sampled.pop(p)
# batch update of the dictionary keys
all_dict.update(
dict(zip([p+'_%s' %k for k in c_sampled.keys()],c_sampled.values()))
)
# but I'm loosing the city references
return pd.Series(all_dict)
Возвращает:
LPC_2 LPC_3 LPC_1 CPC_2 CPC_3 CPC_1 BQ_2 BQ_3 BQ_1
0 891.0 487.0 424.0 743.0 373.0 572.0 1313.0 683.0 2.0
1 2629.0 1342.0 2053.0 3098.0 1603.0 1584.0 4565.0 2301.0 117.0
2 2357.0 1186.0 40.0 2205.0 1047.0 192.0 2395.0 1171.0 2.0
3 919.0 451.0 12.0 586.0 288.0 345.0 929.0 455.0 2.0
4 633.0 309.0 209.0 766.0 399.0 76.0 795.0 396.0 47.0
... ... ... ... ... ... ... ... ... ...
1520 1088.0 536.0 42.0 970.0 462.0 160.0 1117.0 540.0 13.0
1521 4742.0 2341.0 219.0 3655.0 1865.0 1306.0 4705.0 2375.0 256.0
1522 19350.0 9733.0 4458.0 10661.0 5352.0 13147.0 22220.0 11100.0 1588.0
1523 513.0 264.0 126.0 599.0 267.0 40.0 599.0 306.0 40.0
1524 9804.0 4885.0 673.0 10144.0 5012.0 333.0 10464.0 5162.0 13.0
Однако с этим кодом я теряю голоса и городские ссылки. Как я могу сохранить их?
Я пытался:
def fill_cells(cell):
votes_max = cell['Votes']
all_dict = {}
parties_temp = parties.copy()
#iterate over parties
for p in parties_temp:
# preferences = ['1','2','3','4','5','6','7','8','9','10','11']
preferences = ['1','2','3']
# iterate over preferences
for preference in preferences:
preferences.remove(preference)
# sample new data with equal choices
sampled = np.random.choice(preferences, int(votes_max-cell[p]))
# transform into dictionary
c_sampled = dict(collections.Counter(sampled))
c_sampled.update({p:cell[p]})
c_sampled['1'] = c_sampled.pop(p)
# batch update of the dictionary keys
all_dict.update(
dict(zip(cell['Votes'],cell['Comm'],[p+'_%s' %k for k in c_sampled.keys()],c_sampled.values()))
)
return pd.Series(all_dict)
Но он возвращает:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-255-dd1231fd4f2d> in <module>
----> 1 df_test.apply(fill_cells, axis =1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6926 kwds=kwds,
6927 )
-> 6928 return op.get_result()
6929
6930 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
184 return self.apply_raw()
185
--> 186 return self.apply_standard()
187
188 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
290
291 # compute the result using the series generator
--> 292 self.apply_series_generator()
293
294 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
<ipython-input-253-d0d8d6a4b22d> in fill_cells(cell)
18 # batch update of the dictionary keys
19 all_dict.update(
---> 20 dict(zip(cell['Votes'],cell['Comm'],[p+'_%s' %k for k in c_sampled.keys()],c_sampled.values()))
21 )
22 return pd.Series(all_dict)
TypeError: ('zip argument #1 must support iteration', 'occurred at index 0')