При передаче данных pandas в R с помощью rpy2: как лучше всего не допустить, чтобы R создавал несколько столбцов из одного столбца pandas? - PullRequest
0 голосов
/ 31 марта 2019

Я записываю несколько столбцов в pandas df и отправляю результат в R. Проблема в том, что один из столбцов df, 'temp_selection', появляется только один раз в pandas df, но несколько раз в R.

См. ниже:


from pandas import DataFrame, read_csv

import os # For changing the working directory

import pandas as pd 
import sys #only needed to determine Python version number
import numpy as np
import os
from numpy import random
from scipy.stats import stats
from scipy.special import stdtr
from statsmodels.formula.api import ols
%load_ext rpy2.ipython

# Importing R Packages
from rpy2.robjects.packages import importr
import rpy2.robjects.packages as rpackages
import rpy2.interactive as r
import rpy2.interactive.packages # this can take few seconds

# import R's "base" package
base = importr('base')

# import R's "utils" package
utils = importr('utils')

# Reproducible dataframe for StackOverflow

# 1200 rows x 11 columns (A..K) of random integers drawn from [0, 100).
df = pd.DataFrame(np.random.randint(0,100,size=(1200, 11)), columns=list('ABCDEFGHIJK'))

# Do a median split on valence
# NOTE(review): the median is computed from column 'D' but compared against
# columns 'J' and 'B' below -- confirm this is intended.
valence_median = df['D'].median()
# Placeholder values; both columns are overwritten by the np.where calls below.
df['valence_median_split'] = ''
df['Category'] = ''
df['valence_median_split'] = np.where(df['J'] < valence_median, 'Low_Valence', 'High_Valence')
df['Category'] = np.where(df['B'] < valence_median, 'Faces', 'Houses')

# Start from an all-NaN column, then label a few randomly chosen rows.
df['temp_selection'] = np.nan
# np.random.choice samples with replacement by default, so each draw yields
# at most two distinct row labels.
low = np.random.choice(df.index[df['valence_median_split'] == 'Low_Valence'], size=2)
high = np.random.choice(df.index[df['valence_median_split'] == 'High_Valence'], size=2)
df.loc[low, 'temp_selection'] = 'Low'
df.loc[high, 'temp_selection'] = 'High'
# Keep only the rows that received a 'Low'/'High' label.
df_temp = df[df.temp_selection.notnull()]
# NOTE(review): this filters `df`, not the `df_temp` built on the previous
# line, so the notnull() filter is discarded and temp_selection still holds
# NaN values next to the 'Low'/'High' strings when it is pushed to R.
df_temp = df[df.Category != 'Faces']
# rpy2 IPython magics: copy df_temp into the R session, then list its column
# names as seen by R.
%Rpush df_temp
%R names(df_temp)

Вывод:

array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
       'valence_median_split', 'Category', 'temp_selection.4',
       'temp_selection.6', 'temp_selection.7', 'temp_selection.10',
       'temp_selection.13', 'temp_selection.16', 'temp_selection.17',
       'temp_selection.19', 'temp_selection.21', 'temp_selection.22',
       'temp_selection.30', 'temp_selection.35', 'temp_selection.42',
       'temp_selection.43', 'temp_selection.44', 'temp_selection.46',
       'temp_selection.51', 'temp_selection.53', 'temp_selection.54',
       'temp_selection.56', 'temp_selection.57', 'temp_selection.58',
       'temp_selection.59', 'temp_selection.62', 'temp_selection.65',
       'temp_selection.67', 'temp_selection.68', 'temp_selection.69',
       'temp_selection.71', 'temp_selection.77', 'temp_selection.78',
       'temp_selection.82', 'temp_selection.88', 'temp_selection.89',
       'temp_selection.90', 'temp_selection.94', 'temp_selection.96',
       'temp_selection.97', 'temp_selection.98', 'temp_selection.99',
       'temp_selection.100', 'temp_selection.101', 'temp_selection.102',
       'temp_selection.106', 'temp_selection.109', 'temp_selection.115',
       'temp_selection.117', 'temp_selection.121', 'temp_selection.122',
       'temp_selection.123', 'temp_selection.124', 'temp_selection.125',
       'temp_selection.128', 'temp_selection.129', 'temp_selection.132',
       'temp_selection.133', 'temp_selection.134', 'temp_selection.138',
       'temp_selection.140', 'temp_selection.142', 'temp_selection.143',
       'temp_selection.144', 'temp_selection.145', 'temp_selection.148',
       'temp_selection.149', 'temp_selection.151', 'temp_selection.152',
       'temp_selection.153', 'temp_selection.154', 'temp_selection.156',
       'temp_selection.157', 'temp_selection.158', 'temp_selection.160',
       'temp_selection.161', 'temp_selection.163', 'temp_selection.165',
       'temp_selection.167', 'temp_selection.169', 'temp_selection.171',
       'temp_selection.172', 'temp_selection.173', 'temp_selection.176',
       'temp_selection.178', 'temp_selection.179', 'temp_selection.180',
       'temp_selection.184', 'temp_selection.185', 'temp_selection.186',
       'temp_selection.187', 'temp_selection.188', 'temp_selection.190',
       'temp_selection.194', 'temp_selection.195', 'temp_selection.197',
       'temp_selection.198', 'temp_selection.199', 'temp_selection.201',
       'temp_selection.204', 'temp_selection.205', 'temp_selection.207',
       'temp_selection.210', 'temp_selection.212', 'temp_selection.214',
       'temp_selection.215', 'temp_selection.217', 'temp_selection.218',
       'temp_selection.219', 'temp_selection.220', 'temp_selection.225',
       'temp_selection.227', 'temp_selection.228', 'temp_selection.229',
       'temp_selection.230', 'temp_selection.231', 'temp_selection.234',
       'temp_selection.236', 'temp_selection.237', 'temp_selection.238',
       'temp_selection.241', 'temp_selection.243', 'temp_selection.248',
       'temp_selection.249', 'temp_selection.254', 'temp_selection.256',
       'temp_selection.262', 'temp_selection.265', 'temp_selection.266',
       'temp_selection.267', 'temp_selection.270', 'temp_selection.271',
       'temp_selection.274', 'temp_selection.278', 'temp_selection.280',
       'temp_selection.282', 'temp_selection.284', 'temp_selection.286',
       'temp_selection.287', 'temp_selection.291', 'temp_selection.293',
       'temp_selection.294', 'temp_selection.295', 'temp_selection.297',
       'temp_selection.298', 'temp_selection.301', 'temp_selection.302',
       'temp_selection.307', 'temp_selection.311', 'temp_selection.312',
       'temp_selection.313', 'temp_selection.314', 'temp_selection.317',
       'temp_selection.318', 'temp_selection.319', 'temp_selection.320',
       'temp_selection.321', 'temp_selection.323', 'temp_selection.327',
       'temp_selection.328', 'temp_selection.329', 'temp_selection.331',
       'temp_selection.334', 'temp_selection.335', 'temp_selection.336',
       'temp_selection.341', 'temp_selection.342', 'temp_selection.345',
       'temp_selection.346', 'temp_selection.347', 'temp_selection.348',
       'temp_selection.354', 'temp_selection.355', 'temp_selection.357',
       'temp_selection.359', 'temp_selection.360', 'temp_selection.363',
       'temp_selection.364', 'temp_selection.365', 'temp_selection.366',
       'temp_selection.368', 'temp_selection.369', 'temp_selection.370',
       'temp_selection.371', 'temp_selection.372', 'temp_selection.373',
       'temp_selection.374', 'temp_selection.375', 'temp_selection.376',
       'temp_selection.377', 'temp_selection.380', 'temp_selection.382',
       'temp_selection.383', 'temp_selection.386', 'temp_selection.389',
       'temp_selection.390', 'temp_selection.391', 'temp_selection.394',
       'temp_selection.395', 'temp_selection.396', 'temp_selection.397',
       'temp_selection.398', 'temp_selection.399', 'temp_selection.402',
       'temp_selection.405', 'temp_selection.408', 'temp_selection.410',
       'temp_selection.413', 'temp_selection.414', 'temp_selection.415',
       'temp_selection.417', 'temp_selection.420', 'temp_selection.422',
       'temp_selection.424', 'temp_selection.426', 'temp_selection.428',
       'temp_selection.433', 'temp_selection.434', 'temp_selection.437',
       'temp_selection.442', 'temp_selection.443', 'temp_selection.445',
       'temp_selection.446', 'temp_selection.447', 'temp_selection.448',
       'temp_selection.451', 'temp_selection.452', 'temp_selection.455',
       'temp_selection.458', 'temp_selection.460', 'temp_selection.461',
       'temp_selection.462', 'temp_selection.466', 'temp_selection.474',
       'temp_selection.477', 'temp_selection.478', 'temp_selection.482',
       'temp_selection.484', 'temp_selection.486', 'temp_selection.487',
       'temp_selection.488', 'temp_selection.489', 'temp_selection.492',
       'temp_selection.493', 'temp_selection.495', 'temp_selection.496',
       'temp_selection.497', 'temp_selection.500', 'temp_selection.502',
       'temp_selection.505', 'temp_selection.506', 'temp_selection.507',
       'temp_selection.508', 'temp_selection.509', 'temp_selection.510',
       'temp_selection.511', 'temp_selection.513', 'temp_selection.518',
       'temp_selection.519', 'temp_selection.520', 'temp_selection.522',
       'temp_selection.524', 'temp_selection.525', 'temp_selection.526',
       'temp_selection.527', 'temp_selection.528', 'temp_selection.529',
       'temp_selection.530', 'temp_selection.531', 'temp_selection.532',
       'temp_selection.533', 'temp_selection.535', 'temp_selection.537',
       'temp_selection.538', 'temp_selection.539', 'temp_selection.542',
       'temp_selection.543', 'temp_selection.545', 'temp_selection.547',
       'temp_selection.548', 'temp_selection.549', 'temp_selection.551',
       'temp_selection.554', 'temp_selection.555', 'temp_selection.557',
       'temp_selection.558', 'temp_selection.561', 'temp_selection.562',
       'temp_selection.563', 'temp_selection.565', 'temp_selection.567',
       'temp_selection.569', 'temp_selection.570', 'temp_selection.571',
       'temp_selection.572', 'temp_selection.574', 'temp_selection.576',
       'temp_selection.580', 'temp_selection.582', 'temp_selection.583',
       'temp_selection.584', 'temp_selection.585', 'temp_selection.587',
       'temp_selection.588', 'temp_selection.589', 'temp_selection.592',
       'temp_selection.599', 'temp_selection.600', 'temp_selection.601',
       'temp_selection.602', 'temp_selection.603', 'temp_selection.606',
       'temp_selection.607', 'temp_selection.608', 'temp_selection.609',
       'temp_selection.611', 'temp_selection.614', 'temp_selection.616',
       'temp_selection.618', 'temp_selection.619', 'temp_selection.621',
       'temp_selection.622', 'temp_selection.623', 'temp_selection.631',
       'temp_selection.632', 'temp_selection.633', 'temp_selection.636',
       'temp_selection.637', 'temp_selection.639', 'temp_selection.640',
       'temp_selection.641', 'temp_selection.642', 'temp_selection.643',
       'temp_selection.645', 'temp_selection.646', 'temp_selection.647',
       'temp_selection.650', 'temp_selection.652', 'temp_selection.654',
       'temp_selection.655', 'temp_selection.656', 'temp_selection.657',
       'temp_selection.658', 'temp_selection.659', 'temp_selection.660',
       'temp_selection.665', 'temp_selection.666', 'temp_selection.668',
       'temp_selection.672', 'temp_selection.673', 'temp_selection.675',
       'temp_selection.676', 'temp_selection.677', 'temp_selection.678',
       'temp_selection.679', 'temp_selection.682', 'temp_selection.685',
       'temp_selection.687', 'temp_selection.688', 'temp_selection.689',
       'temp_selection.691', 'temp_selection.692', 'temp_selection.694',
       'temp_selection.697', 'temp_selection.700', 'temp_selection.703',
       'temp_selection.704', 'temp_selection.706', 'temp_selection.707',
       'temp_selection.708', 'temp_selection.713', 'temp_selection.715',
       'temp_selection.719', 'temp_selection.721', 'temp_selection.723',
       'temp_selection.724', 'temp_selection.726', 'temp_selection.727',
       'temp_selection.729', 'temp_selection.731', 'temp_selection.732',
       'temp_selection.734', 'temp_selection.735', 'temp_selection.738',
       'temp_selection.740', 'temp_selection.741', 'temp_selection.743',
       'temp_selection.744', 'temp_selection.746', 'temp_selection.747',
       'temp_selection.757', 'temp_selection.758', 'temp_selection.760',
       'temp_selection.761', 'temp_selection.762', 'temp_selection.764',
       'temp_selection.765', 'temp_selection.766', 'temp_selection.769',
       'temp_selection.770', 'temp_selection.771', 'temp_selection.773',
       'temp_selection.774', 'temp_selection.776', 'temp_selection.777',
       'temp_selection.781', 'temp_selection.783', 'temp_selection.786',
       'temp_selection.788', 'temp_selection.791', 'temp_selection.792',
       'temp_selection.794', 'temp_selection.797', 'temp_selection.798',
       'temp_selection.803', 'temp_selection.806', 'temp_selection.807',
       'temp_selection.808', 'temp_selection.810', 'temp_selection.814',
       'temp_selection.815', 'temp_selection.816', 'temp_selection.817',
       'temp_selection.818', 'temp_selection.823', 'temp_selection.824',
       'temp_selection.825', 'temp_selection.826', 'temp_selection.827',
       'temp_selection.829', 'temp_selection.830', 'temp_selection.831',
       'temp_selection.832', 'temp_selection.834', 'temp_selection.837',
       'temp_selection.838', 'temp_selection.839', 'temp_selection.840',
       'temp_selection.841', 'temp_selection.843', 'temp_selection.844',
       'temp_selection.846', 'temp_selection.848', 'temp_selection.849',
       'temp_selection.851', 'temp_selection.853', 'temp_selection.854',
       'temp_selection.855', 'temp_selection.859', 'temp_selection.860',
       'temp_selection.863', 'temp_selection.867', 'temp_selection.871',
       'temp_selection.872', 'temp_selection.874', 'temp_selection.875',
       'temp_selection.876', 'temp_selection.878', 'temp_selection.879',
       'temp_selection.880', 'temp_selection.881', 'temp_selection.882',
       'temp_selection.886', 'temp_selection.893', 'temp_selection.899',
       'temp_selection.900', 'temp_selection.901', 'temp_selection.902',
       'temp_selection.903', 'temp_selection.904', 'temp_selection.907',
       'temp_selection.908', 'temp_selection.910', 'temp_selection.914',
       'temp_selection.915', 'temp_selection.916', 'temp_selection.917',
       'temp_selection.926', 'temp_selection.927', 'temp_selection.933',
       'temp_selection.937', 'temp_selection.939', 'temp_selection.941',
       'temp_selection.942', 'temp_selection.946', 'temp_selection.947',
       'temp_selection.951', 'temp_selection.952', 'temp_selection.953',
       'temp_selection.956', 'temp_selection.957', 'temp_selection.958',
       'temp_selection.959', 'temp_selection.960', 'temp_selection.962',
       'temp_selection.965', 'temp_selection.966', 'temp_selection.968',
       'temp_selection.971', 'temp_selection.975', 'temp_selection.977',
       'temp_selection.979', 'temp_selection.980', 'temp_selection.982',
       'temp_selection.984', 'temp_selection.990', 'temp_selection.991',
       'temp_selection.993', 'temp_selection.995', 'temp_selection.996',
       'temp_selection.997', 'temp_selection.998', 'temp_selection.999',
       'temp_selection.1004', 'temp_selection.1006',
       'temp_selection.1009', 'temp_selection.1010',
       'temp_selection.1011', 'temp_selection.1012',
       'temp_selection.1013', 'temp_selection.1015',
       'temp_selection.1017', 'temp_selection.1018',
       'temp_selection.1019', 'temp_selection.1020',
       'temp_selection.1021', 'temp_selection.1022',
       'temp_selection.1023', 'temp_selection.1024',
       'temp_selection.1025', 'temp_selection.1026',
       'temp_selection.1027', 'temp_selection.1030',
       'temp_selection.1031', 'temp_selection.1037',
       'temp_selection.1040', 'temp_selection.1043',
       'temp_selection.1044', 'temp_selection.1045',
       'temp_selection.1047', 'temp_selection.1048',
       'temp_selection.1050', 'temp_selection.1053',
       'temp_selection.1059', 'temp_selection.1062',
       'temp_selection.1066', 'temp_selection.1067',
       'temp_selection.1070', 'temp_selection.1073',
       'temp_selection.1076', 'temp_selection.1079',
       'temp_selection.1080', 'temp_selection.1081',
       'temp_selection.1082', 'temp_selection.1085',
       'temp_selection.1089', 'temp_selection.1090',
       'temp_selection.1091', 'temp_selection.1093',
       'temp_selection.1097', 'temp_selection.1099',
       'temp_selection.1101', 'temp_selection.1103',
       'temp_selection.1104', 'temp_selection.1106',
       'temp_selection.1107', 'temp_selection.1109',
       'temp_selection.1111', 'temp_selection.1113',
       'temp_selection.1114', 'temp_selection.1116',
       'temp_selection.1117', 'temp_selection.1119',
       'temp_selection.1121', 'temp_selection.1122',
       'temp_selection.1131', 'temp_selection.1134',
       'temp_selection.1135', 'temp_selection.1136',
       'temp_selection.1138', 'temp_selection.1139',
       'temp_selection.1140', 'temp_selection.1145',
       'temp_selection.1146', 'temp_selection.1148',
       'temp_selection.1149', 'temp_selection.1152',
       'temp_selection.1153', 'temp_selection.1159',
       'temp_selection.1161', 'temp_selection.1164',
       'temp_selection.1167', 'temp_selection.1168',
       'temp_selection.1171', 'temp_selection.1173',
       'temp_selection.1174', 'temp_selection.1178',
       'temp_selection.1181', 'temp_selection.1184',
       'temp_selection.1185', 'temp_selection.1186',
       'temp_selection.1187', 'temp_selection.1188',
       'temp_selection.1190', 'temp_selection.1191',
       'temp_selection.1196'], dtype='<U20')

Проблема в том, что столбец temp_selection появляется несколько раз в R, но только один раз в pandas df.

Какой хороший способ исправить эту проблему?

1 Ответ

0 голосов
/ 02 апреля 2019

Вот решение, если кто-то найдет его полезным в будущем.

Значения в столбце, который размножился в R, интерпретировались неверно. Чтобы этого не происходило, я преобразовал их в строки в pandas с помощью следующего кода:

# Cast every value in the column to str so it has a single, uniform type
# before the frame is pushed to R.
# NOTE(review): missing values presumably become the literal string 'nan'
# after this cast -- confirm that is acceptable on the R side.
# NOTE(review): df_temp is a filtered selection of df, so this assignment may
# trigger pandas' SettingWithCopyWarning -- consider taking a .copy() first.
df_temp['temp_selection'] = df_temp['temp_selection'].astype(str) # Convert to string so R receives one column

После этого значения стали интерпретироваться правильно, и проблема при передаче (push) данных в R исчезла.

...