Я думал, что pd.get_dummies()
как раз и решает эту проблему. Что я делаю не так?
Попробую привести часть данных — вывод invest.head():
funding_rounds founded_year seed venture equity_crowdfunding undisclosed convertible_note debt_financing angel grant ... last_funding_at_2014-11-21 last_funding_at_2014-11-24 last_funding_at_2014-11-25 last_funding_at_2014-11-26 last_funding_at_2014-11-27 last_funding_at_2014-11-28 last_funding_at_2014-12-01 last_funding_at_2014-12-02 last_funding_at_2014-12-10 last_funding_at_2014-12-24
0 1.0 2012.0 1750000.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 0 0 0 0 0 0
8 1.0 2010.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 0 0 0 0 0 0
12 1.0 1986.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0 0 0 0 0 0 0 0 0 0
Пробую построить базовую модель:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Expand the frame with indicator (dummy) columns before splitting off the
# target.
invest = pd.get_dummies(invest)

# The target header appears to be padded with whitespace in the data, which
# makes a hand-typed literal easy to get wrong. Resolve the actual header once
# by its stripped name and reuse it, so the drop and the selection below can
# never disagree about which column is the target.
TARGET = 'funding_total_usd'
target_matches = [col for col in invest.columns if str(col).strip() == TARGET]
if not target_matches:
    raise KeyError(
        "no column named %r (ignoring surrounding whitespace)" % TARGET)
target_col = target_matches[0]

# Features are everything except the target; the target is that one column.
X = invest.drop(columns=[target_col])
y = invest[target_col]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1)

# Fit a linear regression on the training split and predict the held-out rows.
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_pred = lin_reg.predict(X_test)
но всё равно получаю:
ValueError Traceback (most recent call last)
<ipython-input-112-0a9e89083ca4> in <module>
1 from sklearn.linear_model import LinearRegression
2 lin_reg = LinearRegression()
----> 3 lin_reg.fit(X_train, y_train)
4 y_pred = lin_reg.predict(X_test)
~\Anaconda3\lib\site-packages\sklearn\linear_model\base.py in fit(self, X, y, sample_weight)
461 n_jobs_ = self.n_jobs
462 X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
--> 463 y_numeric=True, multi_output=True)
464
465 if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
717 ensure_min_features=ensure_min_features,
718 warn_on_dtype=warn_on_dtype,
--> 719 estimator=estimator)
720 if multi_output:
721 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
534 # make sure we actually converted to numeric:
535 if dtype_numeric and array.dtype.kind == "O":
--> 536 array = array.astype(np.float64)
537 if not allow_nd and array.ndim >= 3:
538 raise ValueError("Found array with dim %d. %s expected <= 2."
ValueError: could not convert string to float: '/organization/crowdpc-inc