R&D Spend Administration Marketing Spend State Profit
0 165349.20 136897.80 471784.10 New York 192261.83
1 162597.70 151377.59 443898.53 California 191792.06
2 153441.51 101145.55 407934.54 Florida 191050.39
3 144372.41 118671.85 383199.62 New York 182901.99
4 142107.34 91391.77 366168.42 Florida 166187.94
5 131876.90 99814.71 362861.36 New York 156991.12
6 134615.46 147198.87 127716.82 California 156122.51
7 130298.13 145530.06 323876.68 Florida 155752.60
8 120542.52 148718.95 311613.29 New York 152211.77
9 123334.88 108679.17 304981.62 California 149759.96
10 101913.08 110594.11 229160.95 Florida 146121.95
11 100671.96 91790.61 249744.55 California 144259.40
12 93863.75 127320.38 249839.44 Florida 141585.52
13 91992.39 135495.07 252664.93 California 134307.35
14 119943.24 156547.42 256512.92 Florida 132602.65
15 114523.61 122616.84 261776.23 New York 129917.04
16 78013.11 121597.55 264346.06 California 126992.93
17 94657.16 145077.58 282574.31 New York 125370.37
18 91749.16 114175.79 294919.57 Florida 124266.90
19 86419.70 153514.11 0.00 New York 122776.86
20 76253.86 113867.30 298664.47 California 118474.03
21 78389.47 153773.43 299737.29 New York 111313.02
22 73994.56 122782.75 303319.26 Florida 110352.25
23 67532.53 105751.03 304768.73 Florida 108733.99
24 77044.01 99281.34 140574.81 New York 108552.04
25 64664.71 139553.16 137962.62 California 107404.34
26 75328.87 144135.98 134050.07 Florida 105733.54
27 72107.60 127864.55 353183.81 New York 105008.31
28 66051.52 182645.56 118148.20 Florida 103282.38
29 65605.48 153032.06 107138.38 New York 101004.64
30 61994.48 115641.28 91131.24 Florida 99937.59
31 61136.38 152701.92 88218.23 New York 97483.56
32 63408.86 129219.61 46085.25 California 97427.84
33 55493.95 103057.49 214634.81 Florida 96778.92
34 46426.07 157693.92 210797.67 California 96712.80
35 46014.02 85047.44 205517.64 New York 96479.51
36 28663.76 127056.21 201126.82 Florida 90708.19
37 44069.95 51283.14 197029.42 California 89949.14
38 20229.59 65947.93 185265.10 New York 81229.06
39 38558.51 82982.09 174999.30 California 81005.76
40 28754.33 118546.05 172795.67 California 78239.91
41 27892.92 84710.77 164470.71 Florida 77798.83
42 23640.93 96189.63 148001.11 California 71498.49
43 15505.73 127382.30 35534.17 New York 69758.98
44 22177.74 154806.14 28334.72 California 65200.33
45 1000.23 124153.04 1903.93 New York 64926.08
46 1315.46 115816.21 297114.46 Florida 49490.75
47 0.00 135426.92 0.00 California 42559.73
48 542.05 51743.15 0.00 New York 35673.41
49 0.00 116983.80 45173.06 California 14681.40
Код:
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
# One-hot encode the categorical 'State' column (index 3) and pass the remaining
# columns through unchanged.
# sparse_threshold=0 makes ColumnTransformer always return a dense array, so the
# np.array(...) conversion below never receives a SciPy sparse matrix.
ct = ColumnTransformer(
    [('State', OneHotEncoder(), [3])],
    remainder='passthrough',
    sparse_threshold=0,
)
# NOTE: X must be the original dense feature array at this point. The
# "A sparse matrix was passed, but dense data is required" TypeError is raised
# inside OneHotEncoder when X itself is already a sparse matrix -- presumably
# because X was overwritten by an earlier encoding step (e.g. this cell was run
# twice). Reload X from the dataset before re-running instead of re-encoding it.
# The builtin float replaces the np.float alias, which was deprecated in
# NumPy 1.20 and removed in NumPy 1.24.
X = np.array(ct.fit_transform(X), dtype=float)
При выполнении этого кода я получаю следующее сообщение об ошибке:
TypeError Traceback (most recent call last)
<ipython-input-36-17f64bed7e4c> in <module>
3
4 ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
----> 5 X = np.array(ct.fit_transform(X), dtype=object)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
516 self._validate_remainder(X)
517
--> 518 result = self._fit_transform(X, y, _fit_transform_one)
519
520 if not result:
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
446 self._iter(fitted=fitted, replace_strings=True))
447 try:
--> 448 return Parallel(n_jobs=self.n_jobs)(
449 delayed(func)(
450 transformer=clone(trans) if not fitted else trans,
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
833 return False
834 else:
--> 835 self._dispatch(tasks)
836 return True
837
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
752 with self._lock:
753 job_idx = len(self._jobs)
--> 754 job = self._backend.apply_async(batch, callback=cb)
755 # A job can complete so quickly than its callback is
756 # called before we get here, causing self._jobs to
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
207 def apply_async(self, func, callback=None):
208 """Schedule a func to be run"""
--> 209 result = ImmediateResult(func)
210 if callback:
211 callback(result)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
588 # Don't delay the application, to avoid keeping the input
589 # arguments in memory
--> 590 self.results = batch()
591
592 def get(self):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
370 """
371 self._validate_keywords()
--> 372 return super().fit_transform(X, y)
373
374 def transform(self, X):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
569 if y is None:
570 # fit method of arity 1 (unsupervised transformation)
--> 571 return self.fit(X, **fit_params).transform(X)
572 else:
573 # fit method of arity 2 (supervised transformation)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
345 """
346 self._validate_keywords()
--> 347 self._fit(X, handle_unknown=self.handle_unknown)
348 self.drop_idx_ = self._compute_drop_idx()
349 return self
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
72
73 def _fit(self, X, handle_unknown='error'):
---> 74 X_list, n_samples, n_features = self._check_X(X)
75
76 if self.categories != 'auto':
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
41 if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):
42 # if not a dataframe, do normal check_array validation
---> 43 X_temp = check_array(X, dtype=None)
44 if (not hasattr(X, 'dtype')
45 and np.issubdtype(X_temp.dtype, np.str_)):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
506 if sp.issparse(array):
507 _ensure_no_complex_data(array)
--> 508 array = _ensure_sparse_format(array, accept_sparse=accept_sparse,
509 dtype=dtype, copy=copy,
510 force_all_finite=force_all_finite,
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, force_all_finite, accept_large_sparse)
304
305 if accept_sparse is False:
--> 306 raise TypeError('A sparse matrix was passed, but dense '
307 'data is required. Use X.toarray() to '
308 'convert to a dense numpy array.')
TypeError: A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
Пожалуйста, помогите мне решить эту проблему.