Количество меток не соответствует выборкам при регрессии дерева решений - PullRequest
0 голосов
/ 03 марта 2019

Пытаясь запустить регрессор дерева решений для моих данных, но всякий раз, когда я пытаюсь выполнить свой код, я получаю эту ошибку

ValueError: Количество меток = 78177 не соответствует количеству выборок =312706

#feature selection
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
target = ['sale_price']
train, test = train_test_split(housing_data, test_size=0.2)
regression_tree = DecisionTreeRegressor(criterion="entropy",random_state=100,
                                        max_depth=4,min_samples_leaf=5)
regression_tree.fit(train,test)

Я добавил образец своего кода, надеюсь, это даст вам больше контекста, чтобы помочь лучше понять мой вопрос и проблему:

{'Age of House at Sale': {0: 6,
  1: 2016,
  2: 92,
  3: 42,
  4: 90,
  5: 2012,
  6: 89,
  7: 3,
  8: 2015,
  9: 104},
 'AreaSource': {0: 2.0,
  1: 7.0,
  2: 2.0,
  3: 2.0,
  4: 2.0,
  5: 2.0,
  6: 2.0,
  7: 2.0,
  8: 2.0,
  9: 2.0},
 'AssessLand': {0: 9900.0,
  1: 1571850.0,
  2: 1548000.0,
  3: 36532350.0,
  4: 2250000.0,
  5: 3110400.0,
  6: 2448000.0,
  7: 1354500.0,
  8: 1699200.0,
  9: 1282500.0},
 'AssessTot': {0: 34380.0,
  1: 1571850.0,
  2: 25463250.0,
  3: 149792400.0,
  4: 27166050.0,
  5: 5579990.0,
  6: 28309500.0,
  7: 23965650.0,
  8: 3534300.0,
  9: 11295000.0},
 'BldgArea': {0: 2688.0,
  1: 0.0,
  2: 304650.0,
  3: 2548000.0,
  4: 356000.0,
  5: 382746.0,
  6: 290440.0,
  7: 241764.0,
  8: 463427.0,
  9: 547000.0},
 'BldgClass': {0: 72,
  1: 89,
  2: 80,
  3: 157,
  4: 150,
  5: 44,
  6: 92,
  7: 43,
  8: 39,
  9: 61},
 'BldgDepth': {0: 50.0,
  1: 0.0,
  2: 92.0,
  3: 0.0,
  4: 100.33,
  5: 315.0,
  6: 125.0,
  7: 100.0,
  8: 0.0,
  9: 80.92},
 'BldgFront': {0: 20.0,
  1: 0.0,
  2: 335.0,
  3: 0.0,
  4: 202.0,
  5: 179.0,
  6: 92.0,
  7: 500.0,
  8: 0.0,
  9: 304.0},
 'BsmtCode': {0: 5.0,
  1: 5.0,
  2: 5.0,
  3: 5.0,
  4: 2.0,
  5: 5.0,
  6: 2.0,
  7: 2.0,
  8: 5.0,
  9: 5.0},
 'CD': {0: 310.0,
  1: 302.0,
  2: 302.0,
  3: 318.0,
  4: 302.0,
  5: 301.0,
  6: 302.0,
  7: 301.0,
  8: 301.0,
  9: 302.0},
 'ComArea': {0: 0.0,
  1: 0.0,
  2: 304650.0,
  3: 2548000.0,
  4: 30000.0,
  5: 11200.0,
  6: 290440.0,
  7: 27900.0,
  8: 4884.0,
  9: 547000.0},
 'CommFAR': {0: 0.0,
  1: 2.0,
  2: 2.0,
  3: 2.0,
  4: 0.0,
  5: 0.0,
  6: 10.0,
  7: 2.0,
  8: 0.0,
  9: 2.0},
 'Council': {0: 41.0,
  1: 33.0,
  2: 33.0,
  3: 46.0,
  4: 33.0,
  5: 33.0,
  6: 33.0,
  7: 33.0,
  8: 33.0,
  9: 35.0},
 'Easements': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 1.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0},
 'ExemptLand': {0: 0.0,
  1: 1571850.0,
  2: 0.0,
  3: 0.0,
  4: 2250000.0,
  5: 0.0,
  6: 0.0,
  7: 932847.0,
  8: 0.0,
  9: 0.0},
 'ExemptTot': {0: 0.0,
  1: 1571850.0,
  2: 0.0,
  3: 0.0,
  4: 27166050.0,
  5: 0.0,
  6: 11304900.0,
  7: 23543997.0,
  8: 0.0,
  9: 0.0},
 'FacilFAR': {0: 0.0,
  1: 6.5,
  2: 0.0,
  3: 0.0,
  4: 4.8,
  5: 4.8,
  6: 10.0,
  7: 3.0,
  8: 5.0,
  9: 4.8},
 'FactryArea': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 547000.0},
 'GarageArea': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 1285000.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 22200.0,
  8: 0.0,
  9: 0.0},
 'HealthArea': {0: 6410.0,
  1: 1000.0,
  2: 2300.0,
  3: 8822.0,
  4: 2300.0,
  5: 400.0,
  6: 2300.0,
  7: 700.0,
  8: 500.0,
  9: 9300.0},
 'HealthCent': {0: 35.0,
  1: 36.0,
  2: 38.0,
  3: 35.0,
  4: 38.0,
  5: 30.0,
  6: 38.0,
  7: 30.0,
  8: 30.0,
  9: 36.0},
 'IrrLotCode': {0: 1, 1: 1, 2: 0, 3: 0, 4: 1, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0},
 'LandUse': {0: 2.0,
  1: 10.0,
  2: 5.0,
  3: 5.0,
  4: 8.0,
  5: 4.0,
  6: 5.0,
  7: 3.0,
  8: 3.0,
  9: 6.0},
 'LotArea': {0: 2252.0,
  1: 134988.0,
  2: 32000.0,
  3: 905000.0,
  4: 20267.0,
  5: 57600.0,
  6: 12500.0,
  7: 50173.0,
  8: 44704.0,
  9: 113800.0},
 'LotDepth': {0: 100.0,
  1: 275.33,
  2: 335.92,
  3: 859.0,
  4: 100.33,
  5: 320.0,
  6: 125.0,
  7: 200.0,
  8: 281.86,
  9: 204.0},
 'LotFront': {0: 24.0,
  1: 490.5,
  2: 92.42,
  3: 930.0,
  4: 202.0,
  5: 180.0,
  6: 100.0,
  7: 521.25,
  8: 225.08,
  9: 569.0},
 'LotType': {0: 5.0,
  1: 5.0,
  2: 3.0,
  3: 3.0,
  4: 3.0,
  5: 3.0,
  6: 3.0,
  7: 1.0,
  8: 5.0,
  9: 3.0},
 'NumBldgs': {0: 1.0,
  1: 0.0,
  2: 1.0,
  3: 4.0,
  4: 1.0,
  5: 1.0,
  6: 1.0,
  7: 1.0,
  8: 2.0,
  9: 13.0},
 'NumFloors': {0: 2.0,
  1: 0.0,
  2: 13.0,
  3: 2.0,
  4: 15.0,
  5: 0.0,
  6: 37.0,
  7: 6.0,
  8: 20.0,
  9: 8.0},
 'OfficeArea': {0: 0.0,
  1: 0.0,
  2: 264750.0,
  3: 0.0,
  4: 30000.0,
  5: 1822.0,
  6: 274500.0,
  7: 4200.0,
  8: 0.0,
  9: 0.0},
 'OtherArea': {0: 0.0,
  1: 0.0,
  2: 39900.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0},
 'PolicePrct': {0: 70.0,
  1: 84.0,
  2: 84.0,
  3: 63.0,
  4: 84.0,
  5: 90.0,
  6: 84.0,
  7: 94.0,
  8: 90.0,
  9: 88.0},
 'ProxCode': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 1.0,
  8: 0.0,
  9: 0.0},
 'ResArea': {0: 2172.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 371546.0,
  6: 0.0,
  7: 213864.0,
  8: 458543.0,
  9: 0.0},
 'ResidFAR': {0: 2.0,
  1: 7.2,
  2: 0.0,
  3: 0.0,
  4: 2.43,
  5: 2.43,
  6: 10.0,
  7: 3.0,
  8: 5.0,
  9: 0.0},
 'RetailArea': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 1263000.0,
  4: 0.0,
  5: 9378.0,
  6: 15940.0,
  7: 0.0,
  8: 4884.0,
  9: 0.0},
 'SHAPE_Area': {0: 2316.8863224,
  1: 140131.577176,
  2: 34656.4472405,
  3: 797554.847834,
  4: 21360.1476315,
  5: 58564.8643115,
  6: 12947.145471,
  7: 50772.624868800005,
  8: 47019.5677861,
  9: 118754.78573699998},
 'SHAPE_Leng': {0: 249.41135038849998,
  1: 1559.88914353,
  2: 890.718521021,
  3: 3729.78685686,
  4: 620.761169374,
  5: 1006.33799946,
  6: 460.03168012300006,
  7: 1385.27352839,
  8: 992.915660585,
  9: 1565.91477261},
 'SanitDistr': {0: 10.0,
  1: 2.0,
  2: 2.0,
  3: 18.0,
  4: 2.0,
  5: 1.0,
  6: 2.0,
  7: 1.0,
  8: 1.0,
  9: 2.0},
 'SanitSub': {0: 21,
  1: 23,
  2: 31,
  3: 22,
  4: 31,
  5: 21,
  6: 23,
  7: 7,
  8: 12,
  9: 22},
 'SchoolDist': {0: 19.0,
  1: 13.0,
  2: 13.0,
  3: 22.0,
  4: 13.0,
  5: 14.0,
  6: 13.0,
  7: 14.0,
  8: 14.0,
  9: 14.0},
 'SplitZone': {0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 0, 9: 1},
 'StrgeArea': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 1500.0,
  8: 0.0,
  9: 0.0},
 'UnitsRes': {0: 2.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 522.0,
  6: 0.0,
  7: 234.0,
  8: 470.0,
  9: 0.0},
 'UnitsTotal': {0: 2.0,
  1: 0.0,
  2: 0.0,
  3: 123.0,
  4: 1.0,
  5: 525.0,
  6: 102.0,
  7: 237.0,
  8: 472.0,
  9: 1.0},
 'YearAlter1': {0: 0.0,
  1: 0.0,
  2: 1980.0,
  3: 0.0,
  4: 1998.0,
  5: 0.0,
  6: 2009.0,
  7: 2012.0,
  8: 0.0,
  9: 0.0},
 'YearAlter2': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 2000.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0},
 'ZipCode': {0: 11220.0,
  1: 11201.0,
  2: 11201.0,
  3: 11234.0,
  4: 11201.0,
  5: 11249.0,
  6: 11241.0,
  7: 11211.0,
  8: 11249.0,
  9: 11205.0},
 'ZoneDist1': {0: 24,
  1: 76,
  2: 5,
  3: 64,
  4: 24,
  5: 24,
  6: 30,
  7: 74,
  8: 45,
  9: 27},
 'ZoneMap': {0: 3,
  1: 19,
  2: 19,
  3: 22,
  4: 19,
  5: 19,
  6: 19,
  7: 2,
  8: 19,
  9: 19},
 'building_class': {0: 141,
  1: 97,
  2: 87,
  3: 176,
  4: 168,
  5: 8,
  6: 102,
  7: 46,
  8: 97,
  9: 66},
 'building_class_at_sale': {0: 143,
  1: 98,
  2: 89,
  3: 179,
  4: 171,
  5: 7,
  6: 103,
  7: 49,
  8: 98,
  9: 69},
 'building_class_category': {0: 39,
  1: 71,
  2: 31,
  3: 38,
  4: 86,
  5: 40,
  6: 80,
  7: 75,
  8: 71,
  9: 41},
 'commercial_units': {0: 1,
  1: 0,
  2: 0,
  3: 123,
  4: 1,
  5: 0,
  6: 102,
  7: 3,
  8: 0,
  9: 1},
 'gross_sqft': {0: 0.0,
  1: 0.0,
  2: 304650.0,
  3: 2548000.0,
  4: 356000.0,
  5: 0.0,
  6: 290440.0,
  7: 241764.0,
  8: 0.0,
  9: 547000.0},
 'land_sqft': {0: 0.0,
  1: 134988.0,
  2: 32000.0,
  3: 905000.0,
  4: 20267.0,
  5: 57600.0,
  6: 12500.0,
  7: 50173.0,
  8: 44704.0,
  9: 113800.0},
 'neighborhood': {0: 43,
  1: 48,
  2: 6,
  3: 44,
  4: 6,
  5: 40,
  6: 6,
  7: 28,
  8: 40,
  9: 56},
 'residential_units': {0: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 234,
  8: 0,
  9: 0},
 'sale_date': {0: 2257,
  1: 4839,
  2: 337,
  3: 638,
  4: 27,
  5: 1458,
  6: 2450,
  7: 3276,
  8: 5082,
  9: 1835},
 'sale_price': {0: 499401179.0,
  1: 345000000.0,
  2: 340000000.0,
  3: 276947000.0,
  4: 202500000.0,
  5: 185445000.0,
  6: 171000000.0,
  7: 169000000.0,
  8: 165000000.0,
  9: 161000000.0},
 'tax_class': {0: 3, 1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3, 7: 7, 8: 3, 9: 3},
 'total_units': {0: 1,
  1: 0,
  2: 0,
  3: 123,
  4: 1,
  5: 0,
  6: 102,
  7: 237,
  8: 0,
  9: 1},
 'zip_code': {0: 11201,
  1: 11201,
  2: 11201,
  3: 11234,
  4: 11201,
  5: 11249,
  6: 11241,
  7: 11211,
  8: 11249,
  9: 11205}}
...