Я студент, делающий хранилище данных для одного из моих предметов, и мне трудно обойти некоторые ошибки Python (не совсем мой лучший язык). Я читаю данные из файла .csv
, создаю локальную базу данных и сохраняю данные. У меня есть несколько дочерних таблиц, которые содержат FK их соответствующих родителей. Например: Категория, Подкатегория и Продукт отлично работает и вводит данные в мою БД. Но Маркет, Регион и Страна этого не делают, хотя код буквально одинаков ... и поэтому мне нужна ваша помощь!
Вот фрагмент кода.
// IMPORTS
import pymysql
import pandas as pd
import numpy as np
import json
import requests
import random
from sqlalchemy import create_engine
// READ FILE
CSV_FILE_PATH = r"C:\Users\Timon\SPI/Superstore.csv"
df = pd.read_csv(CSV_FILE_PATH, delimiter=',', encoding= 'unicode_escape')
print("CSV size: ", df.shape)
// DB DATA
user = 'root'
passw = '**************'
host = 'localhost'
port = 3306
database = '**************'
// CONNECT TO DB
mydb = create_engine('mysql+pymysql://' + user + ':' + passw + '@' + host + ':' + str(port) + '/' + database , echo=False)
connection = mydb.connect()
// CREATING DB TABLES
product_category_ddl = "CREATE TABLE superstore.product_category (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC));"
connection.execute(product_category_ddl)
product_sub_category_ddl = "CREATE TABLE superstore.product_sub_category (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, product_category_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC), CONSTRAINT product_category_id FOREIGN KEY (product_category_fk) REFERENCES superstore.product_category (id) ON DELETE NO ACTION ON UPDATE CASCADE);"
connection.execute(product_sub_category_ddl)
product_ddl = "CREATE TABLE superstore.product (id INT NOT NULL PRIMARY KEY, name VARCHAR(200) NOT NULL, product_sub_category_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC), CONSTRAINT product_sub_category_fk FOREIGN KEY (product_sub_category_fk) REFERENCES superstore.product_sub_category (id) ON DELETE NO ACTION ON UPDATE CASCADE);"
connection.execute(product_ddl)
segment_ddl = "CREATE TABLE superstore.segment (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC));"
connection.execute(segment_ddl)
market_ddl = "CREATE TABLE superstore.market (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC));"
connection.execute(market_ddl)
region_ddl = "CREATE TABLE superstore.region (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, market_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC), CONSTRAINT market_id FOREIGN KEY (market_fk) REFERENCES superstore.market (id) ON DELETE NO ACTION ON UPDATE CASCADE);"
connection.execute(region_ddl)
country_ddl = "CREATE TABLE superstore.country (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, region_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC), CONSTRAINT region_id FOREIGN KEY (region_fk) REFERENCES superstore.region (id) ON DELETE NO ACTION ON UPDATE CASCADE)"
connection.execute(country_ddl)
state_ddl = "CREATE TABLE superstore.state (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, country_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC), CONSTRAINT country_id FOREIGN KEY (country_fk) REFERENCES superstore.country (id) ON DELETE NO ACTION ON UPDATE CASCADE);"
connection.execute(state_ddl)
city_ddl = "CREATE TABLE superstore.city (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, state_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC), CONSTRAINT state_id FOREIGN KEY (state_fk) REFERENCES superstore.state (id) ON DELETE NO ACTION ON UPDATE CASCADE);"
connection.execute(city_ddl)
ship_mode_ddl = "CREATE TABLE superstore.ship_mode (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC));"
connection.execute(ship_mode_ddl)
customer_ddl = "CREATE TABLE superstore.customer (id INT NOT NULL PRIMARY KEY, name VARCHAR(45) NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC));"
connection.execute(customer_ddl)
order_priority_ddl = "CREATE TABLE superstore.order_priority (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, name VARCHAR(45) NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), UNIQUE INDEX name_UNIQUE (name ASC));"
connection.execute(order_priority_ddl)
order_ddl = "CREATE TABLE superstore.order (id INT NOT NULL PRIMARY KEY, order_date DATE NOT NULL, ship_date DATE NOT NULL, shipping_price DECIMAL NOT NULL, sales DECIMAL NOT NULL, quantity INT NOT NULL, discount DECIMAL NULL, profit DECIMAL NOT NULL, ship_mode_fk INT NOT NULL, order_priority_fk INT NOT NULL, customer_fk INT NOT NULL, city_fk INT NOT NULL, segment_fk INT NOT NULL, product_fk INT NOT NULL, UNIQUE INDEX id_UNIQUE (id ASC), INDEX ship_mode_id_idx (ship_mode_fk ASC), INDEX order_priority_id_idx (order_priority_fk ASC), INDEX customer_id_idx (customer_fk ASC), INDEX city_id_idx (city_fk ASC), INDEX segment_id_idx (segment_fk ASC), INDEX product_id_idx (product_fk ASC), CONSTRAINT ship_mode_id FOREIGN KEY (ship_mode_fk) REFERENCES superstore.ship_mode (id) ON DELETE NO ACTION ON UPDATE CASCADE, CONSTRAINT order_priority_id FOREIGN KEY (order_priority_fk) REFERENCES superstore.order_priority (id) ON DELETE NO ACTION ON UPDATE CASCADE, CONSTRAINT customer_id FOREIGN KEY (customer_fk) REFERENCES superstore.customer (id) ON DELETE NO ACTION ON UPDATE CASCADE, CONSTRAINT city_id FOREIGN KEY (city_fk) REFERENCES superstore.city (id) ON DELETE NO ACTION ON UPDATE CASCADE, CONSTRAINT segment_id FOREIGN KEY (segment_fk) REFERENCES superstore.segment (id) ON DELETE NO ACTION ON UPDATE CASCADE, CONSTRAINT product_id FOREIGN KEY (product_fk) REFERENCES superstore.product (id) ON DELETE NO ACTION ON UPDATE CASCADE);"
connection.execute(order_ddl)
// LOADING DATA INTO DB
product_category_names = df['Category'].unique().tolist()
product_category_data = pd.DataFrame({'id': list(range(1, len(product_category_names) + 1)), 'name': product_category_names})
product_category_data.to_sql(con=mydb, name='product_category', if_exists='append', index=False)
product_sub_category_names = df['Sub-Category'].unique().tolist()
product_category_fk = []
for product_sub_category in product_sub_category_names:
product_category = df['Category'].loc[df['Sub-Category'] == product_sub_category].unique()
product_category_fk.append(int(product_category_data['id'].loc[product_category_data['name'].str.contains(product_category[0])]))
product_sub_category_data = pd.DataFrame({'id': list(range(1, len(product_sub_category_names) + 1)), 'name': product_sub_category_names, 'product_category_fk': product_category_fk})
product_sub_category_data.to_sql(con=mydb, name='product_sub_category', if_exists='append', index=False)
product_names = df['Product Name'].unique().tolist()
product_sub_category_fk = []
for product in product_names:
product_sub_category = df['Sub-Category'].loc[df['Product Name'] == product].unique()
product_sub_category_fk.append(int(product_sub_category_data['id'].loc[product_sub_category_data['name'].str.contains(product_sub_category[0])]))
product_data = pd.DataFrame({'id': list(range(1, len(product_names) + 1)), 'name': product_names, 'product_sub_category_fk': product_sub_category_fk})
product_data.to_sql(con=mydb, name='product', if_exists='append', index=False)
segment_names = df['Segment'].unique().tolist()
segment_data = pd.DataFrame({'id': list(range(1, len(segment_names) + 1)), 'name': segment_names})
segment_data.to_sql(con=mydb, name='segment', if_exists='append', index=False)
market_names = df['Market'].unique().tolist()
market_data = pd.DataFrame({'id': list(range(1, len(market_names) + 1)), 'name': market_names})
market_data.to_sql(con=mydb, name='market', if_exists='append', index=False)
region_names = df['Region'].unique().tolist()
market_fk = []
for region in region_names:
market = df['Market'].loc[df['Region'] == region].unique()
market_fk.append(int(market_data['id'].loc[market_data['name'].str.contains(market[0])]))
region_data = pd.DataFrame({'id': list(range(1, len(region_names) + 1)), 'name': region_names, 'market_fk': market_fk})
region_data.to_sql(con=mydb, name='region', if_exists='append', index=False)
country_names = df['Country'].unique().tolist()
region_fk = []
for country in country_names:
region = df['Region'].loc[df['Country'] == country].unique()
// **The line of code below this one causes the error to happen: TypeError: cannot convert the series to <class 'int'>**
region_fk.append(int(region_data['id'].loc[region_data['name'].str.contains(region[0])]))
country_data = pd.DataFrame({'id': list(range(1, len(country_names) + 1)), 'name': country_names, 'region_fk': region_fk})
country_data.to_sql(con=mydb, name='country', if_exists='append', index=False)
state_names = df['State'].unique().tolist()
country_fk = []
for state in state_names:
country = df['Country'].loc[df['State'] == state].unique()
country_fk.append(int(country_data['id'].loc[country_data['name'].str.contains(country[0])]))
state_data = pd.DataFrame({'id': list(range(1, len(state_names) + 1)), 'name': state_names, 'country_fk': country_fk})
state_data.to_sql(con=mydb, name='state', if_exists='append', index=False)
city_names = df['City'].unique().tolist()
state_fk = []
for city in city_names:
state = df['State'].loc[df['City'] == city].unique()
state_fk.append(int(state_data['id'].loc[state_data['name'].str.contains(state[0])]))
city_data = pd.DataFrame({'id': list(range(1, len(city_names) + 1)), 'name': city_names, 'state_fk': state_fk})
city_data.to_sql(con=mydb, name='city', if_exists='append', index=False)
ship_mode_names = df['Ship Mode'].unique().tolist()
ship_mode_data = pd.DataFrame({'id': list(range(1, len(ship_mode_names) + 1)), 'name': ship_mode_names})
ship_mode_data.to_sql(con=mydb, name='ship_mode', if_exists='append', index=False)
customer_names = df['Customer Name'].unique().tolist()
customer_data = pd.DataFrame({'id': list(range(1, len(customer_names) + 1)), 'name': customer_names})
customer_data.to_sql(con=mydb, name='customer', if_exists='append', index=False)
order_priority_names = df['Order Priority'].unique().tolist()
order_priority_data = pd.DataFrame({'id': list(range(1, len(order_priority_names) + 1)), 'name': order_priority_names})
order_priority_data.to_sql(con=mydb, name='order_priority', if_exists='append', index=False)
Ошибка возникает в части, когда я загружаю данные страны в БД. В частности, вторая строка в for l oop вызывает его и говорит, что не может преобразовать тип класса в int
. Код идентичен коду с продуктами, и он работал там ... Я думал, что проблема может быть с данными, но я проверил все это, и он выглядит просто отлично (идентично продуктам с точки зрения типов). Я пытался удалить AUTO_INCREMENTS
и пытался использовать .astype(int)
для каждой части этого кода.
Файл CSV загружен ЗДЕСЬ.
Итак как мне это сделать?