Экспорт данных с Python на SQL Сервер - PullRequest
0 голосов
/ 27 марта 2020

Я пытаюсь вернуть SQL СЕРВЕР фрейм данных, но мне трудно это сделать. Со следующим кодом я получаю эту ошибку:

pyodb c .DataError: ('22008', '[22008] [Microsoft] [ODB C SQL Драйвер сервера] Превышение емкость поля datetime (0) (SQLExecDirectW) ')

Вот мой код до сих пор:

import pandas as pd
import pyodbc
import numpy as np

df = pd.read_excel(r'path.xlsx')

new_names = {"Calendar Date": "CALENDAR_DATE",
             "Origin ID": "ORIGIN_ID",
             "Dest ID": "DEST_ID",
             "Destination Name": "DESTINATION_NAME",
             "Destination City": "DESTINATION_CITY",
             "Destination State": "DESTINATION_STATE",
             "Carrier Name": "CARRIER_NAME",
             "Stop Number": "STOP_NUMBER",
             "Planned Arrival Time Start": "PLANNED_ARRIVAL_TIME_START",
             "Planned Arrival Time End": "PLANNED_ARRIVAL_TIME_END",
             "Delivery App't Time Start": "DELIVERY_APPT_TIME_START",
             "Delivery App't Time End": "DELIVERY_APPT_TIME_END",
             "Actual Delivery Departure Time": "ACTUAL_DELIVERY_DEPARTURE_TIME",
             "Reason Code and Description": "REASON_CODE_AND_DESCRIPTION",
             "Days Late Vs Plan": "DAYS_LATE_VS_PLAN",
             "Hrs Late Vs Plan": "HRS_LATE_VS_PLAN",
             "Days Late Vs Appt": "DAYS_LATE_VS_APPT",
             "Hrs Late Vs Appt": "HRS_LATE_VS_APPT"}

df.rename(columns=new_names, inplace=True)

conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=xxx;'
                      'Database=Business_Planning;'
                      'UID="xxx";'
                      'PWD="xxx";'
                      'Trusted_Connection=yes;')
cursor = conn.cursor()

SQL_Query = pd.read_sql_query('SELECT * FROM Business_Planning.dbo.OTD_1_DELIVERY_TRACKING_F_IMPORT', conn)

df2 = pd.DataFrame(SQL_Query, columns=["CALENDAR_DATE", "ORIGIN_ID", "DEST_ID", "DESTINATION_NAME", "DESTINATION_CITY",
                                       "DESTINATION_STATE", "SCAC", "CARRIER_NAME", "SID", "STOP_NUMBER",
                                       "PLANNED_ARRIVAL_TIME_START", "PLANNED_ARRIVAL_TIME_END",
                                       "DELIVERY_APPT_TIME_START", "DELIVERY_APPT_TIME_END",
                                       "ACTUAL_DELIVERY_DEPARTURE_TIME", "REASON_CODE_AND_DESCRIPTION",
                                       "DAYS_LATE_VS_PLAN", "HRS_LATE_VS_PLAN", "DAYS_LATE_VS_APPT",
                                       "HRS_LATE_VS_APPT"])

df3 = pd.concat([df2, df]).drop_duplicates(["SID", "STOP_NUMBER", "PLANNED_ARRIVAL_TIME_START"],
                                           keep='last').sort_values(
                                           ["SID", "STOP_NUMBER", "PLANNED_ARRIVAL_TIME_START"])
df3['SID'].replace('', np.nan, inplace=True)
df3.dropna(subset=['SID'], inplace=True)

conn.execute('TRUNCATE TABLE Business_Planning.dbo.OTD_1_DELIVERY_TRACKING_F_IMPORT')

for index, row in df3.iterrows():
    conn.execute(
        "INSERT INTO OTD_1_DELIVERY_TRACKING_F_IMPORT([CALENDAR_DATE], [ORIGIN_ID], [DEST_ID], [DESTINATION_NAME], "
        "[DESTINATION_CITY], [DESTINATION_STATE], [SCAC], [CARRIER_NAME], [SID], [STOP_NUMBER], "
        "[PLANNED_ARRIVAL_TIME_START], [PLANNED_ARRIVAL_TIME_END], [DELIVERY_APPT_TIME_START], "
        "[DELIVERY_APPT_TIME_END], [ACTUAL_DELIVERY_DEPARTURE_TIME], [REASON_CODE_AND_DESCRIPTION], "
        "[DAYS_LATE_VS_PLAN], [HRS_LATE_VS_PLAN], [DAYS_LATE_VS_APPT], [HRS_LATE_VS_APPT]) "
        "values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
        row['CALENDAR_DATE'],
        row['ORIGIN_ID'],
        row['DEST_ID'],
        row['DESTINATION_NAME'],
        row['DESTINATION_CITY'],
        row['DESTINATION_STATE'],
        row['SCAC'],
        row['CARRIER_NAME'],
        row['SID'],
        row['STOP_NUMBER'],
        row['PLANNED_ARRIVAL_TIME_START'],
        row['PLANNED_ARRIVAL_TIME_END'],
        row['DELIVERY_APPT_TIME_START'],
        row['DELIVERY_APPT_TIME_END'],
        row['ACTUAL_DELIVERY_DEPARTURE_TIME'],
        row['REASON_CODE_AND_DESCRIPTION'],
        row['DAYS_LATE_VS_PLAN'],
        row['HRS_LATE_VS_PLAN'],
        row['DAYS_LATE_VS_APPT'],
        row['HRS_LATE_VS_APPT'])
    conn.commit()
conn.commit()
conn.close()

Ошибка исходит от этой части:

for index, row in df3.iterrows():
    conn.execute(
        "INSERT INTO OTD_1_DELIVERY_TRACKING_F_IMPORT([CALENDAR_DATE], [ORIGIN_ID], [DEST_ID], [DESTINATION_NAME], "
        "[DESTINATION_CITY], [DESTINATION_STATE], [SCAC], [CARRIER_NAME], [SID], [STOP_NUMBER], "
        "[PLANNED_ARRIVAL_TIME_START], [PLANNED_ARRIVAL_TIME_END], [DELIVERY_APPT_TIME_START], "
        "[DELIVERY_APPT_TIME_END], [ACTUAL_DELIVERY_DEPARTURE_TIME], [REASON_CODE_AND_DESCRIPTION], "
        "[DAYS_LATE_VS_PLAN], [HRS_LATE_VS_PLAN], [DAYS_LATE_VS_APPT], [HRS_LATE_VS_APPT]) "
        "values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
        row['CALENDAR_DATE'],
        row['ORIGIN_ID'],
        row['DEST_ID'],
        row['DESTINATION_NAME'],
        row['DESTINATION_CITY'],
        row['DESTINATION_STATE'],
        row['SCAC'],
        row['CARRIER_NAME'],
        row['SID'],
        row['STOP_NUMBER'],
        row['PLANNED_ARRIVAL_TIME_START'],
        row['PLANNED_ARRIVAL_TIME_END'],
        row['DELIVERY_APPT_TIME_START'],
        row['DELIVERY_APPT_TIME_END'],
        row['ACTUAL_DELIVERY_DEPARTURE_TIME'],
        row['REASON_CODE_AND_DESCRIPTION'],
        row['DAYS_LATE_VS_PLAN'],
        row['HRS_LATE_VS_PLAN'],
        row['DAYS_LATE_VS_APPT'],
        row['HRS_LATE_VS_APPT'])
    conn.commit()

В перечисленных полях представлены все столбцы из df3. Я не могу понять это правильно, у кого-нибудь есть подсказка?

1 Ответ

1 голос
/ 27 марта 2020

Я настоятельно советую вам использовать метод to_sql() pandas .DataFrame. Кроме того, используйте sqlalchemy lib для подключения к вашей базе данных. Используйте этот пример:

import pyodbc
import sqlalchemy

engine = sqlalchemy.create_engine('mssql+pyodbc://{0}:{1}@{2}:1433/{3}?driver=ODBC+Driver+{4}+for+SQL+Server'.format(username,password,server,bdName,driverVersion))
pd.to_sql("TableName",con=engine,if_exists="append")
...