Вы можете загрузить данные в SQL-базу в памяти или использовать pandas.
Предположим, у нас есть два CSV-файла, как вы определили выше:
/tmp/x.csv
"Atomic symbol","Atomic number"
"H",1
"He",2
"Be",4
"Si",14
"Fe",26
"U",92
"Pu",94
и /tmp/y.csv
"Atomic symbol","Name","Hazard"
"H","Hydrogen","ignition, combustion"
"Be","Beryllium","dust is toxic"
"As","Arsenic","toxic"
"Pu","Plutonium","dust is toxic"
pandas:
import pandas as pd
# Show every column of the merged frame instead of eliding the middle ones.
pd.set_option('display.max_columns', 100)

# Load both CSV files, then combine them on the shared key column.
x, y = (pd.read_csv(path) for path in ('/tmp/x.csv', '/tmp/y.csv'))
result = x.merge(y, on=['Atomic symbol'], how='outer')

# Print the two inputs followed by the merged table.
for frame in (x, y, result):
    print(frame)
→
Atomic symbol Atomic number
0 H 1
1 He 2
...
Atomic symbol Name Hazard
0 H Hydrogen ignition, combustion
1 Be Beryllium dust is toxic
2 As Arsenic toxic
...
Atomic symbol Atomic number Name Hazard
0 H 1.0 Hydrogen ignition, combustion
1 He 2.0 NaN NaN
2 Be 4.0 Beryllium dust is toxic
...
SQL в памяти:
import csv, sqlite3
# Open a SQLite database held in memory (the ":memory:" pseudo-path).
connection = sqlite3.connect(":memory:")
def load_into_table(con, table_name, file_name):
    """Create table ``table_name`` in ``con`` and fill it from a CSV file.

    The first CSV row supplies the column names; every following row is
    inserted as one record. Columns are created without a declared type, so
    values are stored as the strings read from the file. Rows shorter than
    the header get NULL for the missing columns; fields beyond the header
    are ignored.

    Args:
        con: Open ``sqlite3`` connection; it is committed after the insert.
        table_name: Name of the table to create. It is interpolated into the
            SQL verbatim, so it must come from a trusted source.
        file_name: Path of the CSV file to read.

    Raises:
        ValueError: If the file has no header row.
        sqlite3.OperationalError: If the CREATE TABLE statement fails, e.g.
            because the table already exists.
    """
    def quote_identifier(name):
        # Standard SQL identifier quoting; doubling embedded quotes keeps
        # any header text (including backticks) a valid column name.
        return '"{}"'.format(name.replace('"', '""'))

    # newline='' lets the csv module see line breaks inside quoted fields
    # unmodified, as the csv documentation requires.
    with open(file_name, newline='') as f:
        dr = csv.DictReader(f)
        fieldnames = dr.fieldnames
        if not fieldnames:
            raise ValueError(
                "CSV file {!r} has no header row".format(file_name))
        # Select values by header name: DictReader stores surplus fields as
        # a list under the None key, which cannot be bound as a parameter.
        to_db = [[row[name] for name in fieldnames] for row in dr]

    fields = ', '.join(quote_identifier(name) for name in fieldnames)
    values = ', '.join('?' for _ in fieldnames)
    query = "CREATE TABLE {table_name} ({fields});".format(
        table_name=table_name, fields=fields)
    con.execute(query)
    insert_query = "INSERT INTO {table_name} VALUES ({values});".format(
        table_name=table_name, values=values)
    con.executemany(insert_query, to_db)
    con.commit()
# Import each CSV file into a table of the same short name.
for table, csv_path in (('x', '/tmp/x.csv'), ('y', '/tmp/y.csv')):
    load_into_table(con=connection, table_name=table, file_name=csv_path)

# Dump both tables, then the left join of x with y on the symbol column.
for statement in (
    'SELECT * FROM x',
    'SELECT * FROM y',
    'SELECT * FROM x LEFT JOIN y ON x.`Atomic symbol` = y.`Atomic symbol`; ',
):
    print(connection.execute(statement).fetchall())
→
[('H', '1'), ('He', '2'), ('Be', '4'), ...]
[('H', 'Hydrogen', 'ignition, combustion'), ('Be', 'Beryllium', 'dust is toxic'), ...]
[('H', '1', 'H', 'Hydrogen', 'ignition, combustion'), ('He', '2', None, None, None), ...]
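Примечание. SQLite до версии 3.39.0 не поддерживает полное внешнее объединение (FULL OUTER JOIN); поддерживается только LEFT OUTER JOIN, как в примере выше.
В старых версиях его можно эмулировать: http://www.sqlitetutorial.net/sqlite-full-outer-join/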