在数据探查和分析时,经常会将 CSV、XML 或 JSON 等文件加载到 pandas DataFrame 中,
但 DataFrame 不能直接使用 SQL 进行分析。
本文提供两种示例:先将 DataFrame 写入 SQLite,然后再使用 SQL 进行分析。
基于sqlalchemy实现
import sqlite3

import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
# Demo 1: persist a DataFrame to SQLite through SQLAlchemy, then query it with SQL.
data = {
    'product_name': ['Computer', 'Tablet', 'Monitor', 'Printer'],
    'price': [900, 300, 450, 150],
}
df = pd.DataFrame(data, columns=['product_name', 'price'])

engine = create_engine('sqlite:///test.db', echo=False)

# Pass the Engine itself so pandas manages the connection it needs; the
# previous engine.connect() argument created a connection that was never closed.
df.to_sql('products', engine, if_exists='replace', index=False)

# Query through pandas: the result comes back as a DataFrame.
most_expensive = pd.read_sql(
    "SELECT * FROM products WHERE price = (SELECT max(price) FROM products)",
    engine,
)
# Print explicitly: as a bare expression the result is discarded when this
# runs as a script rather than in an interactive session.
print(most_expensive)

# Query through a SQLAlchemy ORM session.
Session = sessionmaker(bind=engine)
db_session = Session()
try:
    # SQLAlchemy 2.0 no longer accepts plain strings in execute(); wrapping the
    # statement in text() works on both 1.x and 2.x.
    for row in db_session.execute(text('select * from products')):
        print(row)
finally:
    # Release the connection held by the session.
    db_session.close()
基于sqlite3实现
import pandas as pd
import sqlite3
def query_sql(conn, sql, params=()):
    """Run a query and return every result row.

    Args:
        conn: An open database connection exposing ``cursor()`` (the script
            below passes a ``sqlite3`` connection).
        sql: SQL text to execute; may contain ``?`` placeholders.
        params: Optional values bound to the placeholders in ``sql``. Prefer
            this over building SQL with string formatting. Defaults to no
            parameters, which matches the original two-argument call.

    Returns:
        The list produced by ``cursor.fetchall()``; empty when no row matches.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(sql, params)
        return cursor.fetchall()
    finally:
        # Close the cursor even when execute() raises.
        cursor.close()
def execute_sql(conn, sql, params=()):
    """Execute a single statement and commit it.

    Args:
        conn: An open database connection exposing ``cursor()`` and
            ``commit()`` (the script below passes a ``sqlite3`` connection).
        sql: SQL text to execute; may contain ``?`` placeholders.
        params: Optional values bound to the placeholders in ``sql``.
            Defaults to no parameters, which matches the original
            two-argument call.

    Returns:
        None. The commit only happens if ``execute()`` succeeds; an exception
        from ``execute()`` propagates to the caller without committing.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(sql, params)
        conn.commit()
    finally:
        # Close the cursor even when execute() or commit() raises.
        cursor.close()
# Demo 2: the same round trip using the standard-library sqlite3 driver.

# Build the sample frame first; nothing here touches the database yet.
product_columns = ['product_name', 'price']
data = {
    'product_name': ['Computer', 'Tablet', 'Monitor', 'Printer'],
    'price': [900, 300, 450, 150],
}
df = pd.DataFrame(data, columns=product_columns)

# Open (or create) the database file and make sure the table exists.
conn = sqlite3.connect('test_database')
execute_sql(
    conn,
    'CREATE TABLE IF NOT EXISTS products (product_name text, price number)',
)

# if_exists='replace' asks pandas to drop and recreate the table from the frame.
df.to_sql('products', conn, if_exists='replace', index=False)

# Fetch the most expensive product(s) as raw rows, then rebuild a DataFrame
# with the same column labels. The connection stays open afterwards.
top_rows = query_sql(
    conn,
    'SELECT * FROM products WHERE price = (SELECT max(price) FROM products)',
)
df = pd.DataFrame(top_rows, columns=['product_name', 'price'])
print(df)