基于SQLite,在Pandas DataFrames数据集中执行SQL操作

在数据探查、分析时,经常会将CSV,XML或JSON等文件加载到 pandas DataFrame中,
但DataFrame不能直接使用SQL进行分析

本文提供两种示例:先将DataFrame写入SQLite,然后再使用SQL进行分析

基于sqlalchemy实现

import sqlite3

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker

# Sample data: four products and their prices.
data = {
    'product_name': ['Computer', 'Tablet', 'Monitor', 'Printer'],
    'price': [900, 300, 450, 150],
}

df = pd.DataFrame(data, columns=['product_name', 'price'])

engine = create_engine('sqlite:///test.db', echo=False)

# Hand pandas the engine itself instead of engine.connect(): the explicit
# connection was never closed, whereas with an engine pandas manages the
# connection for the duration of this call.
df.to_sql('products', engine, if_exists='replace', index=False)

# Query with pandas: fetch the row(s) that carry the highest price.
max_price_df = pd.read_sql(
    "SELECT * FROM products WHERE price = (SELECT max(price) FROM products)",
    engine,
)
# Print explicitly; outside a notebook a bare expression shows nothing.
print(max_price_df)

# Query with a SQLAlchemy session.
Session = sessionmaker(bind=engine)
db_session = Session()
try:
    # Raw SQL has to be wrapped in text(): SQLAlchemy 2.0 no longer accepts
    # a plain string in Session.execute().
    for row in db_session.execute(text('select * from products')):
        print(row)
finally:
    # Always release the session's connection, even if the query fails.
    db_session.close()

基于sqlite3实现

import pandas as pd
import sqlite3

def query_sql(conn, sql, params=()):
    """Run a SELECT-style statement and return every result row.

    Args:
        conn: An open DB-API connection (e.g. from ``sqlite3.connect``).
        sql: The SQL text to execute; may contain ``?`` / named placeholders.
        params: Optional values bound to the placeholders in ``sql``. Prefer
            this over formatting values into the SQL string.

    Returns:
        A list of row tuples, empty when the query matches nothing.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(sql, params)
        return cursor.fetchall()
    finally:
        # Release the cursor even when execute() raises.
        cursor.close()
def execute_sql(conn, sql, params=()):
    """Execute a single statement (DDL/DML) and commit the transaction.

    Args:
        conn: An open DB-API connection (e.g. from ``sqlite3.connect``).
        sql: The SQL text to execute; may contain ``?`` / named placeholders.
        params: Optional values bound to the placeholders in ``sql``. Prefer
            this over formatting values into the SQL string.

    Returns:
        None.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(sql, params)
        conn.commit()
    finally:
        # Release the cursor even when execute() or commit() raises.
        cursor.close()

conn = sqlite3.connect('test_database')
try:
    # Shows execute_sql in action. Note that to_sql(if_exists='replace')
    # below drops and recreates the table from the DataFrame, so this
    # schema does not survive the load.
    execute_sql(conn, 'CREATE TABLE IF NOT EXISTS products (product_name text, price number)')

    # Sample data: four products and their prices.
    data = {
        'product_name': ['Computer', 'Tablet', 'Monitor', 'Printer'],
        'price': [900, 300, 450, 150],
    }

    df = pd.DataFrame(data, columns=['product_name', 'price'])
    df.to_sql('products', conn, if_exists='replace', index=False)

    # Fetch the row(s) with the highest price and wrap them in a DataFrame;
    # fetchall() returns bare tuples, so column names are supplied by hand.
    rows = query_sql(conn, 'SELECT * FROM products WHERE price = (SELECT max(price) FROM products)')
    df = pd.DataFrame(rows, columns=['product_name', 'price'])
    print(df)
finally:
    # Close the database connection even if one of the steps above fails.
    conn.close()
上一篇:备忘录:关于.net程序连接Oracle数据库


下一篇:【pwn学习】格式化字符漏洞