1.DataToNeo4jClass.py
# -*- coding: utf-8 -*-
from py2neo import Node, Graph, Relationship
class DataToNeo4j(object):
    """Store entity/relationship data extracted from Excel in a Neo4j graph.

    Two node labels are used: ``invoice_name`` ('名称') for the subject
    entities and ``invoice_value`` ('值') for the value entities.
    """

    def __init__(self, uri="http://localhost:7474", username="neo4j",
                 password="jhy042369", clear=True):
        """Open the connection and, by default, wipe the existing graph.

        Args:
            uri: Address of the Neo4j server.
            username: Neo4j user name.
            password: Neo4j password.
                NOTE(review): the default is a credential committed to
                source control; prefer passing it in explicitly.
            clear: When true (the default, which matches the previous
                unconditional behaviour) every node and relationship in
                the graph is deleted before any data is loaded.
        """
        self.graph = Graph(uri, username=username, password=password)
        # Node labels.
        self.invoice_name = '名称'
        self.invoice_value = '值'
        if clear:
            # Destructive: removes all nodes and relationships.
            self.graph.delete_all()

    def create_node(self, node_list_key, node_list_value):
        """Create one node per entry of the two lists.

        Args:
            node_list_key: Names stored under the ``invoice_name`` label.
            node_list_value: Names stored under the ``invoice_value`` label.
        """
        for name in node_list_key:
            self.graph.create(Node(self.invoice_name, name=name))
        for name in node_list_value:
            self.graph.create(Node(self.invoice_value, name=name))

    def create_relation(self, df_data):
        """Create one relationship per row of ``df_data``.

        Args:
            df_data: DataFrame with the columns ``name`` (start entity),
                ``relation`` (relationship type) and ``name2`` (end entity).
        """
        # Iterate by position so the frame does not need a 0..n-1 index.
        rows = zip(df_data['name'], df_data['relation'], df_data['name2'])
        for m, (name, relation, name2) in enumerate(rows):
            try:
                start_node = self.graph.find_one(label=self.invoice_name,
                                                 property_key='name',
                                                 property_value=name)
                end_node = self.graph.find_one(label=self.invoice_value,
                                               property_key='name',
                                               property_value=name2)
                self.graph.create(Relationship(start_node, relation, end_node))
            except AttributeError as e:
                # Best effort: report the failing row number and carry on.
                # NOTE(review): presumably raised when find_one() found no
                # matching node -- confirm against py2neo v3.
                print(e, m)
2.invoice_neo4j.py
# -*- coding: utf-8 -*-
from dataToNeo4jClass.DataToNeo4jClass import DataToNeo4j
import os
import pandas as pd
# Read the Excel sheet into a DataFrame; the first row is the header.
# NOTE(review): the data directory is a hard-coded absolute Windows path.
os.chdir('D:\\Example\\neo4j-python-pandas-py2neo-v3-master\\data')
# No ``encoding`` argument: Excel files carry their own text encoding, and
# current pandas releases reject that keyword in read_excel().
invoice_data = pd.read_excel('./Invoice_data_Demo.xls', header=0)
print(invoice_data)
# Collect the names of entity 1 in one list and of entity 2 in another.
def data_extraction(data=None):
    """Extract the node names to create from the invoice table.

    Args:
        data: DataFrame to read; it must contain a '题名' column.  Defaults
            to the module-level ``invoice_data``.

    Returns:
        A ``(node_list_key, node_list_value)`` tuple of string lists:
        the distinct values of the '题名' column, and the distinct values of
        every column after the first.  Each list is free of duplicates and
        keeps first-seen order (dicts preserve insertion order on
        Python 3.7+).
    """
    if data is None:
        data = invoice_data

    # Convert to str *before* de-duplicating: values that differ as objects
    # but print the same (1 and '1', or every NaN cell, since NaN != NaN)
    # would otherwise each produce their own node.  Keys are stringified too,
    # because the relationship table refers to them by their string form.
    node_list_key = list(dict.fromkeys(str(title) for title in data['题名']))

    # Walk the table row by row, skipping the first column of each row.
    node_list_value = list(dict.fromkeys(
        str(cell)
        for row in data.itertuples(index=False, name=None)
        for cell in row[1:]
    ))
    return node_list_key, node_list_value
# Build the "entity 1 -- relation -- entity 2" table as a DataFrame.
def relation_extraction(data=None):
    """Extract the relationship triples from the invoice table.

    For every row, the first column is the start entity; each remaining
    column contributes one triple whose relation is the column header and
    whose end entity is the cell value.

    Args:
        data: DataFrame to read.  Defaults to the module-level
            ``invoice_data``.

    Returns:
        DataFrame with the columns ``name``, ``relation`` and ``name2``.
        ``name`` and ``name2`` hold strings; ``relation`` holds the column
        headers unchanged.
    """
    if data is None:
        data = invoice_data

    relation_names = list(data.columns[1:])
    name_list = []      # start entities
    relation_list = []  # relation names (column headers)
    name2_list = []     # end entities

    # Iterate by position so the frame does not need a 0..n-1 index.
    for row in data.itertuples(index=False, name=None):
        start_name = str(row[0])
        for relation, cell in zip(relation_names, row[1:]):
            name_list.append(start_name)
            relation_list.append(relation)
            name2_list.append(str(cell))

    return pd.DataFrame(
        {'name': name_list, 'relation': relation_list, 'name2': name2_list},
        columns=['name', 'relation', 'name2'],
    )
# Run the import: extract each table once and reuse the results instead of
# recomputing them for every call below.
node_list_key, node_list_value = data_extraction()
df_relations = relation_extraction()
# Instantiating DataToNeo4j connects to the database (and clears it).
create_data = DataToNeo4j()
create_data.create_node(node_list_key, node_list_value)  # entity 1 and entity 2 nodes
create_data.create_relation(df_relations)  # relationships between them
print(df_relations)
requirements.txt
atomicwrites==1.2.1
attrs==18.2.0
backcall==0.1.0
certifi==2016.2.28
Click==7.0
colorama==0.4.0
decorator==4.3.0
ipykernel==5.1.0
ipython==7.1.1
ipython-genutils==0.2.0
jedi==0.13.1
jieba==0.39
jupyter-client==5.2.3
jupyter-console==6.0.0
jupyter-core==4.4.0
more-itertools==4.3.0
neo4j-driver==1.6.2
neobolt==1.7.0
neotime==1.7.1
numpy==1.15.3
pandas==0.23.4
parso==0.3.1
pickleshare==0.7.5
pluggy==0.8.0
prompt-toolkit==1.0.15
py==1.7.0
py2neo==3
Pygments==2.2.0
pytest==3.9.3
python-dateutil==2.7.5
pytz==2018.6
pyzmq==17.1.2
six==1.11.0
tornado==5.1.1
traitlets==4.3.2
urllib3==1.22
wcwidth==0.1.7
wincertstore==0.2
xlrd==1.1.0