Lucene的使用方法与Luke工具(2)-第2章 Lucene快速入门

2.1 项目搭建

2.1.1 SQL语句

-- Session setup: talk to the server in utf8mb4 and suspend foreign-key
-- checks while the table is being rebuilt.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for goods
-- ----------------------------
-- The table is recreated from scratch, so any existing rows are discarded.
DROP TABLE IF EXISTS `goods`;

-- Text columns carry no charset clause of their own: they inherit the
-- table default (utf8 / utf8_general_ci) declared in the table options.
CREATE TABLE `goods` (
    `id`    INT(11)        NOT NULL AUTO_INCREMENT COMMENT '主键',
    `name`  VARCHAR(255)   DEFAULT NULL COMMENT '商品名称',
    `title` VARCHAR(255)   DEFAULT NULL COMMENT '标题',
    `price` DECIMAL(10, 2) DEFAULT NULL COMMENT '价格',
    `pic`   VARCHAR(255)   DEFAULT NULL COMMENT '图片',
    PRIMARY KEY (`id`) USING BTREE
)
    ENGINE = InnoDB
    AUTO_INCREMENT = 29
    DEFAULT CHARACTER SET = utf8
    COLLATE = utf8_general_ci
    ROW_FORMAT = DYNAMIC;

-- ----------------------------
-- Records of goods
-- ----------------------------
-- Every INSERT names its target columns explicitly, so the seed data keeps
-- loading correctly even if columns are later added to or reordered in `goods`.
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (1, '华为nova 5i Pro', '华为 HUAWEI nova 5i Pro 前置3200万人像超级夜景4800万AI四摄极点全面屏6GB+128GB翡冷翠全网通双4G手机', 2199.00, 'https://img12.360buyimg.com/n5/jfs/t1/57784/26/5843/534057/5d39087fEb9cd66b7/d66c941633b410dd.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (2, '华为荣耀20', ' 荣耀20 4800万超广角AI四摄 3200W美颜自拍 麒麟Kirin980全网通版8GB+128GB 冰岛白 移动联通电信4G全面屏', 2699.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t28603/102/1236695962/227407/29d12d49/5ce41500N146e357e.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (3, '华为 HUAWEI nova 5 Pro', '华为 HUAWEI nova 5 Pro 前置3200万人像超级夜景4800万AI四摄麒麟980芯片8GB+128GB绮境森林全网通双4G手机', 2999.00, 'https://img13.360buyimg.com/n5/s54x54_jfs/t1/47193/2/3369/231278/5d11cb39Ef3674059/ba3c0a1d956429e2.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (4, '小米 CC 9e', '小米 CC 9e屏幕指纹 3200万美颜自拍 4800万超广角三摄 6GB+64GB 深蓝星球 全网通 水滴全面屏游戏拍照手机', 1349.00, 'https://img10.360buyimg.com/n5/s54x54_jfs/t1/40290/27/11774/242367/5d2d4b7dE2e3fac58/4006d5155fec96cd.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (5, '小米红米Note7', '小米 红米Redmi Note7 幻彩渐变AI双摄 6GB+64GB 梦幻蓝 全网通4G 双卡双待 水滴全面屏拍照游戏智能手机', 1299.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/25067/14/4953/146200/5c371c8dE0999a312/d44bdb1c5e5f709a.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (6, '小米 红米Redmi Note7Pro', '小米 红米Redmi Note7Pro AI双摄 6GB+128GB 梦幻蓝 全网通4G 双卡双待 水滴屏拍照游戏手机', 1499.00, 'https://img10.360buyimg.com/n5/s54x54_jfs/t1/22746/35/11162/104085/5c8b6a19Eb8e8f34e/9cd57e3a481c7160.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (7, '小米9 SE', '小米9 SE 4800万超广角三摄 骁龙712 水滴全面屏 游戏智能拍照手机 8GB+128GB 全息幻彩蓝 全网通4G双卡双待', 2249.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/42543/32/5265/77884/5cebcaf1Ec3ac2ab6/1fd1f6fab7811447.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (8, '魅族 16s', '魅族 16s 骁龙855全面屏拍照游戏手机 6GB+128GB 碳纤黑 全网通移动联通电信4G手机 双卡双待', 2699.00, 'https://img13.360buyimg.com/n5/s54x54_jfs/t1/32804/25/14881/277224/5cbf0a08Ecfe815f9/2351cebef4cbe443.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (9, '魅族16Xs', '魅族16Xs 全面屏三摄拍照游戏手机 6GB+128GB骑士黑 4000mAh大电池全网通移动联通电信4G智能手机 双卡双待', 1699.00, 'https://img14.360buyimg.com/n5/s54x54_jfs/t1/47393/33/1179/77607/5cef4f50E2e0fd9e6/2835154527d724e7.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (10, '魅族 Note8', '魅族 Note8 全面屏手机 4GB+64GB 曜黑 全网通移动联通电信4G手机 双卡双待', 899.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/2481/15/12216/274259/5bd1af8bE2de8c15f/c56a6788061f4d46.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (11, '神舟战神', '神舟(HASEE)战神Z7-CT5NA 英特尔酷睿i5-9300H GTX1660Ti 6G独显15.6英寸窄边框游戏笔记本电脑(8G 512G SSD)', 5989.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t28834/286/1327928640/227342/e6558c29/5cdd0695Nb1405cc3.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (12, '神舟精盾', '神舟(HASEE)精盾U45A1畅玩版 英特尔酷睿i5-8265U 14英寸窄边框轻薄笔记本电脑8G 512GPCIe SSD MX250 2G IPS', 3989.00, 'https://img10.360buyimg.com/n5/s54x54_jfs/t1/53197/35/2666/156443/5d07d3f8E3b647e9a/3aa8dcb79c66c33d.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (13, '神舟(HASEE) 优雅XS-3000S1/X4-KL7S1 I7-7500U X3G1商务本 X1 X4-KL7S1I7-7500U/16/256+1', '神舟(HASEE) 优雅XS-3000S1/X4-KL7S1 I7-7500U X3G1商务本 X1 X4-KL7S1I7-7500U/16/256+1', 5199.00, 'https://img12.360buyimg.com/n5/jfs/t1/44399/21/3002/73822/5ccd2f9aE69ddb0b9/c6dbb67d5aa0ce3a.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (14, '神舟战神', '神舟(HASEE)战神G8-CT7NK 英特尔酷睿i7-9750H RTX2060 72%色域144Hz17.3英寸游戏笔记本电脑16G 256GSSD+1T', 8489.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t29797/23/1432190332/154288/3b194518/5cdf7017N86940fe7.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (15, '神舟战神Z7M', '神舟(HASEE)战神Z7M-KP7GZ 英特尔酷睿i7-8750H GTX1050Ti 15.6英寸72%色域商务设计师游戏本(8G 128G+1T)', 5389.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/32189/20/11013/145627/5cb40149E7b953a44/4e64910f84c4648e.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (16, '华为MateBook X Pro', '华为HUAWEI MateBook X Pro 2019款 英特尔酷睿i7 13.9英寸全面屏轻薄笔记本(i7 16G 1T MX250 3K触控) 灰', 13999.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/38530/28/1693/191138/5cbdc69aE65d57b0e/7748fdd0fcd0563f.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (17, '华为MateBook 14', '华为HUAWEI MateBook 14 全面屏轻薄性能笔记本电脑(英特尔酷睿i5 8G 512G MX250 office 2K 一碰传)灰', 5999.00, 'https://img10.360buyimg.com/n5/jfs/t1/16018/40/13941/131110/5ca3286cE7bbb1c23/2c0a1de00945af08.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (18, '小米pro', '小米(MI)Pro 2019款 15.6英寸金属轻薄笔记本(第八代英特尔酷睿i7-8550U 16G 512GSSD MX250 2G独显) 深空灰', 7199.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/83950/36/557/45350/5cebfa41E02cee56c/8bc3f60029d6c5a0.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (19, '小米(MI)RedmiBook 14英寸MX250 2G独显 全金属超轻薄便携红米游戏本笔记本电脑 i5-8265U 8G 512G MX250独显', '小米(MI)RedmiBook 14英寸MX250 2G独显 全金属超轻薄便携红米游戏本笔记本电脑 i5-8265U 8G 512G MX250独显', 3999.00, 'https://img11.360buyimg.com/n5/jfs/t1/63604/40/808/33300/5cf087e6E4edd35f3/24a842fd24539ef4.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (20, '联想小新Air14英寸轻薄超极笔记本电脑满血版四核办公游戏本 i7-8565 20G 1TB MX250 定制银 正版win10+office', '联想小新Air14英寸轻薄超极笔记本电脑满血版四核办公游戏本 i7-8565 20G 1TB MX250 定制银 正版win10+office', 7099.00, 'https://img14.360buyimg.com/n5/jfs/t1/82062/36/2718/34167/5d0f2cacE0586f769/e70326356821455f.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (21, '联想(Lenovo)小新air', '联想轻薄本小新Air14超薄笔记本电脑窄边框镜面屏带指纹背光学生办公设计独显超极本air pro升级 15.6英寸尊贵银 镜面屏 定制i7-8550U 16G 1T固态', 8999.00, 'https://img10.360buyimg.com/n5/jfs/t1/31412/29/6259/54365/5c8cbd9eEfa4efb6f/fa51739fc476d503.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (22, '华硕(ASUS)灵耀Deluxe14 14.0英寸 92%全面屏 轻薄笔记本电脑超薄笔记本 冰钻银 i7-8565U 8G 512G固态 92%屏占比', '华硕(ASUS)灵耀Deluxe14 14.0英寸 92%全面屏 轻薄笔记本电脑超薄笔记本 冰钻银 i7-8565U 8G 512G固态 92%屏占比', 7599.00, 'https://img10.360buyimg.com/n5/jfs/t1/14412/14/13290/64393/5c9e39e1E725e62ea/ff96d311347a88d6.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (23, '华为荣耀笔记本MagicBook 2019超薄本14英寸轻薄本学生商务办公便携手提笔记本电脑超极本 2019版R7+8G+512G PCIE银', '华为荣耀笔记本MagicBook 2019超薄本14英寸轻薄本学生商务办公便携手提笔记本电脑超极本 2019版R7+8G+512G PCIE银', 4999.00, 'https://img13.360buyimg.com/n5/jfs/t1/27680/33/15329/41379/5cb05173E9805dccb/eb8ae8ff323e2020.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (24, '惠普(HP)星14青春版 笔记本超轻薄微窄边框锐龙R7游戏学生本金属商务办公手提电脑【官方新品】 星空银:R7-3700U【14寸 IPS高清屏】 配置五:8G/512G固态+1T', '惠普(HP)星14青春版 笔记本超轻薄微窄边框锐龙R7游戏学生本金属商务办公手提电脑【官方新品】 星空银:R7-3700U【14寸 IPS高清屏】 配置五:8G/512G固态+1T', 4699.00, 'https://img14.360buyimg.com/n5/jfs/t1/19237/21/13795/81184/5ca21fb5E5e21d713/c23f316c1f24db77.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (25, 'ThinkPad联想游侠E485(0HCD) 14英寸轻薄商务办公锐龙7笔记本 升配(16G 256G固态+1T双硬盘 R7-2700u FHD屏 office)', 'ThinkPad联想游侠E485(0HCD) 14英寸轻薄商务办公锐龙7笔记本 升配(16G 256G固态+1T双硬盘 R7-2700u FHD屏 office)', 6999.00, 'https://img14.360buyimg.com/n5/jfs/t1/15146/30/9562/83903/5c80d3f8Eb1cee110/720a999eedd964a2.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (26, '戴尔G3', '戴尔DELL游匣G3 15.6英寸轻薄游戏笔记本电脑(九代i7-9750H 8G双通道 128GSSD 1T GTX1660TiMax-Q 6G 72色域)', 7099.00, 'https://img12.360buyimg.com/n5/s54x54_jfs/t1/71785/30/5471/166703/5d39293bEd8fce664/48f30488a9ec0ad4.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (27, '机械革命轻薄游戏笔记本电脑', '机械革命(MECHREVO)Z2 Air i7 15.6英寸轻薄游戏笔记本电脑(i7-9750H 8G 512G SSD GTX1650 72%高色域)', 6089.00, 'https://img11.360buyimg.com/n5/s54x54_jfs/t1/61544/27/2452/146827/5d0b55feE229d5224/c7d799230f50f3be.jpg');
INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (28, '东标方准笔记本电脑', '东标牌商务笔记本,价格实惠!', 9888.00, 'http://www.dongfangbiaozhun.com/picture/logo.gif');

-- Re-enable foreign-key checks now that the import is complete.
SET FOREIGN_KEY_CHECKS = 1;

2.1.2 maven依赖

<dependencies>
    <!-- Lucene core: index writer/reader/searcher, documents and directories.
         Declared explicitly because the demo code imports these packages directly. -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-core</artifactId>
        <version>8.0.0</version>
    </dependency>

    <!-- Lucene common analyzers -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-analyzers-common</artifactId>
        <version>8.0.0</version>
    </dependency>

    <!-- Lucene classic query parser (org.apache.lucene.queryparser.classic.QueryParser),
         used by the search demo -->
    <dependency>
        <groupId>org.apache.lucene</groupId>
        <artifactId>lucene-queryparser</artifactId>
        <version>8.0.0</version>
    </dependency>

    <!-- IK analyzer (Chinese word segmentation) -->
    <dependency>
        <groupId>com.github.magese</groupId>
        <artifactId>ik-analyzer</artifactId>
        <version>8.0.0</version>
    </dependency>

    <!-- MySQL JDBC driver -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.47</version>
    </dependency>

    <!-- JUnit 4 for the demo test methods -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>

    <!-- Lombok: generates constructors and accessors for the entity class -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.18</version>
    </dependency>
</dependencies>

2.1.3 实体类:

package com.dfbz.entity;

@AllArgsConstructor
@NoArgsConstructor
@Data
public class Goods {

    private Integer id;
    private String name;
    private String title;
    private Double price;
    private String pic;
}

2.1.4 编写DAO:

package com.dfbz.dao;

import com.dfbz.entity.Goods;

import java.sql.*;
import java.util.ArrayList;
import java.util.List;

/**
 * JDBC data-access object for the {@code goods} table.
 */
public class GoodsDao {

    /**
     * Loads every row of the {@code goods} table.
     *
     * <p>The connection, statement and result set are opened in a
     * try-with-resources block, so all three are released whether the query
     * succeeds or fails.
     *
     * @return all goods in the order the database returns them, or
     *         {@code null} if the driver cannot be loaded or the query fails
     *         (the stack trace is printed); callers must check for
     *         {@code null} before iterating
     */
    public List<Goods> findAll() {

        try {
            Class.forName("com.mysql.jdbc.Driver");

            // NOTE(review): URL and credentials are hard-coded for the tutorial;
            // externalise them before reusing this class elsewhere.
            try (Connection conn = DriverManager.getConnection("jdbc:mysql:///lucene_db", "root", "admin");
                 Statement st = conn.createStatement();
                 // Columns are listed explicitly so the query does not depend on table layout.
                 ResultSet rs = st.executeQuery("select id, name, title, price, pic from goods")) {

                List<Goods> goodsList = new ArrayList<>();

                while (rs.next()) {
                    Goods goods = new Goods();
                    // getInt/getDouble yield 0 for SQL NULL; `price` is nullable in the schema,
                    // so a NULL price is read as 0.0 here.
                    goods.setId(rs.getInt("id"));
                    goods.setName(rs.getString("name"));
                    goods.setTitle(rs.getString("title"));
                    goods.setPrice(rs.getDouble("price"));
                    goods.setPic(rs.getString("pic"));

                    goodsList.add(goods);
                }

                return goodsList;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        return null;
    }

}

2.2 建立索引

2.2.1 步骤:

  1. 读取原始数据(从数据库读取)
  2. 创建文档对象(Document)、域对象(Field)。并把域对象添加到文档对象中
  3. 创建分析器(Analyzer),用于分词
  4. 创建索引库配置对象(IndexWriterConfig),配置索引库(传入分析器)
  5. 设置索引库打开方式(OpenMode)
  6. 创建索引库目录对象(Directory),指定索引库的目录
  7. 创建索引库操作对象(IndexWriter),用于把文档写入索引库中
  8. 释放资源(close)

2.2.2 实现代码:

package com.dfbz.demo01_lucene入门;

import com.dfbz.dao.GoodsDao;
import com.dfbz.entity.Goods;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Builds the Lucene index for the goods loaded through {@link GoodsDao}.
 *
 * @author lscl
 * @version 1.0
 */
public class Demo01_createIndex {

    GoodsDao goodsDao = new GoodsDao();

    /**
     * Reads all goods, converts each one into a Lucene {@link Document} and
     * writes the documents to the index directory {@code D:/index}, replacing
     * whatever index was there before.
     *
     * @throws IllegalStateException if the goods could not be loaded
     * @throws Exception             if the index cannot be opened or written
     */
    @Test
    public void test1() throws Exception {
        List<Goods> goodsList = goodsDao.findAll();
        if (goodsList == null) {
            // findAll() returns null on failure. Stop here, before OpenMode.CREATE
            // below would replace the existing index with an empty one.
            throw new IllegalStateException("Could not load goods from the database; index left untouched");
        }

        List<Document> docs = new ArrayList<>();

        for (Goods goods : goodsList) {

            // One document per goods row
            Document doc = new Document();
            double price = goods.getPrice();

            // id: indexed as a single token and stored
            doc.add(new StringField("id", goods.getId() + "", Field.Store.YES));
            // name / title: analyzed (tokenized) and stored
            doc.add(new TextField("name", goods.getName(), Field.Store.YES));
            doc.add(new TextField("title", goods.getTitle(), Field.Store.YES));
            // price: DoublePoint makes the value searchable but does not store it,
            // so a StoredField is added as well to make it retrievable from search hits.
            doc.add(new DoublePoint("price", price));
            doc.add(new StoredField("price", price));
            // pic: stored only, not searchable
            doc.add(new StoredField("pic", goods.getPic()));
            docs.add(doc);
        }

        // Analyzer used to tokenize the text fields
        Analyzer analyzer = new IKAnalyzer();

        // Index writer configuration
        IndexWriterConfig config = new IndexWriterConfig(analyzer);

        /*
        CREATE: create a new index on every run, replacing any existing one
        APPEND: open an existing index and append this run's documents to it
        CREATE_OR_APPEND: create the index if it does not exist, otherwise append to it
         */
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // try-with-resources closes the writer first and then the directory,
        // including when adding documents throws.
        try (FSDirectory dir = FSDirectory.open(Paths.get("D:/index"));
             IndexWriter indexWriter = new IndexWriter(dir, config)) {

            // Write all documents to the index
            indexWriter.addDocuments(docs);
        }
    }
}
  • 查看索引目录:

2.3 Luke工具

Luke是一个面向Lucene搜索引擎的第三方工具,便于开发和诊断:它可以打开已有的Lucene索引,并允许您查看和修改其中的内容。

Luke的Github官网:https://github.com/DmitryKey/luke

Luke下载地址:https://github.com/DmitryKey/luke/tags

注意:Luke的版本必须和Lucene的版本一致!我们本次采用的版本是8.0.0

2.3.1 运行界面介绍:

1)主界面

2)文档界面:

3)搜索界面:

4)文本分析界面

2.3.2 添加扩展词

扩展IK词:“东标方准”:

将数据库中的数据减少,方便观察效果:

运行代码,未扩展词库之前:

扩展词库之后:

2.3.3 添加停用词

在数据库中添加一条记录:

INSERT INTO `goods` (`id`, `name`, `title`, `price`, `pic`) VALUES (29, '是很好的手机啊', '是很好的手机啊', 9999.9, 'https://www.baidu.com/favicon.ico');

添加停用词,运行测试代码;

Tips:停用词文件中的第一行不会被Lucene读取到,我们自己的停用词从第二行开始编写

未添加前:啊、是、的

添加停用词之后:

2.4 检索索引

2.4.1 步骤:

  1. 创建索引库目录对象(Directory),指定索引库目录
  2. 创建索引库读取对象(IndexReader),指定把索引库数据读取到内存中
  3. 创建索引库搜索对象(IndexSearcher),用于搜索索引库
  4. 创建分词器(Analyzer),用于搜索条件分词
  5. 创建查询解析器(QueryParser),传入分词器并指定查询的域
  6. 创建查询对象(Query),指定查询条件
  7. 使用索引库搜索对象(IndexSearcher)执行搜索,返回搜索结果(TopDocs)
  8. 处理结果集
  9. 关闭资源(close)

2.4.2 代码实现

package com.dfbz.demo01;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.nio.file.Paths;

/**
 * Searches the Lucene index stored in {@code D:/index}.
 *
 * @author lscl
 * @version 1.0
 */
public class Demo02_QueryIndex {

    /**
     * Parses a query against the {@code title} field, fetches the top 10 hits
     * and prints the score and stored fields of each hit.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void test1() throws Exception {

        // 1. Open the index directory and 2. an index reader on top of it.
        // try-with-resources closes the reader before the directory it reads
        // from, and also releases both if the search throws.
        try (FSDirectory dir = FSDirectory.open(Paths.get("D:/index"));
             IndexReader reader = DirectoryReader.open(dir)) {

            // 3. Searcher over the reader
            IndexSearcher searcher = new IndexSearcher(reader);

            // 4. Analyzer used to tokenize the query text
            Analyzer analyzer = new IKAnalyzer();

            // 5. Query parser; "title" is the field searched when the query names none
            QueryParser queryParser = new QueryParser("title", analyzer);

            // 6. Parse the query text into a Query object
            Query query = queryParser.parse("梦幻蓝");

            // 7. Run the search, keeping at most the 10 best hits
            TopDocs topDocs = searcher.search(query, 10);

            // Number of matching documents
            TotalHits totalHits = topDocs.totalHits;

            System.out.println("共查询到【" + totalHits.value + "】篇文档");

            // Each entry carries the score and the internal id of one hit
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            for (ScoreDoc scoreDoc : scoreDocs) {
                // Internal Lucene document id
                int docId = scoreDoc.doc;

                // Relevance score of this hit
                float score = scoreDoc.score;

                // 8. Load the stored fields of the hit
                Document doc = searcher.doc(docId);

                String id = doc.get("id");
                String name = doc.get("name");
                String title = doc.get("title");
                // Only stored fields can be read back: if "price" was indexed solely as a
                // DoublePoint (which is not stored), this is null.
                String price = doc.get("price");
                String pic = doc.get("pic");

                System.out.println("匹配分值: " + score);
                System.out.println("商品id: " + id);
                System.out.println("商品名称: " + name);
                System.out.println("商品标题: " + title);
                System.out.println("商品价格: " + price);
                System.out.println("商品图片: " + pic);
                System.out.println("-------------------");
            }
        }
    }
}

上一篇:【学术论文投稿】探索嵌入式硬件设计:揭秘智能设备的心脏


下一篇:代码随想录第十五天