package com.hope.lucene;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import java.io.File;
/**
 * JUnit-driven walkthrough of basic Lucene usage: building an on-disk index
 * from a folder of files, running a term query against that index, and
 * printing the tokens an analyzer produces for a sample sentence.
 *
 * <p>Every Lucene resource (directory, writer, reader, analyzer, token stream)
 * is opened in a try-with-resources statement so it is released even when a
 * step throws.
 *
 * @author newcityman
 * @date 2020/1/15 - 0:01
 */
public class LuceneFirst {

    /** Folder on disk where the index is stored. */
    private static final String INDEX_PATH = "G:\\workspace_idea3\\lucene\\temp\\index";

    /** Folder whose files are read and indexed by {@link #createIndex()}. */
    private static final String SOURCE_PATH = "G:\\workspace_idea3\\lucene\\temp\\searchsource";

    /**
     * Creates the index: one document per regular file found directly inside
     * the source folder.
     *
     * @throws IllegalStateException if the source folder cannot be listed
     * @throws Exception if reading a file or writing the index fails
     */
    @Test
    public void createIndex() throws Exception {
        File sourceDir = new File(SOURCE_PATH);
        // listFiles() returns null when the path is not a directory or an I/O
        // error occurs; fail with a clear message instead of an NPE in the loop.
        File[] files = sourceDir.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list source directory: " + sourceDir);
        }

        // 1. Open the directory that holds the index on disk.
        // 2. Create an IndexWriter on top of it.
        // Resources are closed in reverse order: writer first, then directory.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH).toPath());
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) {
            // 3. Read each file and build a document for it.
            for (File f : files) {
                // Sub-directories have no text content to read; skip them
                // rather than letting readFileToString abort the whole run.
                if (!f.isFile()) {
                    continue;
                }
                // 4/5. Build the document and write it to the index.
                indexWriter.addDocument(toDocument(f));
            }
        }
        // 6. The writer (and directory) are closed by try-with-resources.
    }

    /**
     * Builds the Lucene document for a single file with the stored fields
     * {@code name}, {@code path}, {@code content} and {@code size}.
     *
     * @param f the file to index
     * @return a document describing {@code f}
     * @throws Exception if the file content cannot be read
     */
    private Document toDocument(File f) throws Exception {
        // File name
        String fileName = f.getName();
        // File path
        String filePath = f.getPath();
        // File content, decoded as UTF-8
        String fileContent = FileUtils.readFileToString(f, "utf-8");
        // File size in bytes
        long fileSize = FileUtils.sizeOf(f);

        // NOTE(review): every field is a TextField, so path and size are
        // analyzed like free text; a non-analyzed field type may fit them
        // better - confirm before changing the index schema.
        Document document = new Document();
        document.add(new TextField("name", fileName, Field.Store.YES));
        document.add(new TextField("path", filePath, Field.Store.YES));
        document.add(new TextField("content", fileContent, Field.Store.YES));
        document.add(new TextField("size", fileSize + "", Field.Store.YES));
        return document;
    }

    /**
     * Searches the index for documents whose {@code content} field contains
     * the term {@code spring} and prints name, path and size of each hit.
     *
     * @throws Exception if the index cannot be opened or read
     */
    @Test
    public void searchIndex() throws Exception {
        // 1. Open the directory that holds the index.
        // 2. Open an IndexReader on it.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH).toPath());
             IndexReader indexReader = DirectoryReader.open(directory)) {
            // 3. Create the IndexSearcher.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // 4. Create the query: a TermQuery on the "content" field.
            TermQuery termQuery = new TermQuery(new Term("content", "spring"));
            // 5. Run the query.
            //    Argument 1: the query; argument 2: maximum number of hits returned.
            TopDocs topDocs = indexSearcher.search(termQuery, 10);
            // 6. Print the total number of matching documents.
            System.out.println("查询总记录数:" + topDocs.totalHits);
            // 7. Take the list of hits.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            // 8. Print the stored fields of each hit.
            for (ScoreDoc scoreDoc : scoreDocs) {
                // Document id of the hit
                int docId = scoreDoc.doc;
                // Load the document by id
                Document document = indexSearcher.doc(docId);
                System.out.println(document.get("name"));
                System.out.println(document.get("path"));
                System.out.println(document.get("size"));
                // Content is intentionally not printed.
                System.out.println("++++++++++++++++++++++++++++++");
            }
        }
        // 9. The reader (and directory) are closed by try-with-resources.
    }

    /**
     * Prints the tokens the standard analyzer produces for a sample sentence.
     *
     * @throws Exception if the analysis fails
     */
    @Test
    public void testTokenStream() throws Exception {
        // 1. Create the Analyzer (a StandardAnalyzer).
        // 2. Obtain a TokenStream from the analyzer's tokenStream method.
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream tokenStream =
                     analyzer.tokenStream("", "Learn how to create a web page with Spring MVC.")) {
            // 3. Register the term attribute; it acts like a cursor onto the
            //    current token's text.
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            // 4. reset() must be called before consuming, otherwise an
            //    exception is thrown.
            tokenStream.reset();
            // 5. Iterate over the tokens.
            while (tokenStream.incrementToken()) {
                System.out.println(charTermAttribute.toString());
            }
            // Signal end-of-stream, as the TokenStream consumer workflow
            // requires before close().
            tokenStream.end();
        }
        // 6. The token stream (and analyzer) are closed by try-with-resources.
    }
}