Lucene更新实在太快了,只好紧跟脚步开始学习Lucene 5。花了点时间写了一个demo:程序根据用户提供的文件夹,遍历该文件夹下的所有文件,读取每个文件的内容并写入索引。读取文件部分采用的是NIO.2 API(java.nio.file),因此JDK必须使用1.7及以上版本。Lucene 5开发压缩包请在Lucene官网下载。不多说了,对于码农来说,最直接的就是上代码。
- package com.yida.framework.lucene5.core;
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.FileVisitResult;
- import java.nio.file.Files;
- import java.nio.file.LinkOption;
- import java.nio.file.OpenOption;
- import java.nio.file.Path;
- import java.nio.file.Paths;
- import java.nio.file.SimpleFileVisitor;
- import java.nio.file.attribute.BasicFileAttributes;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.LongField;
- import org.apache.lucene.document.StringField;
- import org.apache.lucene.document.TextField;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- /**
- * 读取硬盘文件,创建索引
- *
- * @author Lanxiaowei
- *
- */
- @SuppressWarnings({ "unchecked", "unused", "rawtypes" })
- public class IndexFile {
- public static void main(String[] args) throws IOException {
- String dirPath = "D:/docPath";
- String indexPath = "D:/lucenedir";
- createIndex(dirPath, indexPath);
- }
- /**
- * 创建索引
- * @param dirPath 需要读取的文件所在文件目录
- * @param indexPath 索引存放目录
- * @throws IOException
- */
- public static void createIndex(String dirPath, String indexPath) throws IOException {
- createIndex(dirPath, indexPath, false);
- }
- /**
- * 创建索引
- * @param dirPath 需要读取的文件所在文件目录
- * @param indexPath 索引存放目录
- * @param createOrAppend 始终重建索引/不存在则追加索引
- * @throws IOException
- */
- public static void createIndex(String dirPath, String indexPath,
- boolean createOrAppend) throws IOException {
- long start = System.currentTimeMillis();
- Directory dir = FSDirectory.open(Paths.get(indexPath, new String[0]));
- Path docDirPath = Paths.get(dirPath, new String[0]);
- Analyzer analyzer = new StandardAnalyzer();
- IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
- if (createOrAppend) {
- indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
- } else {
- indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
- }
- IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
- indexDocs(writer, docDirPath);
- writer.close();
- long end = System.currentTimeMillis();
- System.out.println("Time consumed:" + (end - start) + " ms");
- }
- /**
- *
- * @param writer
- * 索引写入器
- * @param path
- * 文件路径
- * @throws IOException
- */
- public static void indexDocs(final IndexWriter writer, Path path)
- throws IOException {
- // 如果是目录,查找目录下的文件
- if (Files.isDirectory(path, new LinkOption[0])) {
- System.out.println("directory");
- Files.walkFileTree(path, new SimpleFileVisitor() {
- @Override
- public FileVisitResult visitFile(Object file,
- BasicFileAttributes attrs) throws IOException {
- Path path = (Path)file;
- System.out.println(path.getFileName());
- indexDoc(writer, path, attrs.lastModifiedTime().toMillis());
- return FileVisitResult.CONTINUE;
- }
- });
- } else {
- indexDoc(writer, path,
- Files.getLastModifiedTime(path, new LinkOption[0])
- .toMillis());
- }
- }
- /**
- * 读取文件创建索引
- *
- * @param writer
- * 索引写入器
- * @param file
- * 文件路径
- * @param lastModified
- * 文件最后一次修改时间
- * @throws IOException
- */
- public static void indexDoc(IndexWriter writer, Path file, long lastModified)
- throws IOException {
- InputStream stream = Files.newInputStream(file, new OpenOption[0]);
- Document doc = new Document();
- Field pathField = new StringField("path", file.toString(),
- Field.Store.YES);
- doc.add(pathField);
- doc.add(new LongField("modified", lastModified, Field.Store.NO));
- doc.add(new TextField("contents", new BufferedReader(
- new InputStreamReader(stream, StandardCharsets.UTF_8))));
- if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
- System.out.println("adding " + file);
- writer.addDocument(doc);
- } else {
- System.out.println("updating " + file);
- writer.updateDocument(new Term("path", file.toString()), doc);
- }
- writer.commit();
- }
- }
项目采用的是Maven构建,怎么创建Maven Project就不用介绍了吧,我就贴下pom配置吧。
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.yida.framework</groupId>
  <artifactId>lucene5</artifactId>
  <packaging>war</packaging>
  <version>1.0</version>
  <name>lucene5 Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <properties>
    <lucene.version>5.0.0</lucene.version>
    <!-- The indexing code uses java.nio.file (Java 7), so pin the compiler
         level instead of relying on the compiler plugin's default. -->
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <!-- Keeps the build independent of the platform's default encoding. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- All Lucene modules share one version via the lucene.version property. -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>
  </dependencies>
  <build>
    <finalName>lucene5</finalName>
  </build>
</project>
项目结构图如图:
运行之前,先在D盘新建两个文件夹,如图:
然后在docPath文件夹里随便放几个文本文件,如图:
然后运行IndexFile类的main方法,就会在lucenedir文件夹下创建索引。
代码很简单,没什么需要过多解释的,demo源码请在附件里下载。
希望能对大家学习Lucene有所帮助,其次也算是对自己学习轨迹的一个记录,写博客这个习惯我会努力保持下去。
若你还有什么疑问,请加我Q-Q:7-3-6-0-3-1-3-0-5,或者加裙:
,欢迎你加入一起交流学习。
转载:http://iamyida.iteye.com/blog/2192938