1. 环境 lucene 2.4
a. 实体 Article.java
public class Article { private Long id; private String title; private String content; public Long getId() { return id; } public void setId(Long id) { this.id = id; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } }
b. 实体与 Document 相互转换的工具类 ArticleDocumentUtils.java
import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumberTools; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; // logForj public class ArticleDocumentUtils { /** * Article --> Document * @param article * @return */ public static Document article2Document(Article article) { Document doc = new Document(); // article.properties --> doc.fieldList doc.add(new Field("id", NumberTools.longToString(article.getId()), Store.YES, Index.NOT_ANALYZED)); Field field = new Field("title", article.getTitle(), Store.YES, Index.ANALYZED); field.setBoost(2.0F); // 默认为1.0F doc.add(field); doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED)); return doc; } /** * Document --> Article * * @param doc * @return */ public static Article document2Article(Document doc) { Article article = new Article(); Long id =NumberTools.stringToLong(doc.getField("id").stringValue()); String title = doc.getField("title").stringValue(); String content = doc.getField("content").stringValue(); article.setId(id); article.setTitle(title); article.setContent(content); return article; } }
// 时间类弄的转换 doc.add(new Field("postTime", DateTools.dateToString(article.getPostTime(), Resolution.SECOND), Store.YES, Index.NO));
article.setPostTime(DateTools.stringToDate(doc.get("postTime")));
c. 测试方法 HelloWorld.java
import java.util.ArrayList; import java.util.List; import jeasy.analysis.MMAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.junit.Test; // Lucene 2.4 public class HelloWorld { // 索引目录 private String indexPath = "./index/"; // 分词器 // private Analyzer analyzer = new StandardAnalyzer(); private Analyzer analyzer = new MMAnalyzer(); // 建立索引 @Test public void createIndex() throws Exception { // 模拟一个已经存在的文章 Article article = new Article(); article.setId(1L); article.setTitle("小笑话 -- 牛人发帖"); // 笑话 article .setContent("有一牛人发一帖,然后马上就用发帖id疯狂回复自己的帖子:自己回帖1:楼主太有才了自己回帖2:楼主说的不错,挺有道理的自己回帖3:楼主真是太牛了,好崇拜你.最后终于有人回复他的帖子: 我靠,好歹你也换个id啊"); // article --> Document Document doc = ArticleDocumentUtils.article2Document(article); // 建立索引(放到索引库中) IndexWriter indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED); indexWriter.addDocument(doc); indexWriter.close(); } // 搜索 @Test public void search() throws Exception { // String queryString = "笑话"; String queryString = "幽默"; // ============================================= // 1,queryString --> query // hql --> Hibernate.Query String[] fields = new String[] { "title", "content" }; QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer); Query query = queryParser.parse(queryString); // 2,搜索-->搜索结果 // 在所有文章的"标题"和"内容"中搜索 IndexSearcher indexSearcher = new IndexSearcher(indexPath); // 在指定的索引库中搜索 TopDocs topDocs = indexSearcher.search(query, null, 100);// TopDocs是包装了查询结果的对象 // 3,处理搜索结果 // topDocs.totalHits; 数字类型,代表匹配的结果的数量 // 
topDocs.scoreDocs; ScoreDoc数组,代表匹配的所有结果(ScoreDoc只有Document的内部编号) System.out.println("匹配的结果的数量:" + topDocs.totalHits); List<Article> list = new ArrayList<Article>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { int docSn = scoreDoc.doc; // 文档对应的内部编码 Document doc = indexSearcher.doc(docSn); // 根据内部编号取出Document list.add(ArticleDocumentUtils.document2Article(doc)); } indexSearcher.close(); // ============================================= // 打印结果 for (Article a : list) { System.out.println("---------------------------> " + a.getId()); System.out.println("Id = " + a.getId()); System.out.println("Title = " + a.getTitle()); System.out.println("Content = " + a.getContent()); } } }
高亮器测试
import java.util.ArrayList; import java.util.List; import jeasy.analysis.MMAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.Scorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.junit.Test; import cn.itcast.lucene.helloworld.Article; import cn.itcast.lucene.helloworld.ArticleDocumentUtils; public class HighLighterTest { // 索引目录 private String indexPath = "./index/"; // 分词器 private Analyzer analyzer = new MMAnalyzer();// new StandardAnalyzer(); @Test public void test() throws Exception { String queryString = "回帖"; // String queryString = "幽默"; // ============================================= // 1,queryString --> query QueryParser queryParser = new MultiFieldQueryParser(new String[] { "title", "content" }, analyzer); Query query = queryParser.parse(queryString); // 2,搜索-->搜索结果 // 在所有文章的"标题"和"内容"中搜索 IndexSearcher indexSearcher = new IndexSearcher(indexPath); // 在指定的索引库中搜索 TopDocs topDocs = indexSearcher.search(query, null, 100);// TopDocs是包装了查询结果的对象 // ===================== 初始化高亮器 Formatter formatter = new SimpleHTMLFormatter("<span class='keyword'>", "</span>");// 默认为<b>和</b> Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(50); // 默认为100 
highlighter.setTextFragmenter(fragmenter); // ===================== // 3,处理搜索结果 // topDocs.totalHits; 数字类型,代表匹配的结果的数量 // topDocs.scoreDocs; ScoreDoc数组,代表匹配的所有结果(ScoreDoc只有Document的内部编号) System.out.println("匹配的结果的数量:" + topDocs.totalHits); List<Article> list = new ArrayList<Article>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { int docSn = scoreDoc.doc; // 文档对应的内部编码 // scoreDoc.score; Document doc = indexSearcher.doc(docSn); // 根据内部编号取出Document // =================== 使用高亮器 // doc.getField("content").stringValue() --> doc.get("content") // 高亮操作不影响原始数据 // 如果高亮的属性值中没有出现关键词,就返回null String ht = highlighter.getBestFragment(analyzer, "content", doc.get("content")); if (ht != null) { doc.getField("content").setValue(ht); } // =================== list.add(ArticleDocumentUtils.document2Article(doc)); } indexSearcher.close(); // ============================================= // 打印结果 for (Article a : list) { System.out.println("---------------------------> " + a.getId()); System.out.println("Id = " + a.getId()); System.out.println("Title = " + a.getTitle()); System.out.println("Content = " + a.getContent()); } } }
CRUD 操作
import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import cn.itcast.lucene.helloworld.Article; import cn.itcast.lucene.helloworld.ArticleDocumentUtils; public class IndexDao { // 索引目录 private String indexPath = "./index/"; // 分词器 private Analyzer analyzer = new StandardAnalyzer(); /** * 建立索引(保存到索引库) * * @param article */ public void save(Article article) { // 1, article --> Document Document doc = ArticleDocumentUtils.article2Document(article); // 2, indexWriter.add( doc ) IndexWriter indexWriter = null; try { indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED); indexWriter.addDocument(doc); } catch (Exception e) { throw new RuntimeException(e); } finally { if (indexWriter != null) { try { indexWriter.close(); } catch (Exception e) { throw new RuntimeException(e); } } } } /** * 删除索引 * * @param id * * delete from table_article WHERE ?(term.name)=?(term.value) */ public void delete(Long id) { IndexWriter indexWriter = null; try { indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED); Term term = new Term("id", id.toString()); // 含有term的所有Document都将被删掉 indexWriter.deleteDocuments(term); } catch (Exception e) { throw new RuntimeException(e); } finally { if (indexWriter != null) { try { indexWriter.close(); } catch (Exception e) { throw new RuntimeException(e); } } } } /** * 更新索引 * * @param article * * update table_article set xxx=xxx,yy=yyy... 
WHERE id=?( article.getId() ) */ public void update(Article article) { IndexWriter indexWriter = null; try { Term term = new Term("id", article.getId().toString()); Document doc = ArticleDocumentUtils.article2Document(article); indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED); // 更新含有term的Document,更新后的状态在doc中 indexWriter.updateDocument(term, doc); // 更新就是“先删除,再创建” // indexWriter.deleteDocuments(term); // indexWriter.addDocument(doc); } catch (Exception e) { throw new RuntimeException(e); } finally { if (indexWriter != null) { try { indexWriter.close(); } catch (Exception e) { throw new RuntimeException(e); } } } } /** * 搜索,分页(符合某条件的某一页的数据) * * @param queryString * @param firstResult * @param maxResults * @return * * select * from table_article limit ?:first,?:max * * select count(*) from table_article */ public SearchResult search(String queryString, int firstResult, int maxResults) { // 1, queryString --> Query IndexSearcher indexSearcher = null; try { QueryParser queryParser = new MultiFieldQueryParser(new String[] { "title", "content" }, analyzer); Query query = queryParser.parse(queryString); // 2, 进行搜索, 在title与content中搜索 --> TopDocs( totalHits, scoreDocs ) indexSearcher = new IndexSearcher(indexPath); TopDocs topDocs = indexSearcher.search(query, null, 100); // 3, 处理结果,返回 List<Article> list = new ArrayList<Article>(); int end = Math.min(firstResult + maxResults, topDocs.scoreDocs.length); for (int i = firstResult; i < end; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int docSn = scoreDoc.doc; // Document的内部编号 Document doc = indexSearcher.doc(docSn); // 根据Document的内部编号取出相应的Document Article article = ArticleDocumentUtils.document2Article(doc); list.add(article); } return new SearchResult(topDocs.totalHits, list); } catch (Exception e) { throw new RuntimeException(e); } finally { if (indexSearcher != null) { try { indexSearcher.close(); // 取完数据在关闭 } catch (Exception e) { throw new RuntimeException(e); } } } } }
相关推荐
lucenetest.rar,lucene,全文检索,lucene例子 lucenetest.rar,lucene,全文检索,lucene例子lucenetest.rar,lucene,全文检索,lucene例子
lucene入门小例子
lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子lucene quartz 例子
lucene3.0 例子,很适合学习;只有源代码,jar 包自己加上去就 OK 了
Lucene操作数据库例子,通过JDBC程序+Lucene
lucene文档例子
一个基于LUCENE搜索引擎项目例子一个基于LUCENE搜索引擎项目例子一个基于LUCENE搜索引擎项目例子
lucene3.6 搜索例子
B/S架构下采用LUCENE开发的一个搜索引擎的小例子
Lucene学习例子与文档,有兴趣的朋友可以看看。
程序展现了lucene包强大的建索引和搜索功能!
NULL 博文链接:https://chinaxxren.iteye.com/blog/548498
lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例lucene实例
Lucene.net是Lucene的.net移植版本,是一个开源的全文检索引擎开发包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎。
lucene Heritrix
lucene 简单例子,可直接运行。
Lucene 7.1 RMI远程搜索例子 Lucene 7.1 RMI远程搜索例子 Lucene 7.1 RMI远程搜索例子
这是一个 lucene 的例子,大家可以参考一下,很不错。
lucene demo lucene 使用例子 更快掌握lucene
lucene全文搜索ajax例子,集成高亮显示。多次搜索等功能了,解压就可以运行了