发布于 2015-05-17 05:59:26 | 200 次阅读 | 评论: 0 | 来源: 网友投递
			Apache Lucene全文检索引擎工具包
Lucene是Apache软件基金会Jakarta项目组的一个子项目,是一个开放源代码的全文检索引擎工具包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者是以此为基础建立起完整的全文检索引擎。

TestFileIndex.java
package com.lixing.paoding.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with two stored, analyzed fields:
 * {@code fileName} (the file's simple name) and {@code contents} (the file's
 * text, decoded as GB2312 with lines joined by {@code '\n'}).
 */
public class TestFileIndex {
  /** Charset used to decode the source files. */
  private static final String SOURCE_CHARSET = "gb2312";

  /**
   * Indexes every regular file in {@code d:/data} into the index at
   * {@code d:/luceneindex}, printing the number of directory entries found
   * first and {@code ok} once the index has been optimized and closed.
   *
   * @param args unused
   * @throws Exception if the data directory cannot be listed, a file cannot
   *     be read, or the index cannot be written
   */
  public static void main(String[] args) throws Exception {
    String dataDir = "d:/data";
    String indexDir = "d:/luceneindex";

    // listFiles() returns null when the path is missing or not a directory;
    // fail with a descriptive message instead of a bare NullPointerException.
    File[] files = new File(dataDir).listFiles();
    if (files == null) {
      throw new IllegalStateException("Cannot list data directory: " + dataDir);
    }
    System.out.println(files.length);

    Analyzer analyzer = new PaodingAnalyzer();
    Directory dir = FSDirectory.open(new File(indexDir));
    try {
      IndexWriter writer =
          new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
      try {
        for (File file : files) {
          // Sub-directories cannot be opened as a stream; only index regular files.
          if (!file.isFile()) {
            continue;
          }
          Document doc = new Document();
          doc.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
          doc.add(new Field("contents", readContents(file), Field.Store.YES, Field.Index.ANALYZED));
          writer.addDocument(doc);
        }
        writer.optimize();
      } finally {
        // Always close the writer, even when indexing fails part-way through.
        writer.close();
      }
    } finally {
      dir.close();
    }
    System.out.println("ok");
  }

  /**
   * Reads a whole file as text, terminating every line with {@code '\n'}.
   *
   * @param file the file to read
   * @return the decoded text of the file
   * @throws IOException if the file cannot be opened or read
   */
  private static String readContents(File file) throws IOException {
    FileInputStream in = new FileInputStream(file);
    try {
      BufferedReader reader = new BufferedReader(new InputStreamReader(in, SOURCE_CHARSET));
      StringBuilder text = new StringBuilder();
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        text.append(line).append('\n');
      }
      return text.toString();
    } finally {
      // The reader holds no resource other than this stream, so closing it suffices.
      in.close();
    }
  }
}
 
TestFileSearcher.java
package com.lixing.paoding.index;
import java.io.File;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a fixed query against the Lucene index at {@code d:/luceneindex} and
 * prints each matching document's file name and contents.
 */
public class TestFileSearcher {
  /**
   * Parses a query against the {@code contents} field, fetches up to 1000
   * hits, prints the hit count, and then prints every hit as
   * {@code fileName--:} followed by the stored contents.
   *
   * @param args unused
   * @throws Exception if the index cannot be opened, the query cannot be
   *     parsed, or the search fails
   */
  public static void main(String[] args) throws Exception {
    String indexDir = "d:/luceneindex";
    Analyzer analyzer = new PaodingAnalyzer();
    Directory dir = FSDirectory.open(new File(indexDir));
    try {
      // The second constructor argument is passed as true; in the Lucene
      // 2.9/3.0 API this requests a read-only searcher.
      IndexSearcher searcher = new IndexSearcher(dir, true);
      try {
        QueryParser parser = new QueryParser(Version.LUCENE_29, "contents", analyzer);
        Query query = parser.parse("呼救");
        // An exact-term lookup on the "fileName" field (a TermQuery built from
        // a Term) could be used here instead of the parsed query.

        TopDocs docs = searcher.search(query, 1000);
        ScoreDoc[] hits = docs.scoreDocs;
        System.out.println(hits.length);
        for (int i = 0; i < hits.length; i++) {
          Document doc = searcher.doc(hits[i].doc);
          System.out.print(doc.get("fileName") + "--:\n");
          System.out.println(doc.get("contents") + "\n");
        }
      } finally {
        // Release the searcher even if parsing or searching throws.
        searcher.close();
      }
    } finally {
      dir.close();
    }
  }
}