Lucene 同义词的索引
发表于:2024-11-24 作者:热门IT资讯网编辑
最后更新:2024年11月24日。摘要:本文通过 SynonymEngine 接口及其实现 SynonymEngineImpl,配合自定义的 SynonymFilter 与 SynonymAnalyzer,演示如何在 Lucene 3.5 的索引中加入同义词。
/**
 * Lookup service that maps a term to its synonyms.
 */
public interface SynonymEngine {

    /**
     * Looks up the synonyms registered for a term.
     *
     * @param key the term to look up
     * @return the synonyms of {@code key}; the implementation in this file
     *         returns {@code null} for an unknown term, and the filter in this
     *         file treats both {@code null} and an empty array as "no synonyms"
     */
    String[] getSynonyms(String key);
}
public class SynonymEngineImpl implements SynonymEngine { private static HashMapmap = new HashMap (); static { map.put("quick",new String[]{"fast","speedy"}); map.put("jumps",new String[]{"leaps","hops"}); map.put("over",new String[]{"above"}); map.put("lazy",new String[]{"apathetic","sluggish"}); map.put("dog",new String[]{"canine","pooch"}); } @Override public String[] getSynonyms(String key) { // TODO Auto-generated method stub return map.get(key); }}
public class SynonymFilter extends TokenFilter { private SynonymEngine engine; private CharTermAttribute ct; private PositionIncrementAttribute pt; private Stackstack; private AttributeSource.State current; protected SynonymFilter(TokenStream input,SynonymEngine engine) { super(input); this.engine = engine; ct = this.addAttribute(CharTermAttribute.class); pt = this.addAttribute(PositionIncrementAttribute.class); stack = new Stack (); } @Override public boolean incrementToken() throws IOException { if(stack.size()>0) { this.restoreState(current); String p = stack.pop(); ct.setEmpty(); ct.append(p); pt.setPositionIncrement(0); return true; } System.out.println("++++++"+ct); if(!input.incrementToken()) return false; System.out.println("------"+ct); if(addSynonym(ct.toString())) { current = this.captureState(); } return true; } private boolean addSynonym(String name) { String[] sa = engine.getSynonyms(name); if(sa != null && sa.length>0) { for(String s:sa) { stack.push(s); } return true; } else { return false; } }}
public class SynonymAnalyzer extends Analyzer { private SynonymEngine engine; public SynonymAnalyzer(SynonymEngine engine) { this.engine = engine; } @Override public TokenStream tokenStream(String s, Reader reader) { // TODO Auto-generated method stub return new SynonymFilter(new StopFilter(Version.LUCENE_35, new LowerCaseFilter(Version.LUCENE_35, new StandardFilter(Version.LUCENE_35, new StandardTokenizer(Version.LUCENE_35,reader))) ,StopAnalyzer.ENGLISH_STOP_WORDS_SET),engine); }}
public class TestSynonym { private RAMDirectory directory; @Test public void init() { directory = new RAMDirectory(); SynonymEngine engine = new SynonymEngineImpl(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new SynonymAnalyzer(engine)); String content = "The quick brown fox jumps over the lazy dog"; try { IndexWriter writer = new IndexWriter(directory,config); Document doc = new Document(); doc.add(new Field("content",content,Field.Store.YES,Field.Index.ANALYZED)); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(new TermQuery(new Term("content","pooch")),10); for(ScoreDoc sd:docs.scoreDocs) { Document d = searcher.doc(sd.doc); System.out.println(d.get("content")); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }}