热门IT资讯网

Lucene 同义词的索引

发表于:2024-11-24 作者:热门IT资讯网编辑
最后更新:2024年11月24日。内容摘要:public interface SynonymEngine { String[] getSynonyms(String key); } public class SynonymEngineImpl ……(摘要在此处截断,完整代码见下文)
/**
 * Source of synonyms for individual terms.
 */
public interface SynonymEngine {

    /**
     * Looks up the synonyms associated with a term.
     *
     * @param key the term to look up
     * @return the synonyms for {@code key}; what is returned for an unknown
     *         term is up to the implementation
     */
    String[] getSynonyms(String key);
}
public class SynonymEngineImpl implements SynonymEngine {        private static HashMap map = new HashMap();    static {        map.put("quick",new String[]{"fast","speedy"});        map.put("jumps",new String[]{"leaps","hops"});        map.put("over",new String[]{"above"});        map.put("lazy",new String[]{"apathetic","sluggish"});        map.put("dog",new String[]{"canine","pooch"});    }    @Override    public String[] getSynonyms(String key) {        // TODO Auto-generated method stub        return map.get(key);    }}
public class SynonymFilter extends TokenFilter {    private SynonymEngine engine;    private CharTermAttribute ct;    private PositionIncrementAttribute pt;    private Stack stack;    private AttributeSource.State current;    protected SynonymFilter(TokenStream input,SynonymEngine engine) {        super(input);        this.engine = engine;        ct = this.addAttribute(CharTermAttribute.class);        pt = this.addAttribute(PositionIncrementAttribute.class);        stack  = new Stack();    }    @Override    public boolean incrementToken() throws IOException {        if(stack.size()>0) {            this.restoreState(current);            String p = stack.pop();            ct.setEmpty();            ct.append(p);            pt.setPositionIncrement(0);            return true;        }        System.out.println("++++++"+ct);        if(!input.incrementToken()) return false;        System.out.println("------"+ct);                if(addSynonym(ct.toString())) {            current = this.captureState();                    }                                        return true;    }        private boolean addSynonym(String name) {        String[] sa = engine.getSynonyms(name);        if(sa != null && sa.length>0) {            for(String s:sa) {                stack.push(s);            }            return true;        } else {            return false;        }    }}
public class SynonymAnalyzer extends Analyzer {    private SynonymEngine engine;        public SynonymAnalyzer(SynonymEngine engine) {        this.engine = engine;    }    @Override    public TokenStream tokenStream(String s, Reader reader) {        // TODO Auto-generated method stub        return new SynonymFilter(new StopFilter(Version.LUCENE_35,                new LowerCaseFilter(Version.LUCENE_35,                        new StandardFilter(Version.LUCENE_35,                                new StandardTokenizer(Version.LUCENE_35,reader)))                ,StopAnalyzer.ENGLISH_STOP_WORDS_SET),engine);    }}
public class TestSynonym {    private RAMDirectory directory;    @Test    public void init() {        directory = new RAMDirectory();        SynonymEngine engine = new SynonymEngineImpl();        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new SynonymAnalyzer(engine));        String content = "The quick brown fox jumps over the lazy dog";                try {            IndexWriter writer = new IndexWriter(directory,config);            Document doc = new Document();            doc.add(new Field("content",content,Field.Store.YES,Field.Index.ANALYZED));            writer.addDocument(doc);            writer.close();                        IndexReader reader = IndexReader.open(directory);            IndexSearcher searcher = new IndexSearcher(reader);            TopDocs docs = searcher.search(new TermQuery(new Term("content","pooch")),10);            for(ScoreDoc sd:docs.scoreDocs) {                Document d = searcher.doc(sd.doc);                System.out.println(d.get("content"));            }                    } catch (CorruptIndexException e) {            // TODO Auto-generated catch block            e.printStackTrace();        } catch (LockObtainFailedException e) {            // TODO Auto-generated catch block            e.printStackTrace();        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();        }    }}


0