Вы можете реализовать собственный анализатор или расширить StandardAnalyzer.
Пример:
TokenFilter + Analyzer
/// <summary>
/// Token filter that lets through only those tokens of the wrapped stream
/// whose term length is greater than or equal to a configured threshold.
/// </summary>
public class MinTermLengthTokenFilter : TokenFilter
{
    // Smallest term length that is still emitted.
    private readonly int _shortestAllowed;

    // Term attribute registered on this stream; queried after each advance of the input.
    private readonly TermAttribute _term;

    /// <summary>
    /// Creates the filter on top of <paramref name="input"/>.
    /// </summary>
    /// <param name="maxTermLength">
    /// Despite its name, this value is used as the MINIMUM accepted term length
    /// (tokens shorter than it are dropped). The name is kept for caller compatibility.
    /// </param>
    /// <param name="input">The token stream to filter.</param>
    public MinTermLengthTokenFilter(int maxTermLength, TokenStream input)
        : base(input)
    {
        _shortestAllowed = maxTermLength;
        _term = (TermAttribute)AddAttribute(typeof(TermAttribute));
    }

    /// <summary>
    /// Advances the wrapped stream until a sufficiently long token is found.
    /// </summary>
    /// <returns>
    /// <c>true</c> when positioned on a token whose term length meets the threshold;
    /// <c>false</c> once the wrapped stream reports no more tokens.
    /// </returns>
    public override bool IncrementToken()
    {
        for (;;)
        {
            if (!input.IncrementToken())
            {
                return false;
            }

            if (_term.TermLength() < _shortestAllowed)
            {
                // Too short: skip and look at the next token.
                continue;
            }

            return true;
        }
    }
}
/// <summary>
/// A <see cref="StandardAnalyzer"/> whose token streams are additionally wrapped
/// in a <see cref="MinTermLengthTokenFilter"/>.
/// </summary>
public class MinTermLengthAnalyzer : StandardAnalyzer
{
    // Threshold handed to every MinTermLengthTokenFilter this analyzer creates.
    private readonly int _lengthThreshold;

    /// <summary>
    /// Creates the analyzer using the base class's parameterless constructor.
    /// </summary>
    /// <param name="minTermLength">Threshold passed to the length filter.</param>
    public MinTermLengthAnalyzer(int minTermLength)
    {
        _lengthThreshold = minTermLength;
    }

    /// <summary>
    /// Builds the base analyzer's stream for the field and wraps it in the length filter.
    /// </summary>
    /// <param name="fieldName">Field name forwarded to the base analyzer.</param>
    /// <param name="reader">Text source forwarded to the base analyzer.</param>
    /// <returns>A new <see cref="MinTermLengthTokenFilter"/> over the base stream.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        var standardStream = base.TokenStream(fieldName, reader);
        return new MinTermLengthTokenFilter(_lengthThreshold, standardStream);
    }

    /// <summary>
    /// Same as <c>TokenStream</c>, but starts from the base analyzer's reusable stream.
    /// A new filter instance is created on every call.
    /// </summary>
    /// <param name="fieldName">Field name forwarded to the base analyzer.</param>
    /// <param name="reader">Text source forwarded to the base analyzer.</param>
    /// <returns>A new <see cref="MinTermLengthTokenFilter"/> over the base reusable stream.</returns>
    public override TokenStream ReusableTokenStream(string fieldName, TextReader reader)
    {
        var reusableStream = base.ReusableTokenStream(fieldName, reader);
        return new MinTermLengthTokenFilter(_lengthThreshold, reusableStream);
    }
}
Индексирование:
// Index a single document whose "text" field is analyzed with MinTermLengthAnalyzer(5)
// and stored together with term vectors (positions + offsets).
FSDirectory dir = FSDirectory.GetDirectory("C:\\temp\\CFSTEST");
try
{
    IndexWriter writer = new IndexWriter(dir, new MinTermLengthAnalyzer(5));
    try
    {
        Document document = new Document();
        document.Add(new Field(
            "text",
            "some sample text for demonstration",
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
        writer.AddDocument(document);
    }
    finally
    {
        // Close the writer even if adding the document throws.
        writer.Close();
    }
}
finally
{
    // The directory was opened here, so it is released here as well.
    dir.Close();
}
Поиск:
// Search the "text" field for the term "demonstration" (top 25 hits) and print
// every term recorded in the term vectors of each matching document.
// The reader is held in its own variable so it can be closed explicitly:
// closing a searcher that was constructed from a reader leaves that reader open.
IndexReader indexReader = IndexReader.Open("C:\\temp\\CFSTEST");
try
{
    var indexSearcher = new IndexSearcher(indexReader);
    try
    {
        var results = indexSearcher.Search(new TermQuery(new Term("text", "demonstration")), null, 25);
        foreach (var result in results.ScoreDocs)
        {
            TermFreqVector[] vectors = indexReader.GetTermFreqVectors(result.doc);
            if (vectors == null)
            {
                // No term vectors available for this document.
                continue;
            }

            foreach (Lucene.Net.Index.TermFreqVector vector in vectors)
            {
                String[] terms = vector.GetTerms();
                foreach (String term in terms)
                {
                    Console.WriteLine(term);
                }
            }
        }
    }
    finally
    {
        indexSearcher.Close();
    }
}
finally
{
    indexReader.Close();
}
// outputs:
// demonstration
// sample