KeywordAnalyzer tokenizes the entire stream as a single token. This is useful for data like zip codes, IDs, and some product names.
How to get an instance of KeywordAnalyzer?
// KeywordAnalyzer has a public no-argument constructor.
Analyzer keywordAnalyzer = new KeywordAnalyzer();
App.java
package com.sample.app;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
 * Demonstrates Lucene's {@code KeywordAnalyzer} by printing the tokens it
 * produces for a sample product title.
 */
public class App {

    /**
     * Runs {@code text} through {@code analyzer} and collects every term the
     * resulting token stream emits, in order.
     *
     * @param text      the text to analyze
     * @param fieldName the field name handed to {@link Analyzer#tokenStream}
     * @param analyzer  the analyzer used to build the token stream; it is not
     *                  closed by this method
     * @return the emitted terms as strings, in emission order
     * @throws IOException if the token stream fails while being consumed
     */
    public static List<String> getTokens(String text, String fieldName, Analyzer analyzer) throws IOException {
        List<String> result = new ArrayList<>();

        // try-with-resources guarantees close() even if incrementToken() throws;
        // an unclosed stream prevents the analyzer from reusing its components.
        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, text)) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

            // TokenStream consumer workflow: reset() -> incrementToken()* -> end() -> close().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                result.add(charTermAttribute.toString());
            }
            tokenStream.end();
        }

        return result;
    }

    /**
     * Analyzes a sample product title with a {@code KeywordAnalyzer} and prints
     * each resulting token on its own line.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if token stream consumption fails
     */
    public static void main(String[] args) throws IOException {
        // Analyzer is Closeable; release it once the tokens have been collected.
        try (Analyzer keywordAnalyzer = new KeywordAnalyzer()) {
            List<String> tokens = getTokens(
                    "Acer Aspire 3 A315-53 15.6-inch Laptop (Intel Celeron Processor 3867U/4GB/500GB HDD/Windows 10 Home 64 bit/Intel HD 610 Graphics), Obsidian Black",
                    null, keywordAnalyzer);

            for (String token : tokens) {
                System.out.println(token);
            }
        }
    }
}
Output
Acer Aspire 3 A315-53 15.6-inch Laptop (Intel Celeron Processor 3867U/4GB/500GB HDD/Windows 10 Home 64 bit/Intel HD 610 Graphics), Obsidian Black
No comments:
Post a Comment