OpenNLP provides a document categorizer tool that classifies a given text into pre-defined categories.
$ opennlp Doccat
Usage:
opennlp Doccat model < documents
Document Categorizer API
The following is a working application that categorizes a given text.
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Objects;

import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;

/**
 * Small helper that loads an OpenNLP document-categorizer model from a file
 * and uses it to pick the best category for a piece of text.
 */
public class CategoryDetectorUtil {

    private DoccatModel docCatModel;
    private DocumentCategorizerME myCategorizer;

    /**
     * Creates a detector backed by the model stored in {@code modelFile}.
     *
     * <p>If the model cannot be read, the failure message is printed to
     * standard output and the instance is left without a categorizer;
     * {@link #getCategory(String)} will then throw
     * {@link IllegalStateException}.
     *
     * @param modelFile path of the serialized categorizer model
     * @throws NullPointerException if {@code modelFile} is {@code null}
     */
    public CategoryDetectorUtil(String modelFile) {
        // Objects.nonNull only returns a boolean; requireNonNull actually rejects null.
        Objects.requireNonNull(modelFile, "modelFile");
        initModel(modelFile);
    }

    /**
     * Reads the model from {@code modelFile} and builds the categorizer.
     * Load failures are reported on standard output and otherwise ignored.
     *
     * @param modelFile path of the serialized categorizer model
     */
    private void initModel(String modelFile) {
        // try-with-resources closes the stream whether or not loading succeeds.
        try (InputStream inputStream = new FileInputStream(modelFile)) {
            docCatModel = new DoccatModel(inputStream);
            myCategorizer = new DocumentCategorizerME(docCatModel);
        } catch (Exception e) {
            // Best-effort load, kept from the original: report and continue.
            System.out.println(e.getMessage());
        }
    }

    /**
     * Categorizes {@code text} and returns the best-scoring category.
     *
     * @param text the text to categorize
     * @return the category that the categorizer reports as best for the
     *         outcomes it computed
     * @throws IllegalStateException if the model failed to load at
     *         construction time
     */
    public String getCategory(String text) {
        if (myCategorizer == null) {
            throw new IllegalStateException(
                    "Document categorizer model was not loaded; cannot categorize text");
        }
        // NOTE(review): categorize(String) matches older OpenNLP releases; newer
        // releases appear to expect a token array - confirm against the version in use.
        double[] outcomes = myCategorizer.categorize(text);
        return myCategorizer.getBestCategory(outcomes);
    }
}
No comments:
Post a Comment