package lemmaextractor;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import lemmaextractor.results.Result;

import java.util.Properties;

/**
 * Language processor that performs no real lemmatization: it only tokenizes
 * and sentence-splits the input and reports every token's surface text as its
 * own "lemma", together with the token's character span in the input.
 *
 * <p>Created by michal on 24.03.14.
 */
public class DummyProcessor implements LanguageProcessor {

    /** Language code handed to every {@link Result} this processor creates. */
    private String lang;

    /** CoreNLP pipeline configured with the {@code tokenize} and {@code ssplit} annotators only. */
    StanfordCoreNLP pipeline;

    /**
     * Creates a processor for the given language and builds the
     * tokenize + sentence-split pipeline.
     *
     * @param lang language code stored in the produced results
     */
    public DummyProcessor(String lang) {
        this.lang = lang;
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit");
        pipeline = new StanfordCoreNLP(props);
    }

    /**
     * Full processing is identical to {@link #processLemmas(String)} for this
     * processor.
     *
     * @param text text to process
     * @return the same result {@link #processLemmas(String)} would return
     */
    @Override
    public Result processFull(String text) {
        return processLemmas(text);
    }

    /**
     * Tokenizes {@code text} and adds one entry per token to the result: the
     * token text plus its start (inclusive) and end (exclusive) character
     * offsets in {@code text}.
     *
     * <p>Offsets are taken from the character offsets the tokenizer stored on
     * each token. Searching the input for the token text, as was done before,
     * breaks whenever the tokenizer normalizes a token (for example brackets or
     * quotes): the search then returns -1 and all following offsets can drift.
     * The search is kept only as a fallback for tokens that carry no offsets.
     *
     * @param text text to process
     * @return result containing every token with its character span
     */
    @Override
    public Result processLemmas(String text) {
        Result result = new Result(lang);
        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        // Search cursor; only consulted by the fallback path below.
        int positionInText = 0;
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String word = token.get(CoreAnnotations.TextAnnotation.class);

                int tokenStart = token.beginPosition();
                int tokenEnd = token.endPosition();
                if (tokenStart < 0 || tokenEnd < tokenStart) {
                    // Token has no usable offsets: locate its text in the input instead.
                    tokenStart = text.indexOf(word, positionInText);
                    tokenEnd = tokenStart + word.length();
                }

                result.addLemma(word, tokenStart, tokenEnd);
                // Never move the cursor backwards, even if a lookup failed.
                positionInText = Math.max(positionInText, tokenEnd);
            }
        }
        return result;
    }

    /**
     * Changes the language code used for results created after this call.
     *
     * @param lang new language code
     */
    public void setLang(String lang) {
        this.lang = lang;
    }
}