package lemmaextractor;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import lemmaextractor.results.Result;

import java.util.ArrayList;
import java.util.Properties;

/**
 * English implementation of {@link LanguageProcessor}, backed by a Stanford
 * CoreNLP pipeline. Extracts per-token lemmas with character offsets into the
 * input text and, when NER is enabled, named entities merged across
 * consecutive tokens that share the same entity tag.
 *
 * Created by michal on 4/15/14.
 */
public class EnglishProcessor implements LanguageProcessor {

    private final StanfordCoreNLP pipeline;

    /** Builds a full pipeline: tokenization, sentence splitting, POS, lemmas, NER. */
    public EnglishProcessor(String taggerModel, String nerModel) {
        Properties props = new Properties();
        props.put("pos.model", taggerModel);
        props.put("ner.model", nerModel);
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner");
        pipeline = new StanfordCoreNLP(props);
    }

    /** Builds a lemma-only pipeline; processFull() yields no entities without NER. */
    public EnglishProcessor(String taggerModel) {
        Properties props = new Properties();
        props.put("pos.model", taggerModel);
        props.put("annotators", "tokenize, ssplit, pos, lemma");
        pipeline = new StanfordCoreNLP(props);
    }

    @Override
    public Result processFull(String text) throws Exception {
        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        Result result = new Result("en");
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            // State for the named entity currently being assembled. Consecutive
            // tokens carrying the same NE tag are merged into a single entity.
            String annotationChannel = "";
            String namedEntity = "";
            String namedEntityBase = "";
            ArrayList<Integer> namedEntityIndices = new ArrayList<Integer>();

            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String word = token.get(CoreAnnotations.TextAnnotation.class);
                String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

                // Use the tokenizer's own character offsets instead of searching
                // the raw text: CoreNLP normalizes some tokens (e.g. "(" becomes
                // "-LRB-"), so indexOf() on the token text can miss or misalign.
                int tokenStart = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
                int tokenEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
                result.addLemma(lemma, tokenStart, tokenEnd);

                // ne is null when the pipeline was built without the NER annotator.
                if (ne != null && !ne.equals("O")) {
                    if (ne.equals(annotationChannel)) {
                        // Same tag as the previous token: extend the current entity,
                        // preserving the original whitespace between the tokens.
                        String before = token.getString(CoreAnnotations.BeforeAnnotation.class);
                        namedEntity += before + word;
                        namedEntityBase += before + lemma;
                        namedEntityIndices.add(result.getLemmas().size() - 1);
                    } else {
                        // Tag changed: flush any entity in progress, then start a new one.
                        if (!namedEntity.isEmpty()) {
                            result.addNamedEntity(namedEntity, namedEntityBase, namedEntityIndices);
                        }
                        annotationChannel = ne;
                        namedEntity = word;
                        namedEntityBase = lemma;
                        namedEntityIndices = new ArrayList<Integer>();
                        namedEntityIndices.add(result.getLemmas().size() - 1);
                    }
                } else if (!namedEntity.isEmpty()) {
                    // Left an entity span: flush it and reset the state.
                    result.addNamedEntity(namedEntity, namedEntityBase, namedEntityIndices);
                    annotationChannel = "";
                    namedEntity = "";
                    namedEntityBase = "";
                    namedEntityIndices = new ArrayList<Integer>();
                }
            }

            // Flush an entity that runs to the end of the sentence; previously
            // these were dropped because flushing only happened on a tag change.
            if (!namedEntity.isEmpty()) {
                result.addNamedEntity(namedEntity, namedEntityBase, namedEntityIndices);
            }
        }
        return result;
    }

    @Override
    public Result processLemmas(String text) {
        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        Result result = new Result("en");
        for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
                int tokenStart = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
                int tokenEnd = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
                result.addLemma(lemma, tokenStart, tokenEnd);
            }
        }
        return result;
    }
}
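
/**
 * Minimal usage sketch, kept out of the public API. Assumptions not taken from
 * this file: the model file names below are the standard Stanford download
 * names and must be on the classpath or given as paths, and Result exposes the
 * entries added via addLemma()/addNamedEntity() in a printable form.
 */
class EnglishProcessorDemo {
    public static void main(String[] args) throws Exception {
        LanguageProcessor processor = new EnglishProcessor(
                "english-left3words-distsim.tagger",
                "english.all.3class.distsim.crf.ser.gz");
        // Expected: one lemma per token ("Barack", "Obama", "visit", ...) with
        // character offsets, and "Barack Obama" merged into a single PERSON
        // entity because both tokens carry the same NER tag.
        Result result = processor.processFull("Barack Obama visited Warsaw yesterday.");
        System.out.println(result);
    }
}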