/**
 * DummyLemmatizerPlugin.java
 *
 * $Author: tsakas $
 * $Date: 2007/12/20 14:37:39 $
 * $Id: DummyLemmatizerPlugin.java,v 1.1 2007/12/20 14:37:39 tsakas Exp $
 *
 * <pre>
 *             Copyright (c) : 2006 Dummy Search & Transfer ASA
 *                             ALL RIGHTS RESERVED
 * </pre>
 */

package org.gcube.indexmanagement.common.linguistics.lemmatizerplugin;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Vector;

import org.gcube.common.core.utils.logging.GCUBELog;
import org.gcube.indexmanagement.common.IndexException;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.ext.EnglishStemmer;
import org.tartarus.snowball.ext.FrenchStemmer;
import org.tartarus.snowball.ext.GermanStemmer;
import org.tartarus.snowball.ext.ItalianStemmer;
import org.tartarus.snowball.ext.SpanishStemmer;


/**
 * Lemmatizer plugin that stems single words with the Snowball stemmers.
 * <p>
 * {@link #init(String, Vector)} is called once to create the stemmers for
 * the requested languages. A language can be registered afterwards with
 * {@link #add_language(Language)}. Only English ("en"), Spanish ("es"),
 * Italian ("it"), German ("de") and French ("fr") have a stemmer; any other
 * language is skipped.
 * <p>
 * The stemmer instances are stored in a plain {@link HashMap} and are shared
 * between calls; this class performs no synchronization of its own.
 */
public class SnowballStemmingPlugin implements LemmatizerPlugin {

    /** logger */
    static GCUBELog logger = new GCUBELog(SnowballStemmingPlugin.class);

    /**
     * Registered stemmers, keyed by the language's {@code toString()} value.
     * The (unconventional) field name is kept for backward compatibility.
     */
    Map<String, SnowballProgram> Stemmer = new HashMap<String, SnowballProgram>();

    /**
     * Constructor. The stemmer map starts out empty; call
     * {@link #init(String, Vector)} to populate it.
     */
    public SnowballStemmingPlugin() {
    }

    /**
     * Creates a new stemmer for the given language code.
     *
     * @param code  the language code, compared case-insensitively
     * @return      a fresh stemmer, or {@code null} if the language has no stemmer
     */
    private static SnowballProgram createStemmer(String code) {
        if ("en".equalsIgnoreCase(code)) {
            return new EnglishStemmer();
        }
        if ("es".equalsIgnoreCase(code)) {
            return new SpanishStemmer();
        }
        if ("it".equalsIgnoreCase(code)) {
            return new ItalianStemmer();
        }
        if ("de".equalsIgnoreCase(code)) {
            return new GermanStemmer();
        }
        if ("fr".equalsIgnoreCase(code)) {
            return new FrenchStemmer();
        }
        return null;
    }

    /**
     * Creates and stores a stemmer for the language, replacing any stemmer
     * already stored under the same key.
     *
     * @param language  the language to register
     * @return          {@code true} if a stemmer was stored, {@code false} if
     *                  the language has no stemmer
     */
    private boolean register(Language language) {
        String code = language.toString();
        SnowballProgram stemmer = createStemmer(code);
        if (stemmer == null) {
            return false;
        }
        Stemmer.put(code, stemmer);
        return true;
    }

    /**
     * Initialises the stemmers for the given languages. Languages without a
     * stemmer are skipped with a warning.
     *
     * @param configFile      Not used by this plugin, apart from being echoed in
     *                        the error message of an unexpected failure
     * @param languages       The languages that shall be supported by the lemmatizer
     * @throws IndexException if {@code languages} is {@code null} or the
     *                        stemmers can not be created
     */
    public void init(String configFile, Vector<Language> languages) throws IndexException {
        if (languages == null) {
            // Checked up front so the caller gets a meaningful message
            // instead of a wrapped NullPointerException.
            throw new IndexException("SnowballStemmingPlugin init failed: languages is null");
        }
        try {
            logger.info("Initializing Sowball plugin");
            for (Language language : languages) {
                if (!register(language)) {
                    logger.warn("No Snowball stemmer available for language: " + language);
                }
            }
        }
        catch (NoSuchElementException e) {
            // No more elements in the iterator, should never happen
            throw new IndexException("Language iterator failed " + e.toString());
        }
        catch (ClassCastException e) {
            // Element can not be cast to Language, should never happen
            throw new IndexException("Language cast exception" + e.toString());
        }
        catch (Exception e) {
            throw new IndexException("Error creating language_identifier with config: "
                                     + configFile + " " + e.toString());
        }
    }

    /**
     * Stems a single word with the stemmer registered for the language.
     *
     * @param  word         The word to stem
     * @param  language     The language selecting the stemmer
     * @return              The stemmed word followed by a trailing {@code *}
     * @throws IndexException if an argument is {@code null}, no stemmer is
     *                        registered for the language, or stemming fails
     */
    public String lemmatize_word(String word, Language language) throws IndexException {
        if (word == null) {
            throw new IndexException("SnowballStemmingPlugin lemmatize_word failed: word is null");
        }
        if (language == null) {
            throw new IndexException("SnowballStemmingPlugin lemmatize_word failed: language is null");
        }
        SnowballProgram stemmer = Stemmer.get(language.toString());
        if (stemmer == null) {
            // Explicit check instead of relying on a caught NullPointerException.
            throw new IndexException("SnowballStemmingPlugin lemmatize_word failed: "
                                     + "no stemmer initialised for language " + language);
        }
        try {
            stemmer.setCurrent(word);
            stemmer.stem();
            return stemmer.getCurrent() + "*";
        }
        catch (Exception ex) {
            throw new IndexException("SnowballStemmingPlugin lemmatize_word failed " + ex.toString());
        }
    }

    /**
     * Lemmatizes a whole document. Not implemented by this plugin: the
     * arguments are ignored and an empty string is always returned.
     *
     * @param  document     The document to lemmatize (ignored)
     * @param  language     The language for the lemmatizer (ignored)
     * @return              Always the empty string
     * @throws IndexException never thrown by this implementation
     */
    public String lemmatize_string(String document, Language language) throws IndexException {
        return "";
    }

    /**
     * Registers a stemmer for the language if none is registered yet.
     * A language without a stemmer is skipped with a warning, which matches
     * how {@link #init(String, Vector)} treats such languages.
     *
     * @param  language     The language to add
     * @throws IndexException if {@code language} is {@code null} or the
     *                        stemmer can not be created
     */
    public void add_language(Language language) throws IndexException {
        if (language == null) {
            throw new IndexException("SnowballStemmingPlugin add_language failed: language is null");
        }
        try {
            if (Stemmer.containsKey(language.toString())) {
                // Keep the existing stemmer instance.
                return;
            }
            if (!register(language)) {
                logger.warn("No Snowball stemmer available for language: " + language);
            }
        }
        catch (Exception e) {
            throw new IndexException("SnowballStemmingPlugin add_language failed for language "
                                     + language + " " + e.toString());
        }
    }

    /**
     * Manual smoke test: loads this plugin through the factory, initialises
     * it for a few languages and logs the results of the lemmatize calls.
     *
     * @param args  - The main method input arguments (unused)
     */
    public static void main(String[] args) {

        try {
            String pluginName =
                "org.gcube.indexmanagement.common.linguistics.lemmatizerplugin.SnowballStemmingPlugin";
            // Only passed through to init(), which does not read the file.
            String configFilePath =
                "/ld/work/diligent/daacvs/indexservice/linguistics/fastlinguistics/fastlemmatizer/etc/LemmatizationConfigQueryExpansion.xml";
            logger.info("Loading plugin using factory, config file: " + configFilePath);
            Vector<Language> languages = new Vector<Language>();
            LemmatizerPlugin lepl = LemmatizerFactory.loadPlugin(pluginName);
            try {
                languages.add(Language.en);
                languages.add(Language.it);
                languages.add(Language.fr);
                languages.add(Language.es);
                // sv and af have no stemmer in this plugin and are skipped by init().
                languages.add(Language.sv);
                languages.add(Language.af);
                lepl.init(configFilePath, languages);
            }
            catch (IndexException e) {
                logger.warn( "**** Got exception for languages", e);
            }
            logger.info( "L E M M A T I Z I N G" );
            String input = "house";
            logger.info("Lemmatize: " + Language.en.toLongString() + " house " + " lemmatized form "
                               + lepl.lemmatize_word(input, Language.en));
            logger.info("Lemmatize: " + Language.fr.toLongString() + " maison " + " lemmatized form "
                               + lepl.lemmatize_word("maison", Language.fr));
            logger.info("Lemmatize: " + Language.it.toLongString() + " casa " + " lemmatized form "
                               + lepl.lemmatize_word("casa", Language.it));
            logger.info(" Adding language: " + Language.pt.toLongString());
            try {
                lepl.add_language(Language.pt);
            }
            catch (IndexException e) {
                logger.warn( "**** Got exception for languages", e);
            }
            logger.info(" Adding language: " + Language.en.toLongString());
            try {
                lepl.add_language(Language.en);
            }
            catch (IndexException e) {
                logger.warn( "**** Got exception for languages", e);
            }

            input = "house maison case knee hus casa knife ceasare ";
            logger.info("Lemmatize: " + Language.en.toLongString() + " " + input + " lemmatized form "
                               + lepl.lemmatize_string(input, Language.en));
            logger.info("Lemmatize: " + Language.fr.toLongString() + " "  + input + " lemmatized form "
                               + lepl.lemmatize_string(input, Language.fr));
            logger.info("Lemmatize: " + Language.it.toLongString() + " "  + input + " lemmatized form "
                               + lepl.lemmatize_string(input, Language.it));
            logger.info("Lemmatize: " + Language.pt.toLongString() + " "  + input + " lemmatized form "
                               + lepl.lemmatize_string(input, Language.pt));
            logger.info("Lemmatize: " + Language.es.toLongString() + " "  + input + " lemmatized form "
                               + lepl.lemmatize_string(input, Language.es));
        }
        catch (IndexException ie) {
            logger.error("Error during lemmatization.", ie);
        }
    }
}
