package org.gcube.indexmanagement.lucenewrapper;

import org.apache.lucene.search.DefaultSimilarity;

/*
We want to index content and annotation collections, in addition to the metadata collection indexing that we already have.
During a search, we want to use all the available information from metadata, content and annotations in order to decide the relevance of a document to a number of terms.
Problem: Ranking!
Some documents may not have metadata or annotations.
The score derived from the content of a document must be comparable with the scores derived from its metadata and annotations.
Lucene's tf-idf formula gives a much lower score to a content document than to a metadata document with the same relevance but a much smaller size.


Approach followed
Implement new scoring formula (tf-idf based) that can be adjusted based on the notion of relevance we have for the different possible magnitudes of documents' size.
Build different full text lookup resources for a collection’s content, metadata and annotation collections.
While searching, query all these lookup resources (for content, metadata, annotations) and feed the results to a new search operator, which produces the final ranked results for the query by combining the different scores for each IO.
The combined score reflects the relevance of an IO to the terms of a query, based on all the available data for this IO.
 * 
 * */


/**
 * Similarity that replaces the length normalisation of {@link DefaultSimilarity}
 * with one that is scaled by a size-dependent correction factor.
 *
 * <p>The norm returned for a field with {@code n} terms is
 * {@code sqrt(factor(n) / n)}. {@code factor(n)} is exactly 1 for fields with
 * fewer than 10 terms and is computed from the decimal order of magnitude of
 * {@code n / 10} for larger fields (see {@link #factor(long)}).
 *
 * <p>The two bases used by the correction factor are compile-time constants;
 * the class holds no mutable state.
 */
class GlobalSimilarity extends DefaultSimilarity{
	
	/** Base raised to the power {@code mag} in the divisor of {@link #factor(long)}. */
	private static final double MMF1 = 4;
	
	/** Base raised to the power {@code f(mag)} in the divisor of {@link #factor(long)}. */
	private static final double MMF2 = 2;
	
	/**
	 * Computes the length normalisation value for a field.
	 *
	 * <p>Note: for {@code numTerms == 0} the correction factor is 1 and the
	 * floating-point division by zero makes the result positive infinity.
	 *
	 * @param fieldName name of the field; not used by this implementation
	 * @param numTerms number of terms in the field
	 * @return {@code sqrt(factor(numTerms) / numTerms)} narrowed to {@code float}
	 */
	public float lengthNorm(String fieldName, int numTerms){
		return (float)Math.sqrt(factor(numTerms)/((double)numTerms));
	}
	
	/**
	 * Computes the term-frequency component of the score.
	 *
	 * @param freq frequency of the term in the document
	 * @return the square root of {@code freq}
	 */
	public float tf(float freq){
		return (float)Math.sqrt((double)freq);
	}
	
	/**
	 * Piecewise-linear accumulation of the series {@code i / (i + 1)}.
	 *
	 * <p>Sums {@code i / (i + 1)} for every integer {@code i} in
	 * {@code [0, floor(mag))} and then adds the fractional part of {@code mag}
	 * multiplied by the next term of the series, {@code floor(mag) / (floor(mag) + 1)}.
	 *
	 * @param mag magnitude to evaluate
	 * @return the accumulated value, or 0 when {@code mag} is negative
	 */
	private static double f(double mag)
	{
		if(mag < 0.0) {
			return 0.0;
		}
		
		double result = 0.0;
		double magf = Math.floor(mag);
		// Whole steps: one full series term per integer below floor(mag).
		for(long i=0; i<magf; i++) {
			result += ((double)i)/((double)i+1);
		}
		// Partial step: fraction of the next series term.
		result += (mag-magf)*(magf/(magf + 1));
		return result;
	}
	
	/**
	 * Computes the correction factor applied to the length normalisation.
	 *
	 * <p>With {@code mag = log10(size / 10)} for {@code size >= 10}, and
	 * {@code mag = 0} otherwise, the result is
	 * {@code 10^mag / (MMF1^mag / MMF2^f(mag))}. For {@code mag = 0} this is
	 * exactly 1.
	 *
	 * @param size number of terms in the field
	 * @return the correction factor
	 */
	private static double factor(long size)
	{
		double mag;
		if(size < 10) {
			// Small fields get no correction; this also keeps log10 away from values <= 0.
			mag = 0.0;
		} else {
			mag = Math.log10(((double)size)/((double)10.0));
		}
		return Math.pow(10.0, mag)/(Math.pow(MMF1,mag)/Math.pow(MMF2,f(mag)));
	}
}
