-- Datasource export query (PostgreSQL syntax: `::` casts, array_agg, unnest).
--
-- Reads every row of `datasources` (no WHERE clause) and returns its
-- descriptive columns together with:
--   * the datasource it was collected from (self-join on d.collectedfrom),
--   * its type and provenance-action classifications,
--   * the OAI base URLs of its APIs,
--   * its subjects and policies,
--   * a compatibility level derived from temp_compliances.
--
-- Composite values are packed into single strings using the separators
-- '@@@', '###' and '&&&', exactly as spelled in the expressions below.
--
-- Row granularity: the GROUP BY contains i.pid and the subject class/scheme
-- columns, so a datasource linked to several pids, or to subjects with
-- different class/scheme combinations, is returned as several rows.
-- NOTE(review): sc.* / ss.* are grouped on and also concatenated inside the
-- `subjects` aggregate, so each `subjects` array only spans one class/scheme
-- combination -- confirm this is intended.
SELECT
    d.id                                AS datasourceid,

    -- Two-element array: the datasource id and the pid of the current group.
    -- It takes the place of a separate aggregated `pid` column
    -- (pid || '###' || issuertypeclass) that is no longer emitted.
    ARRAY[d.id, i.pid]                  AS identities,

    d.officialname                      AS officialname,
    d.englishname                       AS englishname,

    -- Compatibility level: the first branch whose compliance value(s) occur
    -- among the temp_compliances rows joined to this group wins; branch order
    -- therefore defines the priority. Falls back to UNKNOWN when none match
    -- (including when there is no compliance row at all).
    CASE
        WHEN bool_or(tcc.compliance :: TEXT = 'driver')
         AND bool_or(tcc.compliance :: TEXT = 'openaire2.0')
            THEN 'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'driver')
            THEN 'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'openaire2.0')
            THEN 'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'openaire3.0')
            THEN 'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'openaire2.0_data')
            THEN 'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'native')
            THEN 'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'hostedBy')
            THEN 'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN bool_or(tcc.compliance :: TEXT = 'notCompatible')
            THEN 'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        ELSE 'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
    END                                 AS openairecompatibility,

    d.websiteurl                        AS websiteurl,
    d.logourl                           AS logourl,
    d.contactemail                      AS contactemail,

    -- Base URLs of the 'oai' APIs; the edited value is used unless it is NULL
    -- or empty, in which case the original value is used.
    -- The test on apc.param is done here rather than in a WHERE clause:
    -- filtering there would drop datasources that have no baseUrl param.
    -- Rows that do not match contribute a NULL element to the array.
    array_agg(DISTINCT
        CASE
            WHEN apc.param = 'baseUrl' AND api.protocolclass = 'oai'
                THEN COALESCE(NULLIF(apc.edited, ''), apc.original)
        END
    )                                   AS accessinfopackage,

    d.latitude                          AS latitude,
    d.longitude                         AS longitude,
    d.namespaceprefix                   AS namespaceprefix,
    d.od_numberofitems                  AS odnumberofitems,
    d.od_numberofitemsdate              AS odnumberofitemsdate,

    -- name###classcode@@@classname@@@schemecode@@@schemename per subject.
    -- `||` yields NULL if any operand is NULL, so the array may hold NULLs.
    array_agg(DISTINCT
        s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name
    )                                   AS subjects,

    d.description                       AS description,
    d.od_policies                       AS odpolicies,

    -- od_languages is split on ',' and od_contenttypes on '-';
    -- every resulting item is trimmed.
    ARRAY(
        SELECT trim(lang)
        FROM unnest(string_to_array(d.od_languages, ',')) AS langs(lang)
    )                                   AS odlanguages,
    ARRAY(
        SELECT trim(ctype)
        FROM unnest(string_to_array(d.od_contenttypes, '-')) AS ctypes(ctype)
    )                                   AS odcontenttypes,

    d.inferred                          AS inferred,
    d.deletedbyinference                AS deletedbyinference,
    d.trust                             AS trust,
    d.inferenceprovenance               AS inferenceprovenance,
    d.dateofcollection                  AS dateofcollection,
    d.dateofvalidation                  AS dateofvalidation,

    -- re3data fields
    d.releasestartdate                  AS releasestartdate,
    d.releaseenddate                    AS releaseenddate,
    d.missionstatementurl               AS missionstatementurl,
    d.dataprovider                      AS dataprovider,
    d.serviceprovider                   AS serviceprovider,
    d.databaseaccesstype                AS databaseaccesstype,
    d.datauploadtype                    AS datauploadtype,
    d.databaseaccessrestriction         AS databaseaccessrestriction,
    d.datauploadrestriction             AS datauploadrestriction,
    d.versioning                        AS versioning,
    d.citationguidelineurl              AS citationguidelineurl,
    d.qualitymanagementkind             AS qualitymanagementkind,
    d.pidsystems                        AS pidsystems,
    d.certificates                      AS certificates,
    -- name&&&url per policy (NULL element when either part is NULL).
    array_agg(DISTINCT p.name || '&&&' || p.url)
                                        AS policies,
    -- end of re3data fields

    -- Datasource this record was collected from.
    dc.id                               AS collectedfromid,
    dc.officialname                     AS collectedfromname,

    -- classcode@@@classname@@@schemecode@@@schemename
    tc.code  || '@@@' || tc.name  || '@@@' || ts.code  || '@@@' || ts.name
                                        AS datasourcetype,
    pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name
                                        AS provenanceaction

FROM datasources d
    -- datasource type classification
    LEFT JOIN class  tc  ON tc.code = d.datasourceclass
    LEFT JOIN scheme ts  ON ts.code = d.datasourcescheme

    -- provenance action classification
    LEFT JOIN class  pac ON pac.code = d.provenanceactionclass
    LEFT JOIN scheme pas ON pas.code = d.provenanceactionscheme

    -- persistent identifiers
    LEFT JOIN datasourcepids dps ON dps.datasource = d.id
    LEFT JOIN identities i       ON i.pid = dps.pid

    -- collecting datasource, APIs, their parameters and compliances
    LEFT JOIN datasources dc       ON dc.id = d.collectedfrom
    LEFT JOIN api                  ON api.datasource = d.id
    LEFT JOIN apicollections apc   ON apc.api = api.id
    LEFT JOIN temp_compliances tcc ON tcc.api = api.id

    -- subjects and their classification
    LEFT JOIN datasource_subject ds ON ds.datasource = d.id
    LEFT JOIN subjects s            ON s.id = ds.subject
    LEFT JOIN class  sc             ON sc.code = s.semanticclass
    LEFT JOIN scheme ss             ON ss.code = s.semanticscheme

    -- policies
    LEFT JOIN datasource_policy dp ON dp.datasource = d.id
    LEFT JOIN policies p           ON p.id = dp.policy

-- No WHERE clause on purpose: the filters listed below were disabled so that
-- all datasources are imported into the index.
--     d.datasourceclass != 'entityregistry'
--     d.openairecompatibilityclass IS NOT NULL
--     d.openairecompatibilityclass != 'UNKNOWN'
--     d.openairecompatibilityclass != 'notCompatible'

GROUP BY
    d.id,
    d.officialname,
    d.englishname,
    d.websiteurl,
    d.logourl,
    d.contactemail,
    d.namespaceprefix,
    d.description,
    d.od_numberofitems,
    d.od_numberofitemsdate,
    d.od_policies,
    d.od_languages,
    d.od_contenttypes,
    d.latitude,
    d.longitude,
    d.inferred,
    d.deletedbyinference,
    d.trust,
    d.inferenceprovenance,
    d.dateofcollection,
    d.dateofvalidation,
    dc.id,
    d.releasestartdate,
    d.releaseenddate,
    d.missionstatementurl,
    d.dataprovider,
    d.serviceprovider,
    d.databaseaccesstype,
    d.datauploadtype,
    d.databaseaccessrestriction,
    d.datauploadrestriction,
    d.versioning,
    d.citationguidelineurl,
    d.qualitymanagementkind,
    d.pidsystems,
    d.certificates,
    dc.officialname,
    tc.code,
    tc.name,
    ts.code,
    ts.name,
    pac.code,
    pac.name,
    pas.code,
    pas.name,
    sc.code,
    sc.name,
    ss.code,
    ss.name,
    i.pid
