SELECT
	-- Exports every datasource with its compatibility level, OAI base URLs, subjects,
	-- policies, collected-from datasource and type / provenance classification.
	--
	-- Row granularity: one row per (datasource, pid) pair, because i.pid is a grouping key
	-- and identities is built from a single pid. A datasource without any pid still yields
	-- one row, whose identities array has NULL as second element.
	--
	-- The one-to-many joins (api, apicollections, temp_compliances, subjects, policies)
	-- multiply the rows before grouping; the DISTINCT aggregates collapse them again.
	d.id AS datasourceid,
	ARRAY [d.id, i.pid] AS identities,
	d.officialname AS officialname,
	d.englishname AS englishname,

	-- Compatibility level, derived from the distinct tcc.compliance values found on the
	-- APIs of the datasource. Branches are evaluated top-down and the first match wins.
	-- Each result is a @@@-separated string.
	-- NOTE(review): openaire3.0 is tested after driver and openaire2.0, so a datasource
	-- having both resolves to the earlier branch - confirm this precedence is intended.
	CASE
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver', 'openaire2.0']
			THEN 'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver']
			THEN 'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0']
			THEN 'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire3.0']
			THEN 'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0_data']
			THEN 'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['native']
			THEN 'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['hostedBy']
			THEN 'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['notCompatible']
			THEN 'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		ELSE 'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
	END AS openairecompatibility,

	d.websiteurl AS websiteurl,
	d.logourl AS logourl,
	d.contactemail AS contactemail,

	-- Base URLs of the OAI APIs: the edited value when it is set and not empty, otherwise
	-- the original one. The baseUrl restriction lives inside the aggregate and not in a
	-- WHERE clause, because filtering on apc.param would drop the datasources that have no
	-- baseUrl parameter at all. As a consequence the array keeps a NULL element for the
	-- rows that are not an OAI baseUrl.
	array_agg(DISTINCT CASE
		WHEN apc.param = 'baseUrl' AND api.protocolclass = 'oai'
			THEN COALESCE(NULLIF(apc.edited, ''), apc.original)
		ELSE NULL
	END) AS accessinfopackage,

	d.latitude AS latitude,
	d.longitude AS longitude,
	d.namespaceprefix AS namespaceprefix,
	d.od_numberofitems AS odnumberofitems,
	d.od_numberofitemsdate AS odnumberofitemsdate,

	-- One element per distinct subject, formatted as name###classcode@@@classname@@@schemecode@@@schemename.
	-- The concatenation is NULL as soon as one of its parts is NULL, so a datasource
	-- without subjects gets an array holding a single NULL.
	array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,

	d.description AS description,
	d.od_policies AS odpolicies,

	-- od_languages is split on commas, od_contenttypes on dashes; every token is trimmed.
	ARRAY(SELECT trim(lang)
	      FROM unnest(string_to_array(d.od_languages, ',')) AS lang) AS odlanguages,
	ARRAY(SELECT trim(ctype)
	      FROM unnest(string_to_array(d.od_contenttypes, '-')) AS ctype) AS odcontenttypes,

	d.inferred AS inferred,
	d.deletedbyinference AS deletedbyinference,
	d.trust AS trust,
	d.inferenceprovenance AS inferenceprovenance,
	d.dateofcollection AS dateofcollection,
	d.dateofvalidation AS dateofvalidation,

	-- re3data fields
	d.releasestartdate AS releasestartdate,
	d.releaseenddate AS releaseenddate,
	d.missionstatementurl AS missionstatementurl,
	d.dataprovider AS dataprovider,
	d.serviceprovider AS serviceprovider,
	d.databaseaccesstype AS databaseaccesstype,
	d.datauploadtype AS datauploadtype,
	d.databaseaccessrestriction AS databaseaccessrestriction,
	d.datauploadrestriction AS datauploadrestriction,
	d.versioning AS versioning,
	d.citationguidelineurl AS citationguidelineurl,
	d.qualitymanagementkind AS qualitymanagementkind,
	d.pidsystems AS pidsystems,
	d.certificates AS certificates,
	-- One element per distinct policy, formatted as name&&&url (NULL when either part is NULL).
	array_agg(DISTINCT p.name || '&&&' || p.url) AS policies,
	-- end of re3data fields

	dc.id AS collectedfromid,
	dc.officialname AS collectedfromname,

	tc.code || '@@@' || tc.name || '@@@' || ts.code || '@@@' || ts.name AS datasourcetype,
	pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction

FROM datasources d
	-- datasource type
	LEFT JOIN class tc ON (tc.code = d.datasourceclass)
	LEFT JOIN scheme ts ON (ts.code = d.datasourcescheme)

	-- provenance action
	LEFT JOIN class pac ON (pac.code = d.provenanceactionclass)
	LEFT JOIN scheme pas ON (pas.code = d.provenanceactionscheme)

	-- pids, exposed through the identities column
	LEFT JOIN datasourcepids dps ON (dps.datasource = d.id)
	LEFT JOIN identities i ON (i.pid = dps.pid)

	-- datasource this one has been collected from
	LEFT JOIN datasources dc ON (dc.id = d.collectedfrom)

	-- APIs, their parameters and their compliance levels
	LEFT JOIN api ON (api.datasource = d.id)
	LEFT JOIN apicollections apc ON (apc.api = api.id)
	LEFT JOIN temp_compliances tcc ON (tcc.api = api.id)

	-- subjects with their semantic class and scheme
	LEFT JOIN datasource_subject ds ON (ds.datasource = d.id)
	LEFT JOIN subjects s ON (s.id = ds.subject)
	LEFT JOIN class sc ON (sc.code = s.semanticclass)
	LEFT JOIN scheme ss ON (ss.code = s.semanticscheme)

	-- policies
	LEFT JOIN datasource_policy dp ON (dp.datasource = d.id)
	LEFT JOIN policies p ON (p.id = dp.policy)

-- No WHERE clause on purpose: the former filters below have been disabled to allow the
-- import of all the datasources on the index.
--   d.datasourceclass != 'entityregistry'
--   d.openairecompatibilityclass IS NOT null
--   d.openairecompatibilityclass != 'UNKNOWN'
--   d.openairecompatibilityclass != 'notCompatible'

-- The subject class and scheme columns (sc.*, ss.*) are intentionally NOT grouping keys:
-- they are only read inside the subjects aggregate, and grouping by them would split a
-- datasource whose subjects belong to different classes or schemes into several rows,
-- each one with a partial subjects array.
GROUP BY
	d.id,
	d.officialname,
	d.englishname,
	d.websiteurl,
	d.logourl,
	d.contactemail,
	d.namespaceprefix,
	d.description,
	d.od_numberofitems,
	d.od_numberofitemsdate,
	d.od_policies,
	d.od_languages,
	d.od_contenttypes,
	d.latitude,
	d.longitude,
	d.inferred,
	d.deletedbyinference,
	d.trust,
	d.inferenceprovenance,
	d.dateofcollection,
	d.dateofvalidation,
	d.releasestartdate,
	d.releaseenddate,
	d.missionstatementurl,
	d.dataprovider,
	d.serviceprovider,
	d.databaseaccesstype,
	d.datauploadtype,
	d.databaseaccessrestriction,
	d.datauploadrestriction,
	d.versioning,
	d.citationguidelineurl,
	d.qualitymanagementkind,
	d.pidsystems,
	d.certificates,
	dc.id,
	dc.officialname,
	tc.code, tc.name, ts.code, ts.name,
	pac.code, pac.name, pas.code, pas.name,
	-- one output row per pid, see identities
	i.pid
