package org.gcube.data.analysis.tabulardata.operation.data.remove;

import java.sql.SQLException;
import java.util.List;

import org.gcube.data.analysis.tabulardata.cube.CubeManager;
import org.gcube.data.analysis.tabulardata.cube.data.connection.DatabaseConnectionProvider;
import org.gcube.data.analysis.tabulardata.model.column.Column;
import org.gcube.data.analysis.tabulardata.model.column.type.IdColumnType;
import org.gcube.data.analysis.tabulardata.model.column.type.ValidationColumnType;
import org.gcube.data.analysis.tabulardata.model.table.Table;
import org.gcube.data.analysis.tabulardata.operation.SQLHelper;
import org.gcube.data.analysis.tabulardata.operation.invocation.OperationInvocation;
import org.gcube.data.analysis.tabulardata.operation.worker.ImmutableWorkerResult;
import org.gcube.data.analysis.tabulardata.operation.worker.Worker;
import org.gcube.data.analysis.tabulardata.operation.worker.WorkerResult;
import org.gcube.data.analysis.tabulardata.operation.worker.exceptions.WorkerException;

/**
 * Worker that creates a new table modelled on the target table of the invocation and
 * deletes duplicate rows from that new table.
 * <p>
 * Two rows are duplicates when they hold the same values in every column that is neither
 * an {@link IdColumnType} nor a {@link ValidationColumnType} column. For each group of
 * duplicates the row with the smallest {@code id} is kept.
 * <p>
 * NOTE(review): the generated statement uses {@code md5}, a row-to-text cast and a
 * {@code WITH ... DELETE} form, i.e. it assumes a PostgreSQL backend - confirm.
 */
public class DuplicateRowRemover extends Worker {
	
	CubeManager cubeManager;
	
	DatabaseConnectionProvider connectionProvider;
	
	/** Table referenced by the invocation; set by {@link #retrieveParameters()}. */
	Table targetTable;
	
	Column targetColumn;
	
	/** Table the deletion is executed on and that is returned as result. */
	Table newTable;
	
	/**
	 * @param sourceInvocation the invocation providing the target table id
	 * @param cubeManager used to look up the target table and to create the new one
	 * @param connectionProvider used to execute the SQL delete command
	 */
	public DuplicateRowRemover(OperationInvocation sourceInvocation, CubeManager cubeManager,
			DatabaseConnectionProvider connectionProvider) {
		super(sourceInvocation);
		this.cubeManager = cubeManager;
		this.connectionProvider = connectionProvider;
	}

	/**
	 * Resolves the target table, creates the new table and removes duplicates from it.
	 *
	 * @return a result wrapping the new table
	 * @throws WorkerException if the delete statement cannot be built or executed
	 */
	@Override
	protected WorkerResult execute() throws WorkerException {
		retrieveParameters();
		updateProgress(0.1f);
		createNewTable();
		updateProgress(0.5f);
		removeDuplicateTuples();
		return new ImmutableWorkerResult(newTable);
	}

	/**
	 * Builds and executes the delete statement against the new table.
	 *
	 * @throws WorkerException if the statement cannot be built or its execution fails
	 */
	private void removeDuplicateTuples() throws WorkerException {
		String sql = generateSQLDeleteCommand();
		try {
			SQLHelper.executeSQLCommand(sql, connectionProvider);
		} catch (SQLException e) {
			throw new WorkerException("Error occurred while performing deletion of duplicate tuples", e);
		}
	}
	
	/**
	 * Generates the SQL statement that deletes, for every group of rows sharing the same
	 * row hash, all rows but the one with the smallest {@code id}.
	 * <p>
	 * The row hash is {@code md5(ROW(columns)::text)}. A plain {@code concat(columns)}
	 * must not be used here: it joins the values without any delimiter and skips NULLs,
	 * so distinct rows such as {@code ('ab','c')} and {@code ('a','bc')} would get the
	 * same hash and one of them would be deleted by mistake.
	 *
	 * @return the SQL delete command targeting the new table
	 * @throws WorkerException if the table has no column to compare rows on
	 */
	private String generateSQLDeleteCommand() throws WorkerException {
		@SuppressWarnings("unchecked") // generic varargs of Class<? extends ColumnType>
		List<Column> columns = targetTable.getColumnsExceptTypes(IdColumnType.class, ValidationColumnType.class);
		if (columns.isEmpty()) {
			// ROW() with no fields is valid SQL and would make every row look identical,
			// so fail explicitly instead of wiping the table down to a single row.
			String message = "Unable to remove duplicates: the table has no data columns to compare";
			throw new WorkerException(message, new IllegalStateException(message));
		}
		String columnsSnippet = SQLHelper.generateColumnNameSnippet(columns);
		// Text form of a row constructor delimits and quotes each field and keeps NULL
		// distinct from the empty string, so equal hashes imply equal column values.
		String rowHash = String.format("md5(ROW(%s)::text)", columnsSnippet);
		return String.format("" +
				"WITH dupHashes AS (" +
				" SELECT %1$s AS hash" +
				" FROM %2$s" +
				" GROUP BY hash" +
				" HAVING count(*) > 1" +
				"), duplicateTuples AS (" +
				" SELECT id, %1$s AS hash" +
				" FROM %2$s" +
				" WHERE %1$s IN (SELECT * FROM dupHashes)" +
				"), toKeep AS (" +
				" SELECT min(id) AS id, %1$s AS hash" +
				" FROM %2$s" +
				" WHERE %1$s IN (SELECT * FROM dupHashes)" +
				" GROUP BY hash" +
				") DELETE FROM %2$s WHERE id IN (" +
				" SELECT id FROM duplicateTuples EXCEPT (SELECT id FROM toKeep)" +
				");"
				, rowHash, newTable.getName());
	}

	/**
	 * Creates the new table with the same table type as the target table, modelled on it.
	 * NOTE(review): the {@code true} flag presumably requests the data to be copied as
	 * well - confirm against the table creator API.
	 */
	private void createNewTable() {
		newTable = cubeManager.createTable(targetTable.getTableType()).like(targetTable, true).create();
	}

	/** Looks up the target table using the table id carried by the source invocation. */
	private void retrieveParameters() {
		targetTable = cubeManager.getTable(getSourceInvocation().getTargetTableId());
	}

}
