TSQL — Delete duplicates in your table


Howdy Everyone,
It will be quite problematic to delete the duplicates in your table, if they are inserted because of an improper insert or some other means (may be bcp was run twice ….. )
In this case the below stored procedure can work, People who want to create can simply create the function out of it , I assume or else just let me know, I will post that too….

IF EXISTS ( SELECT 1 FROM SYSOBJECTS WHERE name = 'deleteDuplicates' and xtype = 'P' )
	DROP PROCEDURE deleteDuplicates
GO

CREATE PROCEDURE dbo.deleteDuplicates
(
	@tableName	sysname
   ,@check		int = 0	--- 0 just to see what are duplicates, 1 delete the duplicates.
)
AS
BEGIN
	DECLARE @columnsDelimited	NVARCHAR(max)
			,@dySql				NVARCHAR(max)
			,@tran				int
			,@tempTable			sysname
	SET @tran = 0
	SET @columnsDelimited = ''
	SET @tempTable = 'delteDuplicates' + CAST (@@SPID AS VARCHAR)
	BEGIN TRY

		IF NOT EXISTS ( SELECT	1
						FROM	sys.tables
						WHERE	name	= @tableName
					  )
		BEGIN
			PRINT @tableName + ' not exists in the current database'
			RETURN -1
		END
		SELECT @columnsDelimited = @columnsDelimited
									+ CASE LEN(@columnsDelimited)
										WHEN 0 THEN ''
											ELSE ','
										END
									+ QUOTENAME(c.name)
		FROM sys.columns C
		JOIN sys.tables  T
		ON	 C.object_id	= T.object_id
		AND  c.is_identity  = 0
		AND  T.name			= @tableName

		IF OBJECT_ID(@tempTable) IS NOT NULL
		BEGIN
			SET @dySql = 'DROP TABLE ' + @tempTable
			exec (@dySql)
		END

		SET @dySql	= 'SELECT ROW_NUMBER() OVER(PARTITION BY ' + @columnsDelimited  + ' ORDER BY ' +@columnsDelimited + ' ) as row_number_delete, * into '  + @tempTable
						+ ' FROM ' + @tableName
		exec sp_executeSQl @dySql
		SELECT @dySql

		IF (@check = 0)
		BEGIN
			SET @dySql = 'SELECT * FROM ' + @tempTable + ' WHERE row_number_delete > 1 '
			EXEC (@dySql)
		END

		IF (@check = 1)
		BEGIN
			SET @dySql = 'DELETE ' + @tempTable + ' WHERE row_number_delete > 1'
			EXEC (@dySql)

			BEGIN TRAN
				SET @tran = 1
				SET @dySql	= 'TRUNCATE TABLE ' +@tableName
				EXEC ( @dySql)

				SET @dySql	= 'INSERT INTO ' + @tableName + ' (' +@columnsDelimited + ') ' + 'SELECT ' + @columnsDelimited + ' FROM ' + @tempTable
				EXEC ( @dySql)

			COMMIT TRAN
		END

	END TRY
	BEGIN CATCH
		IF (@tran = 1)
			ROLLBACK;
		SELECT 'errored_line '+ CAST (ERROR_LINE() AS VARCHAR) + ' error_msg ' + ERROR_MESSAGE() + ' error_number ' + ERROR_NUMBER()
	END CATCH;
END

GO

--- TEST DATA
drop table abcd
create table abcd
(
	a int
	,b int
	,c int
)

insert into abcd values (1,2,3)
insert into abcd values (1,2,3)
insert into abcd values (1,2,3)
insert into abcd values (1,2,3)
insert into abcd values (2,2,3)
insert into abcd values (2,2,3)
insert into abcd values (2,2,3)
insert into abcd values (3,0,0)

exec deleteDuplicates 'abcd',1
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s