/* This script is given "As Is" with no warranties and plenty of caveats. Use at your own risk! For more on data profiling, see Chapter 10 in "SQL Server 2012 Data Integration Recipes", Apress, 2012 */ ----------------------------------------------------------------------- -- User-defined variables ----------------------------------------------------------------------- USE CarSales -- Your database here GO DECLARE @TABLE_SCHEMA NVARCHAR(128) = 'dbo' -- Your schema here DECLARE @TABLE_NAME NVARCHAR(128) = 'client' -- Your table here DECLARE @ColumnListIN NVARCHAR(4000) = '' -- Enter a comma-separated list of specific columns -- to profile, or leave blank for all DECLARE @TextCol BIT = 1 -- Analyse all text (char/varchar/nvarchar) data type columns DECLARE @NumCol BIT = 1 -- Analyse all numeric data type columns DECLARE @DateCol BIT = 1 -- Analyse all date data type data type columns DECLARE @LobCol BIT = 1 -- Analyse all VAR(char/nchar/binary) MAX data type columns (potentially time-consuming) DECLARE @AdvancedAnalysis BIT = 1 -- Perform advanced analysis (threshold counts/domain analysis) --(potentially time-consuming) DECLARE @DistinctValuesMinimum INT = 200 -- Minimum number of distinct values to suggest a reference -- table and/or perform domain analysis DECLARE @BoundaryPercent NUMERIC(3,2) = 0.57 -- Percent of records at upper/lower threshold to suggest -- a possible anomaly DECLARE @NullBoundaryPercent NUMERIC(5,2) = 90.00 -- Percent of NULLs to suggest a possible anomaly DECLARE @DataTypePercentage INT = 2 -- Percentage variance allowed when suggesting another data type -- for a column ----------------------------------------------------------------------- -- Process variables ----------------------------------------------------------------------- DECLARE @DATA_TYPE VARCHAR(128) = '' DECLARE @FULLSQL VARCHAR(MAX) = '' DECLARE @SQLMETADATA VARCHAR(MAX) = '' DECLARE @NUMSQL VARCHAR(MAX) = '' DECLARE @DATESQL VARCHAR(MAX) = '' DECLARE @LOBSQL VARCHAR(MAX) = '' DECLARE @COLUMN_NAME VARCHAR(128) DECLARE @CHARACTER_MAXIMUM_LENGTH INT DECLARE @ROWCOUNT BIGINT = 0 DECLARE @ColumnList VARCHAR(4000) = ' ' DECLARE @TableCheck TINYINT DECLARE @ColumnCheck SMALLINT DECLARE @DataTypeVariance INT ----------------------------------------------------------------------- -- Start the process: BEGIN TRY -- Test that the schema and table exist SELECT @TableCheck = COUNT (*) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = @TABLE_SCHEMA AND TABLE_NAME = @TABLE_NAME IF @TableCheck <> 1 BEGIN RAISERROR ('The table does not exist',16,1) RETURN END ----------------------------------------------------------------------- -- Parse list of columns to process / get list of columns according to types required ----------------------------------------------------------------------- IF OBJECT_ID('tempdb..#ColumnList') IS NOT NULL DROP TABLE tempdb..#ColumnList; CREATE TABLE #ColumnList (COLUMN_NAME VARCHAR(128), DATA_TYPE VARCHAR(128), CHARACTER_MAXIMUM_LENGTH INT) -- Used to hold list of columns to process IF @ColumnListIN <> '' -- See if there is a list of columns to process BEGIN -- Process list SET @ColumnList = @ColumnListIN + ',' DECLARE @CharPosition int WHILE CHARINDEX(',', @ColumnList) > 0 BEGIN SET @CharPosition = CHARINDEX(',', @ColumnList) INSERT INTO #ColumnList (COLUMN_NAME) VALUES (LTRIM(RTRIM(LEFT(@ColumnList, @CharPosition - 1)))) SET @ColumnList = STUFF(@ColumnList, 1, @CharPosition, '') END -- While loop -- update with datatype and length UPDATE CL SET CL.CHARACTER_MAXIMUM_LENGTH = ISNULL(ISC.CHARACTER_MAXIMUM_LENGTH,0) ,CL.DATA_TYPE = ISC.DATA_TYPE FROM #ColumnList CL INNER JOIN INFORMATION_SCHEMA.COLUMNS ISC ON CL.COLUMN_NAME = ISC.COLUMN_NAME WHERE ISC.TABLE_NAME = @TABLE_NAME AND ISC.TABLE_SCHEMA = @TABLE_SCHEMA END -- If test for list of column names ELSE BEGIN -- Use all column names, to avoid filtering IF @TextCol = 1 BEGIN INSERT INTO #ColumnList (COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH) SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH FROM INFORMATION_SCHEMA.COLUMNS WHERE DATA_TYPE IN ('varchar', 'nvarchar', 'char', 'nchar', 'binary') AND TABLE_NAME = @TABLE_NAME AND TABLE_SCHEMA = @TABLE_SCHEMA AND CHARACTER_MAXIMUM_LENGTH > 0 END IF @NumCol = 1 BEGIN INSERT INTO #ColumnList (COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH) SELECT COLUMN_NAME, DATA_TYPE, ISNULL(CHARACTER_MAXIMUM_LENGTH,0) FROM INFORMATION_SCHEMA.COLUMNS WHERE DATA_TYPE IN ('numeric', 'int', 'bigint', 'tinyint', 'smallint', 'decimal', 'money', 'smallmoney', 'float','real') AND TABLE_NAME = @TABLE_NAME AND TABLE_SCHEMA = @TABLE_SCHEMA END IF @DateCol = 1 BEGIN INSERT INTO #ColumnList (COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH) SELECT COLUMN_NAME, DATA_TYPE, ISNULL(CHARACTER_MAXIMUM_LENGTH,0) FROM INFORMATION_SCHEMA.COLUMNS WHERE DATA_TYPE IN ('Date', 'DateTime', 'SmallDateTime', #39;DateTime2', 'time') AND TABLE_NAME = @TABLE_NAME AND TABLE_SCHEMA = @TABLE_SCHEMA END IF @LOBCol = 1 BEGIN INSERT INTO #ColumnList (COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH) SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH FROM INFORMATION_SCHEMA.COLUMNS WHERE DATA_TYPE IN ('varchar', 'nvarchar', 'varbinary', 'xml') AND TABLE_NAME = @TABLE_NAME AND TABLE_SCHEMA = @TABLE_SCHEMA AND CHARACTER_MAXIMUM_LENGTH = -1 END END -- Else test to get all column names ----------------------------------------------------------------------- -- Test that there are columns to analyse SELECT @ColumnCheck = COUNT (*) FROM #ColumnList WHERE DATA_TYPE IS NOT NULL IF @ColumnCheck = 0 BEGIN RAISERROR('The columns do not exist in the selected database or no columns are selected',16,1) RETURN END ----------------------------------------------------------------------- -- Create Temp table used to hold profiling data ----------------------------------------------------------------------- IF OBJECT_ID('tempdb..#ProfileData') IS NOT NULL DROP TABLE tempdb..#ProfileData; CREATE TABLE #ProfileData ( TABLE_SCHEMA NVARCHAR(128), TABLE_NAME NVARCHAR(128), COLUMN_NAME NVARCHAR(128), ColumnDataLength INT, DataType VARCHAR(128), MinDataLength BIGINT, MaxDataLength BIGINT, AvgDataLength BIGINT, MinDate SQL_VARIANT, MaxDate SQL_VARIANT, NoDistinct BIGINT, NoNulls NUMERIC(32,4), NoZeroLength NUMERIC(32,4), PercentageNulls NUMERIC(9,4), PercentageZeroLength NUMERIC(9,4), NoDateWithHourminuteSecond BIGINT NULL, NoDateWithSecond BIGINT NULL, NoIsNumeric BIGINT NULL, NoIsDate BIGINT NULL, NoAtLimit BIGINT NULL, IsFK BIT NULL DEFAULT 0, DataTypeComments NVARCHAR(1500) ); -- Get row count DECLARE @ROWCOUNTTEXT NVARCHAR(1000) = '' DECLARE @ROWCOUNTPARAM NVARCHAR(50) = '' SET @ROWCOUNTTEXT = 'SELECT @ROWCOUNTOUT = COUNT (*) FROM ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WITH (NOLOCK)' SET @ROWCOUNTPARAM = '@ROWCOUNTOUT INT OUTPUT' EXECUTE sp_executesql @ROWCOUNTTEXT, @ROWCOUNTPARAM, @ROWCOUNTOUT = @ROWCOUNT OUTPUT ----------------------------------------------------------------------- -- Test that there are records to analyse IF @ROWCOUNT = 0 BEGIN RAISERROR('There is no data in the table to analyse',16,1) RETURN END ----------------------------------------------------------------------- -- Define the dynamic SQL used for each column to analyse ----------------------------------------------------------------------- SET @SQLMETADATA = 'INSERT INTO #ProfileData (ColumnDataLength,COLUMN_NAME,TABLE_SCHEMA,TABLE_NAME,DataType,MaxDataLength,MinDataLength,AvgDataLength,MaxDate,MinDate,NoDateWithHourminuteSecond,NoDateWithSecond,NoIsNumeric,NoIsDate,NoNulls,NoZeroLength,NoDistinct)' DECLARE SQLMETADATA_CUR CURSOR LOCAL FAST_FORWARD FOR SELECT COLUMN_NAME, CHARACTER_MAXIMUM_LENGTH, DATA_TYPE FROM #ColumnList OPEN SQLMETADATA_CUR FETCH NEXT FROM SQLMETADATA_CUR INTO @COLUMN_NAME, @CHARACTER_MAXIMUM_LENGTH, @DATA_TYPE WHILE @@FETCH_STATUS = 0 BEGIN SET @SQLMETADATA = @SQLMETADATA +' SELECT TOP 100 PERCENT ' + CAST(@CHARACTER_MAXIMUM_LENGTH AS VARCHAR(20)) + ' ,''' + QUOTENAME(@COLUMN_NAME) + ''' ,''' + QUOTENAME(@TABLE_SCHEMA) + ''' ,''' + QUOTENAME(@TABLE_NAME) + ''' ,''' + @DATA_TYPE + '''' + CASE WHEN @DATA_TYPE IN ('varchar', 'nvarchar', 'char', 'nchar') AND @CHARACTER_MAXIMUM_LENGTH >= 0 THEN + ' , MAX(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) , MIN(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) , AVG(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,NULL ,NULL ,NULL ,NULL ,(SELECT COUNT (*) from ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE ISNUMERIC(' + QUOTENAME(@COLUMN_NAME) + ') = 1) ,(SELECT COUNT (*) from ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE ISDATE(' + QUOTENAME(@COLUMN_NAME) + ') = 1) ' WHEN @DATA_TYPE IN ('numeric', 'int', 'bigint', 'tinyint', 'smallint', 'decimal', 'money', 'smallmoney', 'float','real') THEN + ' ,MAX(' + QUOTENAME(@COLUMN_NAME) + ') ,MIN(' + QUOTENAME(@COLUMN_NAME) + ') ,AVG(CAST(' + QUOTENAME(@COLUMN_NAME) + ' AS NUMERIC(36,2))) ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ' WHEN @DATA_TYPE IN ('DateTime', 'SmallDateTime') THEN + ' ,NULL ,NULL ,NULL ,MAX(' + QUOTENAME(@COLUMN_NAME) + ') ,MIN(' + QUOTENAME(@COLUMN_NAME) + ') ,(SELECT COUNT (*) from ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE (CONVERT(NUMERIC(20,12), ' + QUOTENAME(@COLUMN_NAME) + ' ) - FLOOR(CONVERT(NUMERIC(20,12), ' + QUOTENAME(@COLUMN_NAME) + ')) <> 0)) ,(SELECT COUNT (*) from ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE DATEPART(ss,' + QUOTENAME(@COLUMN_NAME) + ') <> 0 OR DATEPART(mcs,' + QUOTENAME(@COLUMN_NAME) + ') <> 0) ,NULL ,NULL ' WHEN @DATA_TYPE IN ('DateTime2') THEN + ' ,NULL ,NULL ,NULL ,MAX(' + QUOTENAME(@COLUMN_NAME) + ') ,MIN(' + QUOTENAME(@COLUMN_NAME) + ') ,NULL ,NULL ,NULL ,NULL ' WHEN @DATA_TYPE IN ('Date') THEN + ' ,NULL ,NULL ,NULL ,MAX(' + QUOTENAME(@COLUMN_NAME) + ') ,MIN(' + QUOTENAME(@COLUMN_NAME) + ') ,NULL ,NLL ,NULL ,NULL ' WHEN @DATA_TYPE IN ('xml') THEN + ' ,MAX(LEN(CAST(' + QUOTENAME(@COLUMN_NAME) + ' AS NVARCHAR(MAX)))) ,MIN(LEN(CAST(' + QUOTENAME(@COLUMN_NAME) + ' AS NVARCHAR(MAX)))) ,AVG(LEN(CAST(' + QUOTENAME(@COLUMN_NAME) + ' AS NVARCHAR(MAX)))) ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ' WHEN @DATA_TYPE IN ('varbinary','varchar','nvarchar') AND @CHARACTER_MAXIMUM_LENGTH = -1 THEN + ' ,MAX(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,MIN(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,AVG(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ' WHEN @DATA_TYPE IN ('binary') THEN + ' ,MAX(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,MIN(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,AVG(LEN(' + QUOTENAME(@COLUMN_NAME) + ')) ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ' WHEN @DATA_TYPE IN ('time') THEN + ' ,NULL ,NULL ,NULL ,MAX(' + QUOTENAME(@COLUMN_NAME) + ') ,MIN(' + QUOTENAME(@COLUMN_NAME) + ') ,NULL ,NULL ,NULL ,NULL ' ELSE + ' ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ' END + ' ,(SELECT COUNT(*) FROM ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE ' + QUOTENAME(@COLUMN_NAME) + ' IS NULL)' + CASE WHEN @DATA_TYPE IN ('varchar', 'nvarchar', 'char', 'nchar') THEN + ' ,(SELECT COUNT(*) FROM ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE LEN(LTRIM(RTRIM(' + QUOTENAME(@COLUMN_NAME) + '))) = '''')' ELSE + ' ,NULL' END + ' ,(SELECT COUNT(DISTINCT ' + QUOTENAME(@COLUMN_NAME) + ') FROM ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WHERE ' + QUOTENAME(@COLUMN_NAME) + ' IS NOT NULL ) FROM ' + QUOTENAME(@TABLE_SCHEMA) + '.' + QUOTENAME(@TABLE_NAME) + ' WITH (NOLOCK) UNION' FETCH NEXT FROM SQLMETADATA_CUR INTO @COLUMN_NAME, @CHARACTER_MAXIMUM_LENGTH, @DATA_TYPE END CLOSE SQLMETADATA_CUR DEALLOCATE SQLMETADATA_CUR SET @SQLMETADATA = LEFT(@SQLMETADATA, LEN(@SQLMETADATA) -5) EXEC (@SQLMETADATA) ----------------------------------------------------------------------- -- Final Calculations ----------------------------------------------------------------------- -- Indicate Foreign Keys ; WITH FK_CTE (FKColumnName) AS ( SELECT DISTINCT CU.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS TC INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE CU ON TC.CONSTRAINT_NAME = CU.CONSTRAINT_NAME AND TC.TABLE_SCHEMA = CU.TABLE_SCHEMA AND TC.TABLE_NAME = CU.TABLE_NAME AND TC.TABLE_SCHEMA = @TABLE_SCHEMA AND TC.TABLE_NAME = @TABLE_NAME AND CONSTRAINT_TYPE = 'FOREIGN KEY' ) UPDATE P SET P.IsFK = 1 FROM #ProfileData P INNER JOIN FK_CTE CTE ON P.COLUMN_NAME = CTE.FKColumnName -- Calculate percentages UPDATE #ProfileData SET PercentageNulls = (NoNulls / @ROWCOUNT) * 100 ,PercentageZeroLength = (NoZeroLength / @ROWCOUNT) * 100 -- Add any comments -- Datatype suggestions -- First get number of records where a variation could be an anomaly SET @DataTypeVariance = ROUND((@ROWCOUNT * @DataTypePercentage) / 100, 0) UPDATE #ProfileData SET DataTypeComments = 'Possibly could be one of the DATE types. ' WHERE NoIsDate BETWEEN (@ROWCOUNT -@DataTypeVariance) AND (@ROWCOUNT + @DataTypeVariance) AND DataType IN ('varchar', 'nvarchar', 'char', 'nchar') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be one of the NUMERIC types. ' WHERE NoIsNumeric BETWEEN (@ROWCOUNT -@DataTypeVariance) AND (@ROWCOUNT + @DataTypeVariance) AND DataType IN ('varchar', 'nvarchar', 'char', 'nchar') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be INT type. ' WHERE MinDataLength >= -2147483648 AND MaxDataLength <= 2147483648 AND DataType IN ('bigint') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be SMALLINT type. ' WHERE MinDataLength >= -32768 AND MaxDataLength <= 32767 AND DataType IN ('bigint','int') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be TINYINT type. ' WHERE MinDataLength >= 0 AND MaxDataLength <= 255 AND DataType IN ('bigint','int','smallint') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be SMALLDATE type. ' WHERE NoDateWithSecond = 0 AND MinDate >= '19000101' AND MaxDate <= '20790606' AND DataType IN ('datetime','datetime2') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be DATE type (SQL Server 2008 only). ' WHERE NoDateWithHourminuteSecond = 0 AND DataType IN ('datetime','datetime2') UPDATE #ProfileData SET DataTypeComments = 'Possibly could be DATETIME type. ' WHERE MinDate >= '17530101' AND MaxDate <= '99991231' AND DataType IN ('datetime2') -- Empty column suggestions UPDATE #ProfileData SET DataTypeComments = ISNULL(DataTypeComments,'') + 'Seems empty - is it required? ' WHERE (PercentageNulls = 100 OR PercentageZeroLength = 100) AND IsFK = 0 -- Null column suggestions UPDATE #ProfileData SET DataTypeComments = ISNULL(DataTypeComments,'') + 'There is a large percentage of NULLs - attention may be required. ' WHERE PercentageNulls >= @NullBoundaryPercent -- Distinct value suggestions UPDATE #ProfileData SET DataTypeComments = ISNULL(DataTypeComments,'') + 'Few distinct elements - potential for reference/lookup table (contains NULLs).' WHERE NoDistinct < @DistinctValuesMinimum AND @ROWCOUNT > @DistinctValuesMinimum AND IsFK = 0 AND PercentageNulls <> 100 AND NoNulls <> 0 -- FK suggestions UPDATE #ProfileData SET DataTypeComments = ISNULL(DataTypeComments,'') + 'Few distinct elements - potential for Foreign Key.' WHERE NoDistinct < @DistinctValuesMinimum AND @ROWCOUNT > @DistinctValuesMinimum AND IsFK = 0 AND NoNulls = 0 AND DataType NOT LIKE '%Date%' AND DataType <> 'Time' -- Filestream suggestions UPDATE #ProfileData SET DataTypeComments = 'Possibly a good candidate for FILESTREAM (SQL Server 2008 only).' WHERE AvgDataLength >= 1000000 AND DataType IN ('varbinary') AND ColumnDataLength = -1 UPDATE #ProfileData SET DataTypeComments = 'Possibly not a good candidate for FILESTREAM (SQL Server 2008 only).' WHERE AvgDataLength < 1000000 AND DataType IN ('varbinary') AND ColumnDataLength = -1 -- Sparse Column Suggestions IF OBJECT_ID('tempdb..#SparseThresholds') IS NOT NULL DROP TABLE tempdb..#SparseThresholds; CREATE TABLE #SparseThresholds (DataType VARCHAR(128), Threshold NUMERIC(9,4)) INSERT INTO #SparseThresholds (DataType, Threshold) VALUES ('tinyint',86), ('smallint',76), ('int',64), ('bigint',52), ('real',64), ('float',52), ('money',64), ('smallmoney',64), ('smalldatetime',52), ('datetime',52), ('uniqueidentifier',43), ('date',69), ('datetime2',52), ('decimal',42), ('nuumeric',42), ('char',60), ('varchar',60), ('nchar',60), ('nvarchar',60), ('binary',60), ('varbinary',60), ('xml',60) ; WITH Sparse_CTE (COLUMN_NAME, SparseComment) AS ( SELECT P.COLUMN_NAME ,CASE WHEN P.PercentageNulls >= T.Threshold THEN 'Could benefit from sparse columns. ' ELSE '' END AS SparseComment FROM #ProfileData P INNER JOIN #SparseThresholds T ON P.DataType = T.DataType ) UPDATE PT SET PT.DataTypeComments = CASE WHEN PT.DataTypeComments IS NULL THEN CTE.SparseComment ELSE ISNULL(PT.DataTypeComments,'') + CTE.SparseComment + '. ' END FROM #ProfileData PT INNER JOIN Sparse_CTE CTE ON PT.COLUMN_NAME = CTE.COLUMN_NAME ----------------------------------------------------------------------- -- Optional advanced analysis ----------------------------------------------------------------------- IF @AdvancedAnalysis = 1 BEGIN ----------------------------------------------------------------------- -- Data at data boundaries ----------------------------------------------------------------------- IF OBJECT_ID('tempdb..#LimitTest') IS NOT NULL DROP TABLE tempdb..#LimitTest; CREATE TABLE #LimitTest (COLUMN_NAME VARCHAR(128), NoAtLimit BIGINT); DECLARE @advancedtestSQL VARCHAR(MAX) = 'INSERT INTO #LimitTest (COLUMN_NAME, NoAtLimit)' + CHAR(13) SELECT @advancedtestSQL = @advancedtestSQL + 'SELECT '''+ COLUMN_NAME + ''', COUNT('+ COLUMN_NAME + ') FROM ' + @TABLE_SCHEMA + '.' + @TABLE_NAME + CASE WHEN DataType IN ('numeric', 'int', 'bigint', 'tinyint', 'smallint', 'decimal', 'money', 'smallmoney', 'float','real') THEN ' WHERE '+ COLUMN_NAME + ' = ' + CAST(ISNULL(MaxDataLength,0) AS VARCHAR(40)) + ' OR '+ COLUMN_NAME + ' = ' + CAST(ISNULL(MinDataLength,0) AS VARCHAR(40)) + CHAR(13) + ' UNION' + CHAR(13) ELSE ' WHERE LEN('+ COLUMN_NAME + ') = ' + CAST(ISNULL(MaxDataLength,0) AS VARCHAR(40)) + ' OR LEN('+ COLUMN_NAME + ') = ' + CAST(ISNULL(MinDataLength,0) AS VARCHAR(40)) + CHAR(13) + ' UNION' + CHAR(13) END FROM #ProfileData WHERE DataType IN ('numeric', 'int', 'bigint', 'tinyint', 'smallint', 'decimal', 'money', 'smallmoney', 'float','real','varchar', 'nvarchar', 'char', 'nchar', 'binary') SET @advancedtestSQL = LEFT(@advancedtestSQL,LEN(@advancedtestSQL) -6) EXEC (@advancedtestSQL) UPDATE M SET M.NoAtLimit = T.NoAtLimit ,M.DataTypeComments = CASE WHEN CAST(T.NoAtLimit AS NUMERIC(36,2)) / CAST(@ROWCOUNT AS NUMERIC(36,2)) >= @BoundaryPercent THEN ISNULL(M.DataTypeComments,'') + 'Large numbers of data elements at the max/minvalues. ' ELSE M.DataTypeComments END FROM #ProfileData M INNER JOIN #LimitTest T ON M.COLUMN_NAME = T.COLUMN_NAME ----------------------------------------------------------------------- -- Domain analysis ----------------------------------------------------------------------- IF OBJECT_ID('tempdb..#DomainAnalysis') IS NOT NULL DROP TABLE tempdb..#DomainAnalysis; CREATE TABLE #DomainAnalysis ( DomainName NVARCHAR(128) ,DomainElement NVARCHAR(4000) ,DomainCounter BIGINT ,DomainPercent NUMERIC(7,4) ); DECLARE @DOMAINSQL VARCHAR(MAX) = 'INSERT INTO #DomainAnalysis (DomainName, DomainElement, DomainCounter) ' DECLARE SQLDOMAIN_CUR CURSOR LOCAL FAST_FORWARD FOR SELECT COLUMN_NAME, DataType FROM #ProfileData WHERE NoDistinct < @DistinctValuesMinimum OPEN SQLDOMAIN_CUR FETCH NEXT FROM SQLDOMAIN_CUR INTO @COLUMN_NAME, @DATA_TYPE WHILE @@FETCH_STATUS = 0 BEGIN SET @DOMAINSQL = @DOMAINSQL + 'SELECT ''' + @COLUMN_NAME + ''' AS DomainName, CAST( '+ @COLUMN_NAME + ' AS VARCHAR(4000)) AS DomainElement, COUNT(ISNULL(CAST(' + @COLUMN_NAME + ' AS NVARCHAR(MAX)),'''')) AS DomainCounter FROM ' + @TABLE_SCHEMA + '.' + @TABLE_NAME + ' GROUP BY ' + @COLUMN_NAME + '' + ' UNION ' FETCH NEXT FROM SQLDOMAIN_CUR INTO @COLUMN_NAME, @DATA_TYPE END CLOSE SQLDOMAIN_CUR DEALLOCATE SQLDOMAIN_CUR SET @DOMAINSQL = LEFT(@DOMAINSQL, LEN(@DOMAINSQL) -5) + ' ORDER BY DomainName ASC, DomainCounter DESC ' EXEC (@DOMAINSQL) -- Now calculate percentages (this appraoch is faster than doing it when performing the domain analysis) ; WITH DomainCounter_CTE (DomainName, DomainCounterTotal) AS ( SELECT DomainName, SUM(ISNULL(DomainCounter,0)) AS DomainCounterTotal FROM #DomainAnalysis GROUP BY DomainName ) UPDATE D SET D.DomainPercent = (CAST(D.DomainCounter AS NUMERIC(36,4)) / CAST(CTE.DomainCounterTotal AS NUMERIC(36,4))) * 100 FROM #DomainAnalysis D INNER JOIN DomainCounter_CTE CTE ON D.DomainName = CTE.DomainName WHERE D.DomainCounter <> 0 END -- Advanced analysis ----------------------------------------------------------------------- -- Output results from the profile and domain data tables ----------------------------------------------------------------------- select * from #ProfileData IF @AdvancedAnalysis = 1 BEGIN select * from #DomainAnalysis END END TRY BEGIN CATCH SELECT ERROR_NUMBER() AS ErrorNumber ,ERROR_SEVERITY() AS ErrorSeverity ,ERROR_STATE() AS ErrorState ,ERROR_PROCEDURE() AS ErrorProcedure ,ERROR_LINE() AS ErrorLine ,ERROR_MESSAGE() AS ErrorMessage; END CATCH
Preview:
downloadDownload PNG
downloadDownload JPEG
downloadDownload SVG
Tip: You can change the style, width & colours of the snippet with the inspect tool before clicking Download!
Click to optimize width for Twitter