Based on comments on numerous Blog Postings wanted to take a look at Denzil Riberio’s script for Identifying Column Store Indexes ripe for defragging.
Denzil is MSFT and here is his blog post :
- SQL 2014 Clustered Columnstore index rebuild and maintenance considerations
Published On :- 2015-July-8th
use master go if object_id('[dbo].[sp_DenzilRibeiro_columnStoreIndexDefrag]') is null begin exec('create procedure [dbo].[sp_DenzilRibeiro_columnStoreIndexDefrag] as ') end go alter procedure [dbo].[sp_DenzilRibeiro_columnStoreIndexDefrag] ( @debug bit = 0 , @deletedRowsPercent int = 10 , @rowgroupQualityNumberofRows int = 500000 , @scriptOnly bit = 1 ) as begin set nocount on; set XACT_ABORT on; /* The sample scripts are not supported under any Microsoft standard support program or service and are intented as a supplement to online documentation.The sample scripts are provided AS IS without warranty of any kind either expressed or implied. Microsoft further disclaims all implied warranties including, without limitation, any implied warranties of merchantability or of fitness for a particular purpose. */ /* Rebuild index statement is printed at partition level if a. RGQualityMeasure is not met for @PercentageRGQualityPassed Rowgroups i) this is an arbitrary number, what we are saying is that if the average is above this number, don't bother rebuilding as we consider this number to be good quality rowgroups ii) Second constraint is the Deleted rows, currently the default that is set am setting is 10% of the partition itself. If the partition is very large or small consider adjusting this c. In SQL 2014, post index rebuild,the dmv doesn't show why the RG is trimmed to < 1 million in this case in SQL 2014. i) If the Dictionary is full ( 16MB) then no use in rebuilding this rowgroup as even after rebuild it may get trimmed ii) If dictionary is full only rebuild if deleted rows falls above the threshold */ if object_id('tempdb..#temp') IS NOT NULL begin drop table #temp end --Declare @DeletedRowsPercent Decimal(5,2) -- Debug = 1 if you need all rowgroup information regardless --Declare @Debug int =0 --Percent of deleted rows for the partition --Set @DeletedRowsPercent = 10 --RGQuality means we are saying anything over 500K compressed is good row group quality, anything less need to re-evaluate. --Declare @RGQuality int = 500000 -- means 50% of rowgroups are < @RGQUality from the rows/rowgroup perspective --Declare @PercentageRGQualityPassed smallint = 20 Declare @PercentageRGQualityPassed smallint --= 20 --— Maxdop Hint optionally added to ensure we don't spread small amount of rows accross many threads -- IF we do that, we may end up with smaller rowgroups anyways. declare @maxdophint smallint declare @effectivedop smallint declare @iNumberofRecords int declare @iRecordumber int declare @command nvarchar(4000) set @PercentageRGQualityPassed = 20 ;WITH CSAnalysis ( [object_id] , TableName , SchemaName , index_id , indexName , partition_number , CountRGs , TotalRows , AvgRowsPerRG , CountRGLessThanQualityMeasure , RGQualityMeasure , PercentageRGLessThanQualityMeasure , DeletedRowsPercent , NumRowgroupsWithDeletedRows ) AS ( SELECT rg.object_id , object_name(rg.object_id) as TableName , SCHEMA_NAME(t.schema_id) AS SchemaName , rg.index_id , indexName = tblSI.[name] , rg.partition_number , COUNT(*) as CountRGs , SUM(total_rows) as TotalRows , AVG(total_rows) as AvgRowsPerRG , CountRGLessThanQualityMeasure = SUM( CASE --WHEN rg.Total_Rows <@RGQuality THEN 1 WHEN rg.total_rows < @rowgroupQualityNumberofRows then 1 ELSE 0 END ) --as CountRGLessThanQualityMeasure --, @RGQuality as RGQualityMeasure , RGQualityMeasure = @rowgroupQualityNumberofRows , PercentageRGLessThanQualityMeasure = cast ( ( SUM ( CASE --WHEN rg.Total_Rows <@RGQuality THEN 1.0 ELSE 0 WHEN rg.Total_Rows <@rowgroupQualityNumberofRows THEN 1.0 ELSE 0 END ) /count(*) *100 ) as Decimal(5,2) ) -- PercentageRGLessThanQualityMeasure , [DeletedRowsPercent] = Sum ( rg.deleted_rows * 1.0 ) /sum (rg.total_rows *1.0) *100 --as 'DeletedRowsPercent' , [NumRowgroupsWithDeletedRows] = sum ( case when rg.deleted_rows >0 then 1 else 0 end ) --as 'NumRowgroupsWithDeletedRows' FROM sys.column_store_row_groups rg JOIN sys.tables t ON rg.object_id = t.object_id JOIN sys.indexes tblSI ON rg.object_id = tblSI.object_id and rg.index_id = tblSI.index_id where rg.state = 3 group by rg.object_id , t.schema_id , rg.index_id , tblSI.[name] , rg.partition_number ) , CSDictionaries ( maxdictionarysize -- int , maxdictionaryentrycount -- int , maxpartition_number , [object_id] -- int , [index_id] , partition_number -- int ) AS ( select max(dict.on_disk_size) as maxdictionarysize , max(dict.entry_count) as maxdictionaryentrycount , max(partition_number) as maxpartition_number , part.object_id , part.index_id , part.partition_number from sys.column_store_dictionaries dict join sys.partitions part on dict.hobt_id = part.hobt_id group by part.object_id , part.index_id , part.partition_number ) select [rowNumber] = rank() over ( order by a.SchemaName , a.TableName , a.index_id , a.partition_number ) , a.* , b.maxdictionarysize , b.maxdictionaryentrycount , maxpartition_number , [command] = cast(null as nvarchar(4000)) , [identified] = cast(0 as bit) into #temp from CSAnalysis a inner join CSDictionaries b on a.object_id = b.object_id and a.index_id = b.index_id and a.partition_number = b.partition_number --— True if running from the same context that will run the rebuild index. select @effectivedop=effective_max_dop from sys.dm_resource_governor_workload_groups where group_id in ( select group_id from sys.dm_exec_requests where session_id = @@spid ) --— Get the Alter Index Statements. update a set a.[command] = 'ALTER INDEX ' + QuoteName(a.IndexName) + ' ON ' + QUOTENAME(a.SchemaName) + '.' + QuoteName(a.TableName) + ' REBUILD ' + Case when maxpartition_number = 1 THEN ' ' else ' PARTITION = ' + cast(partition_number as varchar(10)) End + ' WITH (MAXDOP =' + cast( ( Case WHEN (TotalRows*1.0/1048576) < 1.0 THEN 1 WHEN (TotalRows*1.0/1048576) < @effectivedop THEN FLOOR(TotalRows*1.0/1048576) ELSE 0 END ) as varchar(10)) + ')' --as Command + ' -- comment ' + ' deletedRow% is ' + cast(DeletedRowsPercent as varchar(30)) + ' rowgroupQualityNumberofRows is ' + cast(AvgRowsPerRG as varchar(30)) , a.[identified] = 1 from #temp a where ( ( DeletedRowsPercent >= @DeletedRowsPercent) -- Rowgroup Quality trigger, percentage less than rowgroup quality as long as dictionary is not full OR ( ( ( --( AvgRowsPerRG < @RGQuality ) ( AvgRowsPerRG < @rowgroupQualityNumberofRows) --and ( TotalRows > @RGQuality) and ( TotalRows > @rowgroupQualityNumberofRows) ) AND ( ( PercentageRGLessThanQualityMeasure >= @PercentageRGQualityPassed ) ) ) ---- DictionaryNotFull, lower threshold than 16MB. AND maxdictionarysize < ( 16*1000*1000) ) ) -- Add logic to actually run those statements set @iRecordumber = 1 set @iNumberofRecords = ( select max([rowNumber]) from #temp) while (@iRecordumber<= @iNumberofRecords) begin select @command = [command] from #temp where [rowNumber] = 1 and [identified] = 1 if (@command is not null) begin print @command if (@scriptOnly = 0) begin exec(@command) end end set @iRecordumber = @iRecordumber + 1 end -- Debug print if needed if @debug=1 begin Select * from #temp order by TableName , index_id , partition_number end else begin -- Deleted rows trigger Select * from #temp where ( DeletedRowsPercent >= @DeletedRowsPercent) end end go exec sys.sp_MS_marksystemobject '[dbo].[sp_DenzilRibeiro_columnStoreIndexDefrag]' go
use [WideWorldImportersDW] go declare @debug bit declare @deletedRowsPercent int declare @rowgroupQualityNumberofRows int set @debug = 1 set @deletedRowsPercent = 10 set @rowgroupQualityNumberofRows = 500000 exec [dbo].[sp_DenzilRibeiro_columnStoreIndexDefrag] @debug = @debug , @deletedRowsPercent = @deletedRowsPercent , @rowgroupQualityNumberofRows = @rowgroupQualityNumberofRows
Source Code Control
Placed in Github here
Nothing new here.
The whole point is to avail on Github so that it will be easier to digest and learn from.