oracle-samples · nigelbayliss · Jun 18, 2020 · Jun 18, 2020 · Jun 18, 2020 · May 25, 2021
diff --git a/optimizer/column_groups/README.md b/optimizer/column_groups/README.md
@@ -0,0 +1,63 @@
+# Introduction
+
+How to detect the potential need for column groups (and create them automatically).
+
+These scripts are aimed at detecting the need for column groups to support a specific query rather than an entire workload. Nevertheless, if a table has columns with correlated values, and a column group is created, then this has the potential to help many queries.
+
+There are multiple approaches proposed here:
+
+1. Use EXPLAIN PLAN to parse a test query while column usage seeding is enabled 
+2. Use column usage seeding via a SQL tuning set
+3. Query all rows (or a sample of rows) for a given table and look for column value correlations
+
+# Overheads
+
+Checking for column value correlation for a table by scanning rows can be time-consuming (the "corr" scripts). You should expect this approach to take a long time on large tables. The runtime can be reduced by sampling a proportion of table rows. For this reason, a sample size parameter is provided (it is a decimal value >0 and <100).
+
+Column usage seeding has a small overhead and in some of the "cg" examples, a system-wide setting is used for a few seconds. Therefore, you should not use it on a production system running under high load. Also, bear in mind that scanning a large percentage of rows in a large table will have some overhead too (the "corr" scripts). For this reason, you may want to start with a small sample and build up.
+
+# Demos
+
+There are four demos:
+<pre>
+ SQL> -- Use column usage tracking to identify useful column groups
+ SQL> @run_test1
+ SQL> -- Scan tables and look for column value correlation
+ SQL> @run_test2
+ SQL> -- Use column usage tracking to identify useful column groups - for queries in a SQL tuning set
+ SQL> @run_test3
+ SQL> -- Introspect a SQL tuning set and scan the tables accessed by the STS queries to look for column value correlations
+ SQL> @run_test4
+</pre>
+
+It may be worth taking a closer look at "t4.sql" because it shows how you can "hide" statistics until you are ready to expose them to the workload. In this way you can make changes and test your queries before implementation. 
+
+The demos make use of easy-to-use utility scripts. Here are some examples:
+<pre>
+ SQL> -- Immediately create column groups that will benefit SQL ID "7kjpawwbyh1bz" (query must be in cursor cache) - uses column usage tracking
+ SQL> @cg_from_sqlid 7kjpawwbyh1bz y
+ SQL> -- Immediately create column groups for SQL statements in a SQL tuning set - uses column usage tracking
+ SQL> @@cg_from_sts name_of_sql_tuning_set y
+ SQL> -- Immediately create column groups that will benefit SQL ID "7kjpawwbyh1bz" by sampling 100% of rows in tables accessed by query. The query must be in cursor cache.
+ SQL> @@corr_from_sqlid 7kjpawwbyh1bz 100 y
+ SQL> -- Immediately create column groups for correlated columns on table current_user.TAB1 by sampling 10% of rows
+ SQL> @@corr_from_table user tab1 10 y
+ SQL> -- Immediately create column groups for correlated columns on tables accessed in the SQL tuning set "my_sql_tuning_set" - sample 10% of rows
+ SQL> @@corr_from_sts my_sql_tuning_set 10 y
+ SQL> -- Output a runnable SQL script that can be used to create column groups for correlated columns on tables accessed in the SQL tuning set "my_sql_tuning_set" - sample 50% of rows
+ SQL> @@corr_from_sts my_sql_tuning_set 50 n
+</pre>
+
+# Limitations
+
+The "sqlid" scripts rely on "execute immediate explain plan..." and this will not work for queries that exceed the maximum VARCHAR2 length. For cases like this, capture the relevant query in a SQL tuning set and then use the "sts" scripts provided. Check out "load_sqlset.sql" for an example.
+
+
+# Disclaimer
+
+ <br/>-- These scripts are provided for educational purposes only.
+ <br/>-- They are NOT supported by Oracle World Wide Technical Support.
+ <br/>-- The scripts have been tested and they appear to work as intended.
+ <br/>-- You should always run scripts on a test instance.
+
+
diff --git a/optimizer/column_groups/cg_from_plan.sql b/optimizer/column_groups/cg_from_plan.sql
@@ -0,0 +1,52 @@
+--
+-- cg_from_plan.sql
+-- Reports column usage and creates column groups for the tables referenced
+-- by the most recently explained statement in the PLAN_TABLE
+-- Parameter:
+-- &1 - Y/N where Y - create the column groups immediately
+--                N - (default) only print the column group creation script
+--
+set long 100000
+var create_now varchar2(1)
+
+exec dbms_stats.flush_database_monitoring_info;
+exec dbms_lock.sleep(2)
+
+-- An empty value or N/n means N; anything else is treated as Y
+exec :create_now := case when nvl(upper('&1'),'N') = 'N' then 'N' else 'Y' end;
+
+set serveroutput on
+declare
+  -- Tables belonging to the plan with the latest timestamp in PLAN_TABLE
+  cursor plan_tables is
+    select distinct statement_id, object_name, object_owner
+    from   plan_table
+    where  object_type = 'TABLE'
+    and    timestamp = (select max(timestamp) from plan_table)
+    order by object_name;
+  usage_report clob;
+  created_ext  clob;
+begin
+  -- Pass 1: show the column usage report for each table, then either
+  -- create the column groups or print the statement that would create them
+  for tab in plan_tables
+  loop
+    usage_report := dbms_stats.report_col_usage(tab.object_owner, tab.object_name);
+    dbms_output.put_line('-- ===========================================================');
+    dbms_output.put_line('-- Table Name : '||tab.object_name);
+    -- The report is wrapped in a block comment so the output stays a runnable script
+    dbms_output.put_line('/*');
+    dbms_output.put_line(usage_report);
+    dbms_output.put_line('*/');
+    if :create_now = 'Y'
+    then
+      select dbms_stats.create_extended_stats(tab.object_owner, tab.object_name)
+      into   created_ext
+      from   dual;
+      dbms_output.put_line(created_ext);
+    else
+      dbms_output.put_line('select dbms_stats.create_extended_stats('''||tab.object_owner||''','''||tab.object_name||''') es from dual;');
+    end if;
+  end loop;
+
+  -- Pass 2: list the statistics gathering calls that are now needed.
+  -- The stats are only reported as needed, not gathered. They could be
+  -- gathered here instead, optionally as unpublished stats to temporarily
+  -- hide the change from the workload.
+  dbms_output.put_line('-- === Stats need to be regathered on the following tables');
+  for tab in plan_tables
+  loop
+    dbms_output.put_line('exec dbms_stats.gather_table_stats('''||tab.object_owner||''','''||tab.object_name||''')');
+  end loop;
+end;
+/
+set serveroutput off
diff --git a/optimizer/column_groups/cg_from_sqlid.sql b/optimizer/column_groups/cg_from_sqlid.sql
@@ -0,0 +1,22 @@
+-- cg_from_sqlid.sql
+-- For a given SQL ID, parse the statement while column usage seeding is enabled, then create column groups
+-- Parameters:
+-- &1 - SQL ID (passed to explain.sql)
+-- &2 - Y/N - where Y - create column groups immediately (passed to cg_from_plan.sql)
+-- N - print a creation script to verify and run later
+--
+set long 100000
+
+-- Enable column usage seeding with a 10 second time limit (no SQL tuning set or owner is given)
+-- This assumes that the parse will complete within 10 seconds
+-- NOTE(review): the 1 second sleep presumably lets seeding take effect before the parse - confirm
+exec dbms_stats.seed_col_usage(null,null,10)
+exec dbms_lock.sleep(1)
+--
+-- Parse the relevant SQL statement (explain.sql is not shown here; per the README it relies on EXPLAIN PLAN)
+--
+@@explain &1
+--
+-- Create (or print the script for) the column groups for the tables in the latest PLAN_TABLE plan
+--
+@@cg_from_plan &2
diff --git a/optimizer/column_groups/cg_from_sts.sql b/optimizer/column_groups/cg_from_sts.sql
@@ -0,0 +1,53 @@
+--
+-- cg_from_sts.sql
+-- Seeds column usage from a SQL tuning set owned by the current user, then
+-- reports column usage and creates column groups for the tables that appear
+-- in the tuning set's plans (USER_SQLSET_PLANS)
+-- Parameters:
+-- &1 - Name of the SQL tuning set
+-- &2 - Y/N where Y - create the column groups immediately
+--                N - (default) only print the column group creation script
+--
+set long 100000
+set feedback off
+var create_now varchar2(1)
+var sqlset varchar2(100)
+
+exec :sqlset := '&1';
+-- An empty value or N/n means N; anything else is treated as Y
+exec :create_now := case when nvl(upper('&2'),'N') = 'N' then 'N' else 'Y' end;
+
+set serveroutput on
+declare
+  -- Time limit (seconds) handed to column usage seeding
+  seed_time_limit_sec number := 30;
+  usage_report clob;
+  created_ext  clob;
+  -- Distinct tables referenced by the plans stored in the tuning set
+  cursor sts_tables is
+    select distinct object_owner owner, object_name table_name
+    from   user_sqlset_plans
+    where  object_type = 'TABLE'
+    and    sqlset_name = :sqlset
+    order by object_owner, object_name;
+begin
+  -- Seed column usage from the tuning set, then flush monitoring info
+  -- before the usage reports are read
+  dbms_stats.seed_col_usage(:sqlset, user, seed_time_limit_sec);
+  dbms_stats.flush_database_monitoring_info;
+
+  -- Pass 1: show the column usage report for each table, then either
+  -- create the column groups or print the statement that would create them
+  for tab in sts_tables
+  loop
+    usage_report := dbms_stats.report_col_usage(tab.owner, tab.table_name);
+    dbms_output.put_line('-- ===========================================================');
+    dbms_output.put_line('-- Table Name : '||tab.table_name);
+    -- The report is wrapped in a block comment so the output stays a runnable script
+    dbms_output.put_line('/*');
+    dbms_output.put_line(usage_report);
+    dbms_output.put_line('*/');
+    if :create_now = 'Y'
+    then
+      select dbms_stats.create_extended_stats(tab.owner, tab.table_name)
+      into   created_ext
+      from   dual;
+      dbms_output.put_line('Extension created: '||created_ext);
+    else
+      dbms_output.put_line('select dbms_stats.create_extended_stats('''||tab.owner||''','''||tab.table_name||''') es from dual;');
+    end if;
+  end loop;
+
+  -- Pass 2: list the statistics gathering calls that are now needed
+  dbms_output.put_line('-- === Stats need to be regathered on the following tables');
+  for tab in sts_tables
+  loop
+    dbms_output.put_line('exec dbms_stats.gather_table_stats('''||tab.owner||''','''||tab.table_name||''')');
+  end loop;
+end;
+/
+set serveroutput off
diff --git a/optimizer/column_groups/cols.sql b/optimizer/column_groups/cols.sql
@@ -0,0 +1,2 @@
+-- Number of distinct values recorded in the column statistics of tables whose name contains CORR
+select table_name,
+       column_name,
+       num_distinct
+from   user_tab_col_statistics col_stats
+where  table_name like '%CORR%'
+/
diff --git a/optimizer/column_groups/corr_from_plan.sql b/optimizer/column_groups/corr_from_plan.sql
@@ -0,0 +1,155 @@
+--
+-- corr_from_plan.sql
+--
+-- Scan tables to look for column values that correlate
+-- The tables scanned are those referenced by the latest plan in the PLAN_TABLE
+-- Only 2-column combinations are checked
+-- Correlation threshold is set to 0.8 (80%) - an arbitrary figure
+-- Data types limited
+-- Only columns with shorter strings compared
+-- Columns checked must have a 'similar' number of distinct values (NDVs must not differ by 2X)
+-- A sample of rows can be used to speed up execution time - which can be substantial
+--
+-- Parameters:
+-- &1 - Sample percentage (must be >0; values >=100 are reduced to 99.9999)
+-- &2 - Y/N where Y - will create the column groups immediately
+--                N - will print the column group creation script only
+--
+var create_now varchar2(1)
+set echo off
+column tab_owner format a20
+column tab_name format a20
+set linesize 250
+set trims on
+set pagesize 10000
+set feedback off
+
+var tabname varchar2(100)
+var ownname varchar2(100)
+var samp number
+
+exec select '&1' into :samp from dual;
+exec select decode(nvl(upper('&2'),'N'),'N','N','Y') into :create_now from dual;
+
+set serveroutput on
+
+--
+-- Look for column value correlation
+--
+declare
+  --
+  -- Columns must correlate >0.8 to get a column group (this value is chosen arbitrarily and can be adjusted)
+  --
+  minimum_correlation number(6,5) := 0.8;
+
+  -- Column pair currently being checked; read by cursor extc
+  cname1 varchar2(200);
+  cname2 varchar2(200);
+
+  -- Tables referenced by the plan with the latest timestamp in PLAN_TABLE
+  cursor tabsc is
+    select distinct object_name,object_owner
+    from plan_table
+    where object_type = 'TABLE'
+    and timestamp = (select max(timestamp) from plan_table)
+    order by object_name;
+
+  --
+  -- Counts existing extensions on the current table that mention both columns.
+  -- Each name is matched together with its surrounding double quotes (the
+  -- extension text quotes column names, e.g. ("COL1","COL2")) so that a column
+  -- called ID is not reported as covered by an extension on ORDER_ID.
+  -- INSTR is used rather than LIKE so that "_" and "%" in a column name are
+  -- not treated as wildcards.
+  --
+  cursor extc is
+    select count(*)
+    from dba_stat_extensions
+    where table_name = :tabname
+    and owner = :ownname
+    and instr(extension,'"'||cname1||'"') > 0
+    and instr(extension,'"'||cname2||'"') > 0;
+
+  -- All existing extensions on the current table (listed for information only)
+  cursor ext is
+    select extension_name,extension,rownum r
+    from dba_stat_extensions
+    where table_name = :tabname
+    and owner = :ownname;
+  --
+  -- To reduce the number of column combinations checked, we will only check
+  -- column pairs that have similar NDV - so some NULL cases will be missed.
+  -- There is also an assumption that longer strings are rarely used in comparison
+  --
+  cursor c1 is
+    with w as (
+      select column_name, num_distinct
+      from dba_tab_columns
+      where table_name = :tabname
+      and owner = :ownname
+      and num_distinct is not null
+      and num_distinct > 0
+      and ( data_type in ('DATE','NUMBER')
+         or (data_type = 'CHAR' and data_length <= 20)
+         or (data_type = 'VARCHAR2' and data_length <= 20)
+         or (data_type like 'TIMESTAMP%')))
+    select t1.column_name c1, t2.column_name c2
+    from w t1, w t2 /* , (select num_rows from dba_tables where owner = :ownname and table_name = :tabname) t */
+    where t1.column_name > t2.column_name /* each unordered pair is produced once */
+    and greatest(t1.num_distinct,t2.num_distinct)/least(t1.num_distinct,t2.num_distinct)<2 /* Similar number of distinct values */
+    --and t1.num_distinct < t.num_rows/10 /* Perhaps eliminate sequenced columns? */
+    order by t1.column_name;
+  c number(6,5);   -- correlation of the current column pair
+  n number;        -- approximate number of rows in the sample
+  num_ext number;  -- number of existing extensions covering the pair
+  r clob;          -- name returned by create_extended_stats
+begin
+  --
+  -- Fail early with a clear message rather than with an error raised from
+  -- the dynamic SQL below when the sample percentage is missing or not positive
+  --
+  if :samp is null or :samp <= 0
+  then
+    raise_application_error(-20000,'Sample percentage (parameter 1) must be a number greater than 0');
+  end if;
+  -- The SAMPLE clause needs a value below 100, so cap the requested percentage
+  if :samp>=100
+  then
+    :samp := 99.9999;
+  end if;
+  -- Emitted so that the printed script formats the "es" column when it is run
+  dbms_output.put_line('column es format a100');
+
+  for tabs in tabsc
+  loop
+    -- The bind variables drive cursors ext, extc and c1 for this table
+    :tabname := tabs.object_name;
+    :ownname := tabs.object_owner;
+
+    dbms_output.put_line('-- ');
+    dbms_output.put_line('-- ******* '||:tabname||' *******');
+    execute immediate 'select /*+ FULL */ count(*) from "'||:ownname||'"."'||:tabname||'" sample('||:samp||') ' into n;
+    dbms_output.put_line('-- ');
+    dbms_output.put_line('-- Row sample size (approx): '||n);
+    dbms_output.put_line('-- ');
+    dbms_output.put_line(' ');
+    for x in ext
+    loop
+      dbms_output.put_line('-- Existing extension '||x.extension||' '||x.extension_name);
+    end loop;
+    dbms_output.put_line(' ');
+
+    for x in c1
+    loop
+      -- Correlate the hashes of the two columns over a sample of the rows.
+      -- NOTE(review): ORA_HASH presumably makes non-numeric columns usable with CORR - confirm
+      execute immediate 'select corr(ora_hash("'||x.c1||'"),ora_hash("'||x.c2||'")) from "'||:ownname||'"."'||:tabname||'" sample('||:samp||')' into c;
+      if (c is not null and c > minimum_correlation)
+      then
+        dbms_output.put('-- '||x.c1 || ',' || x.c2 ||': good correlation = '||c);
+        cname1 := x.c1;
+        cname2 := x.c2;
+        open extc;
+        fetch extc into num_ext;
+        close extc;
+        if (num_ext>0)
+        then
+          dbms_output.put_line(' SKIPPING (covered already)');
+        else
+          dbms_output.put_line(' ');
+          if :create_now = 'Y'
+          then
+            select dbms_stats.create_extended_stats(:ownname,:tabname,'("'||x.c1||'","'||x.c2||'")') into r from dual;
+            dbms_output.put_line('Extension created: ' || r);
+          else
+            dbms_output.put_line('select dbms_stats.create_extended_stats(''"'||:ownname||'"'',''"'||:tabname||'"'',''("'||x.c1||'","'||x.c2||'")'') es from dual;');
+          end if;
+        end if;
+      else
+        if c is not null
+        then
+          dbms_output.put_line('-- '||x.c1 || ',' || x.c2 ||': poor correlation = '||c);
+        else
+          dbms_output.put_line('-- '||x.c1 || ',' || x.c2 ||': NULL correlation');
+        end if;
+      end if;
+    end loop;
+  end loop;
+end;
+/
+
+set serveroutput off
diff --git a/optimizer/column_groups/corr_from_sqlid.sql b/optimizer/column_groups/corr_from_sqlid.sql
@@ -0,0 +1,17 @@
+-- corr_from_sqlid.sql
+-- For a given SQL ID, parse the statement and create column groups for correlated columns
+-- Parameters:
+-- &1 - SQL ID (passed to explain.sql)
+-- &2 - Table sample percentage (passed to corr_from_plan.sql)
+-- &3 - Y/N - Yes to create column groups immediately (passed to corr_from_plan.sql)
+--
+set long 100000
+
+--
+-- Parse the relevant SQL statement (explain.sql is not shown here; presumably it populates PLAN_TABLE - verify)
+--
+@@explain &1
+--
+-- Scan the tables in the latest PLAN_TABLE plan for correlated column pairs and create the column groups
+--
+@@corr_from_plan &2 &3
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		select table_name,column_name,num_distinct from user_tab_col_statistics columns where table_name like '%CORR%'
		/