JL Computer Consultancy
Trouble-shooting |
Mar 2001 / Sep 2007 |
In my one-day tutorial session on trouble-shooting, I have an example of some code I use to create a package that summarises the work done and time lost by current sessions.
Last week someone asked me if they could have a copy of the source code and I told them that it was on my website – and then discovered that it wasn’t. So here it is.
It’s a package I use from time to time when someone says “the system is slow but there are not specific problems”. I use it to take a snapshot over a short time period to see what’s going on as it often picks up groups of sessions with very similar profiles (cpu_time, idle_time, wait_time). The benefit of this is that sessions with similar profiles may be doing the same thing, so if you can make the behaviour of one session more efficient you’ve probably done something that helps a lot of sessions. (I also use v$sess_io for the same pattern-oriented approach).
The code below is as it was when I last used it on a production system. I’ve had one problem with this code an 8i system that was running with optimizer_mode = first_rows and 3,000 sessions (the query ran for about two minutes of solid CPU) and 9i seems to have an odd bug in the x$ksles structure that makes the query against v$session_event CPU-intensive. So test carefully before running on a production system.
For a completely different implementation of the same kind of thing, take a look at Tanel Poder’s Session Snapper.
rem
rem Script: snap_trouble.sql
rem Author: Jonathan Lewis
rem Dated: March 2001
rem Purpose: Package to get snapshot of CPU and Wait times by session
rem
rem Last tested:
rem 10.2.0.3
rem 9.2.0.8 CPU-intensive
rem 8.1.7.4
rem
rem Notes
rem Has the usual issue about session_event and the +1 bug in
rem 9.2.0.1/2 which was fixed in 9.2.0.3/4, so you have to
rem worry about choosing the code line to match your version
rem at the points tagged with the text "sid_bug"
rem
rem 9.2 seems to have an odd bug that makes part of the access
rem to v$session_event very slow. Most of the CPU time is spent
rem scanning x$ksles.
rem
rem Has to be run by SYS to create the package
rem
rem Usage:
rem set serveroutput on size 1000000 format wrapped
rem set linesize 120
rem set trimspool on
rem execute snap_trouble.start_snap
rem -- do something
rem execute snap_trouble.end_snap
rem
rem Warning -
rem For a system with a large number of active sessions,
rem this will be expensive to run because of the query
rem on v$session_event (which currently happens twice)
rem
rem Possible enhancements
rem Use the session startup time for each SID if
rem the end_snap has been called without a start_snap
rem
rem Rewrite the cost to access the x$ direct - it would
rem be more efficient.
rem
rem Pick up the statistic# from v$statname before
rem running the query for CPU used.
rem
create or replace package snap_trouble as
procedure start_snap;
procedure end_snap;
end;
/
create or replace package body snap_trouble as
cursor c1 is
select
sid,
wait_time,
idle_time,
cpu_time
from (
select
sid,
round(sum(wait_time)/100) wait_time,
round(sum(idle_time)/100) idle_time,
round(sum(cpu_time)/100) cpu_time
from (
select
/*+ ordered */
sid,
0 wait_time,
0 idle_time,
value cpu_time
from
v$statname sn,
V$sesstat st
where
sn.statistic# = st.statistic#
and sn.name like 'CPU used by this session'
union all
select
sid, time_waited, 0, 0
/* sid_bug */
-- sid + 1, time_waited, 0, 0
from v$session_event
where
time_waited != 0
and event not in (
'PL/SQL lock timer',
'SQL*Net message from client'
)
union all
select -- lazy, should use decodes.
sid, 0, time_waited, 0
/* sid_bug */
-- sid + 1, 0, time_waited, 0
from v$session_event
where
time_waited != 0
and event in (
'PL/SQL lock timer',
'SQL*Net message from client'
)
)
group by
sid
)
;
type w_type is table of c1%rowtype index by binary_integer;
w_list w_type;
w_empty_list w_type;
m_start_time date;
m_start_flag char(1);
m_end_time date;
procedure start_snap is
begin
m_start_time := sysdate;
m_start_flag := 'U';
w_list := w_empty_list;
for r in c1 loop
w_list(r.sid).wait_time := r.wait_time;
w_list(r.sid).idle_time := r.idle_time;
w_list(r.sid).cpu_time := r.cpu_time;
end loop;
end start_snap;
procedure end_snap is
begin
m_end_time := sysdate;
dbms_output.put_line('---------------------------------');
dbms_output.put_line('CPU vs. wait:- ' ||
to_char(m_end_time,'dd-Mon hh24:mi:ss')
);
if m_start_flag = 'U' then
dbms_output.put_line('Interval:- ' ||
trunc(86400 * (m_end_time - m_start_time)) ||
' seconds'
);
else
dbms_output.put_line('Since Startup:- ' ||
to_char(m_start_time,'dd-Mon hh24:mi:ss')
);
end if;
dbms_output.put_line('---------------------------------');
dbms_output.put_line(
rpad('SID',6) ||
lpad('CPU time (s)',16) ||
lpad('Wait time (s)',16) ||
lpad('Idle time (s)',16) ||
lpad('Total time',16) ||
lpad('PCT CPU',16) ||
lpad('PCT waits',16)
);
dbms_output.put_line(
rpad('-',6,'-') ||
lpad('-------------',16) ||
lpad('-------------',16) ||
lpad('------------',16) ||
lpad('----------',16) ||
lpad('----------',16) ||
lpad('---------',16)
);
for r in c1 loop
if (not w_list.exists(r.sid)) then
w_list(r.sid).wait_time := 0;
w_list(r.sid).idle_time := 0;
w_list(r.sid).cpu_time := 0;
end if;
if (
(w_list(r.sid).wait_time != r.wait_time)
or (w_list(r.sid).idle_time != r.idle_time)
or (w_list(r.sid).cpu_time != r.cpu_time)
) then
dbms_output.put(rpad(r.sid,6));
dbms_output.put(to_char(
r.cpu_time - w_list(r.sid).cpu_time,
'999,999,999.990'));
dbms_output.put(to_char(
r.wait_time - w_list(r.sid).wait_time,
'999,999,999.990'));
dbms_output.put(to_char(
r.idle_time - w_list(r.sid).idle_time,
'999,999,999.990'));
dbms_output.put(to_char(
(r.wait_time - w_list(r.sid).wait_time) +
(r.idle_time - w_list(r.sid).idle_time) +
(r.cpu_time - w_list(r.sid).cpu_time),
'999,999,999.990'));
dbms_output.put(to_char(
100 *
(r.cpu_time - w_list(r.sid).cpu_time) /
(
(r.wait_time - w_list(r.sid).wait_time) +
(r.idle_time - w_list(r.sid).idle_time) +
(r.cpu_time - w_list(r.sid).cpu_time)
),
'999,999,999.990'));
dbms_output.put(to_char(
100 *
(r.wait_time - w_list(r.sid).wait_time) /
(
(r.wait_time - w_list(r.sid).wait_time) +
(r.idle_time - w_list(r.sid).idle_time) +
(r.cpu_time - w_list(r.sid).cpu_time)
),
'999,999,999.990'));
dbms_output.new_line;
end if;
end loop;
end end_snap;
begin
select
startup_time, 'S'
into
m_start_time, m_start_flag
from
v$instance;
end snap_trouble;
/
drop public synonym snap_trouble;
create public synonym snap_trouble for snap_trouble;
grant execute on snap_trouble to public;
The following is a (manufactured and cosmetically adjusted) example of the output of the code in its current form:
---------------------------------
CPU vs. wait:- 02-Sep 14:23:58
Interval:- 45 seconds
---------------------------------
SID CPU time (s) Wait time (s) Idle time (s) Total time PCT CPU PCT waits
--- ------------ ------------- ------------ ---------- -------- ---------
17 26.581 6.272 12.279 45.132 58.896 13.897
19 26.713 4.956 13.231 44.900 59.494 11.038
20 28.218 5.266 11.654 45.138 62.515 11.666




