脚本内容
#!/usr/local/bin/stap
#
# trace_logicalio_wait_events_physicalio_11204.stp
#
# This is a SystemTap probe to trace logical I/O wait events and physical I/O
# the script works by hooking into the OS syscalls and to Oracle kernel functions
#
# Dependencies (for Oracle userspace tracing):
# Use systemtap 2.5 or higher
# Kernel must have support for uprobes or utrace (for example RHEL/OL 7.x and 6.x)
# The oracle executable should be in the path: add $ORACLE_HOME/bin in $PATH
# Needs kernel debuginfo
#
# Usage: stap -v trace_logicalio_wait_events_physicalio_11204.stp -x <pid>
#
# This script version is for Oracle 11.2.0.4, support for other Oracle versions also available
# see also trace_logicalio_wait_events_physicalio_12102.stp
#
# Version 1.0, Oct 2014 by Luca.Canali@cern.ch
# Additional credits for original contributions: @FritsHoogland
# Logical I/O probes kcbgtcr and kcbgcur, from original work by Alexander Anokhin and @TanelPoder
#
# Note: This is experimental code, use at your own risk
# The overhead of this code can be high, use rather on test systems
#
####################
# Trace Logical IO #
####################
probe process("oracle").function("kcbgtcr") { # consistent reads
pointer_lio=register("rdi")
printf("DB LOGICAL IO Consistent Read (kcbgtcr) for block: tbs#=%ld, rfile#=%d, block#=%d, bigfile_block#=%d, obj#=%ld\n",
user_int32(pointer_lio), user_int32(pointer_lio+4) >> 22 & 0x003FFFFF, user_int32(pointer_lio+4) & 0x003FFFFF,
user_int32(pointer_lio+4), user_int32(pointer_lio+8))
}
probe process("oracle").function("kcbgcur") { # current reads
pointer_lio=register("rdi")
printf("DB LOGICAL IO Current Read (kcbgcur) for block: tbs#=%ld, rfile#=%d, block#=%d, bigfile_block#=%d, obj#=%ld\n",
user_int32(pointer_lio), user_int32(pointer_lio+4) >> 22 & 0x003FFFFF, user_int32(pointer_lio+4) & 0x003FFFFF,
user_int32(pointer_lio+4), user_int32(pointer_lio+8))
}
probe process("oracle").function("kcbzib") { # initiates physical reads into buffer cache
printf(" ->kcbzib, Oracle logical read operations require physical reads into the buffer cache\n")
}
probe process("oracle").function("kcbzgb") { # prepares physical read into buffer cache
printf(" -> kcbzgb, Oracle has allocated buffer cache space for block: tbs#=%ld, rfile#=%d, block#=%d, bigfile_block#=%d, obj#=%ld\n",
register("rsi"), register("rdx") >> 22 & 0x003FFFFF, register("rdx") & 0x003FFFFF, register("rdx"), register("r8"))
}
probe process("oracle").function("kcbzvb") { # data blocks who have undergone an I/O operation
printf(" ->kcbzvb, Oracle has performed I/O on: file#=%ld, block#=%d, rfile#=%d, bigfile_block#=%d\n",
register("rsi"), register("rdx") & 0x003FFFFF, register("rdx") >> 22 & 0x003FFFFF, register("rdx"))
}
#############################
# Trace wait events 11.2.0.4#
#############################
probe process("oracle").function("kskthbwt") {
xksuse = register("r13")-7912
ksusenum = user_uint16(xksuse + 5920)
printf("==========\nDB WAIT EVENT BEGIN: timestamp_ora=%ld, pid=%d, sid=%d, event#=%u\n",
register("rsi"), pid(), ksusenum, register("rdx"))
}
probe process("oracle").function("kskthewt") {
xksuse = register("r13")-7912
ksuudnam = user_string(xksuse + 132)
ksusenum = user_uint16(xksuse + 5920)
ksuseopc = user_uint16(xksuse + 5826)
ksusep1 = user_uint64(xksuse + 5832)
ksusep2 = user_uint64(xksuse + 5840)
ksusep3 = user_uint64(xksuse + 5848)
ksusetim = user_uint32(xksuse + 5856)
ksusesqh = user_uint32(xksuse + 6084)
ksuseobj = user_uint32(xksuse + 6464)
printf("DB WAIT EVENT END: timestamp_ora=%ld, pid=%d, sid=%d, name=%s, event#=%u, p1=%lu, p2=%lu, p3=%lu, wait_time=%u, obj=%d, sql_hash=%u\n==========\n",
register("rdi"), pid(), ksusenum, ksuudnam, ksuseopc, ksusep1, ksusep2, ksusep3, ksusetim, ksuseobj, ksusesqh)
}
###########################
# Trace Physical IO - ASM #
###########################
probe syscall.pread, syscall.pwrite {
if (pid() == target()) {
printf ("OS: ->%s: timestamp=%d, program=%s, pid=%d, fd=%d, offset=%d, count(bytes)=%d\n", name, local_clock_us(), execname(), pid(), fd, offset, count)
}
}
probe syscall.pread.return, syscall.pwrite.return {
if (pid() == target()) {
printf ("OS: <-%s: timestamp=%d, program=%s, pid=%d, return(bytes)=%d\n", name, local_clock_us(), execname(), pid(), $return)
}
}
# some ugly tricks added here for compatibility, in particular there seem to be problems in RHEL/EL 7.x debuginfo related to
# $iocbpp which is incorrectly reported as long int in those system instead of struct iocb**, also we use a trick to resolve the array
# the use of a probe on kernel.function instead of syscall.io_submit also needed for some platforms (RHEL 6.7)
# See below for a more basic probe that works on RHEL and RHEL6.6
probe kernel.function("sys_io_submit") {
if (pid() == target()) {
printf ("OS: ->%s: timestamp=%d, program=%s, pid=%d, nr(num I/O)=%d\n", "io_submit", local_clock_us(), execname(), pid(), $nr)
for (i=0; i<$nr; i++) {
printf(" %d: file descriptor=%d, offset=%d, bytes=%d, opcode=%d\n", i+1, @cast(user_int64($iocbpp+8*i), "struct iocb")->aio_fildes,
@cast(user_int64($iocbpp+8*i), "struct iocb")->aio_offset, @cast(user_int64($iocbpp+8*i), "struct iocb")->aio_nbytes,
@cast(user_int64($iocbpp+8*i), "struct iocb")->aio_lio_opcode)
}
}
}
# For reference this is the original probe on io_submit without the compatibility tricks used above
# probe syscall.io_submit {
# if (pid() == target()) {
# printf ("OS: ->%s: timestamp=%d, program=%s, pid=%d, nr(num I/O)=%d\n", name, local_clock_us(), execname(), pid(), nr)
# for (i=0; i<$nr; i++) {
# printf(" %d: file descriptor=%d, offset=%d, bytes=%d, opcode=%d\n", i+1, $iocbpp[i]->aio_fildes,
# $iocbpp[i]->aio_offset, $iocbpp[i]->aio_nbytes, $iocbpp[i]->aio_lio_opcode)
# }
# }
# }
probe syscall.io_submit.return {
if (pid() == target()) {
printf ("OS: <-%s: timestamp=%d, program=%s, pid=%d, return(num I/O)=%ld\n", name, local_clock_us(), execname(), pid(), $return)
}
}
probe syscall.io_getevents {
if (pid() == target()) {
printf ("OS: ->%s: timestamp=%d, program=%s, pid=%d, min_nr=%d, timeout=%s\n", name, local_clock_us(), execname(), pid(), min_nr, $timeout$)
}
}
# need to explicitly cast $events for RHEL/EL 7.x kernels, where debuginfo issues report $events as long int instead of struct io_event*
probe syscall.io_getevents.return {
if (pid() == target()) {
printf ("OS: <-%s: timestamp=%d, program=%s, pid=%d, return(num I/O)=%ld\n", name, local_clock_us(), execname(), pid(), $return)
for (i=0; i<$return; i++) { # cycle over the reaped I/Os
obj_addr = @cast($events, "struct io_event")[i]->obj # details of struct iocb in /usr/include/libaio.h
fildes = user_uint32(obj_addr +20)
bytes = user_uint64(obj_addr +32)
offset = user_uint64(obj_addr +40)
printf(" %d:, fildes=%d, offset=%lu, bytes=%lu\n", i+1, fildes, offset, bytes)
}
}
}
global active_bio[10000]
probe ioblock.request {
if (pid() == target()) {
printf("OS: ->%s, timestamp=%d, pid=%d, devname=%s, sector=%d, size=%d, rw=%d, address_bio=%lu\n",
name, local_clock_us(), pid(), devname, sector, size, rw, $bio )
active_bio[$bio] += 1
}
}
probe ioblock_trace.request {
if (pid() == target()) {
printf("OS: ->%s, timestamp=%d, pid=%d, devname=%s, sector=%d, size=%d, rw=%d, address_bio=%lu\n",
name, local_clock_us(), pid(), devname, sector, size, rw, $bio )
}
}
# the use of active_bio[] is a workaround as pid is not populated in ioblock.end and therefore cannot be used for filtering
probe ioblock.end {
if (active_bio[$bio] >= 1) {
printf("OS: <-%s, timestamp=%d, pid=%d, devname=%s, sector=%d, rw=%d, address_bio=%lu\n",
name, local_clock_us(), pid(), devname, sector, rw, $bio)
active_bio[$bio] -= 1
}
}
评论
贡献排行榜