1 目的
跟踪IO请求,分析磁盘读写性能瓶颈
2 工具
2.1 磁盘 geometry
# hdparm -g /dev/sdl
/dev/sdl:
geometry = 1191062/255/63, sectors = 19134414848, start = 0
2.2 读写请求
打开裸块设备
./block_syscall -device /dev/sdl
从offsett 0 开始读取64MB (offect sector 0; size sector 131072)
r 0 131072
// block_syscall.go
// go build block_syscall.go
package main
import (
"bufio"
"flag"
"fmt"
"log"
"os"
"strconv"
"strings"
"syscall"
"time"
)
var (
flagDevice = flag.String("device", "", "block device")
)
func init() {
log.SetFlags(log.Lshortfile | log.LstdFlags)
flag.Parse()
}
func main() {
fd, err := syscall.Open(*flagDevice, syscall.O_DIRECT|os.O_RDWR, 0666)
if err != nil {
log.Println(err)
return
}
inputReader := bufio.NewReader(os.Stdin)
for {
fmt.Printf(">")
input, err := inputReader.ReadString('\n')
if err != nil {
log.Println(err)
break
}
input = strings.Trim(input, "\n")
param := strings.Split(input, " ")
if len(param) < 3 {
log.Printf("err param count:%s", input)
continue
}
if param[0] != "r" && param[0] != "w" {
log.Println("not support cmd")
continue
}
offectSector, err := strconv.Atoi(param[1])
if err != nil {
log.Println(err)
continue
}
sizeSector, err := strconv.Atoi(param[2])
if err != nil {
log.Println(err)
continue
}
opStart := time.Now()
_, err = syscall.Seek(fd, int64(offectSector*512), 0)
if err != nil {
log.Println(err)
continue
}
buf := make([]byte, sizeSector*512)
switch param[0] {
case "r":
n, err := syscall.Read(fd, buf)
opEnd := time.Now()
if err != nil {
log.Println(err)
break
}
log.Printf("complete:%s %.2fms", greek(int64(n), 2), opEnd.Sub(opStart).Seconds()*1000)
case "w":
n, err := syscall.Write(fd, buf)
opEnd := time.Now()
if err != nil {
log.Println(err)
break
}
log.Printf("complete:%s %.2fms", greek(int64(n), 2), opEnd.Sub(opStart).Seconds()*1000)
}
}
}
const (
B = int64(1)
KB = 1024 * B
MB = 1024 * KB
GB = 1024 * MB
TB = 1024 * GB
PB = 1024 * TB
)
var unit = []struct {
Cap int64
Name string
}{{PB, "P"}, {TB, "T"}, {GB, "G"}, {MB, "M"}, {KB, "K"}, {B, "B"}}
func greek(v int64, precision int) string {
for _, s := range unit {
if v >= s.Cap {
if precision < 0 {
return fmt.Sprintf("%f%s", float64(v)/float64(s.Cap), s.Name)
} else {
format := fmt.Sprintf("%%.%df%%s", precision)
return fmt.Sprintf(format, float64(v)/float64(s.Cap), s.Name)
}
}
}
return ""
}
2.3 抓取IO协议栈日志
btrace /dev/sdl > cap.log
2.4 分析日志
对于磁盘驱动器来说是每1024个sector一个请求,64MB/512KB=128 所以生成了128个请求,与iostat看到的r/s一致
grep Q sdl.log |wc -l
>128
iostat -dx /dev/sdl 1 | grep sdl
Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util
sdl 0.00 0.00 128.00 0.00 65536.00 0.00 1024.00 20.81 162.60 162.60 0.00 2.28 29.20
128个IO请求用了24ms已经全部到了磁盘控制器
grep D cap.log | sort -n -k 4 | awk '{print $4*1000}'
磁盘控制器的处理耗时,发现每隔8*512KB=4MB,会有10+ms的间隔,怀疑是磁头在做seek操作
grep -E "D|C" cap.log | sort -rk 6 |awk '{if($6=="D") s[$8"+"$10]=$4 e;else s[$8"+"$10]=($4-s[$8"+"$10])*1000}END{for(j in s)print j,s[j]}' | sort -t '+' -n | sort -nk 2
,