4.2.2. 诊断步骤
使用以下脚本查看虚拟机监控程序(hypervisor)窃取 CPU 时间所造成的影响。
[root@ibm-x3550m4-9 ~]# cat generate-tx-drops.sh
#!/bin/bash
# Run cleanup when the script is interrupted with Ctrl-C.
trap 'cleanup' INT

#######################################
# Stop the background load generators and end the script.
# Globals:   HPING_PID (read), DD_PID (read)
# Outputs:   progress messages on stdout
# Returns:   never returns; always exits the shell with status 0
#######################################
cleanup() {
  echo "Cleanup ..."
  # Each PID is empty until the matching process has been started.
  [[ -z "$HPING_PID" ]] || {
    echo "Killing hping3 with PID $HPING_PID"
    kill "$HPING_PID"
  }
  [[ -z "$DD_PID" ]] || {
    echo "Killing dd with PID $DD_PID"
    kill "$DD_PID"
  }
  exit 0
}
VM_IP=10.0.0.20
VM_TAP=tapc18eb09e-01
VM_INSTANCE_ID=instance-00000012
# Number of 5-second samples taken in each phase (before and after dd starts).
SAMPLES=5

# Index of the last physical CPU. LC_ALL=C keeps the "CPU(s):" label
# untranslated so the awk pattern matches regardless of the system locale.
LAST_CPU=$( LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $NF - 1 }' )
if ! [[ "$LAST_CPU" =~ ^[0-9]+$ ]]; then
  echo "Could not determine the last CPU from lscpu" >&2
  exit 1
fi

# Hexadecimal taskset mask that selects only LAST_CPU, so that dd lands on
# the same pCPU as the instance. One hex digit covers four CPUs: the leading
# digit is 1, 2, 4 or 8 and every lower group of four CPUs adds a trailing
# zero (on a 12 core system LAST_CPU is 11 and the mask is 800).
mask_zeros=$(printf '%*s' $(( LAST_CPU / 4 )) '')
TASKSET_MASK="$(( 1 << (LAST_CPU % 4) ))${mask_zeros// /0}"

# Print a timestamp and the tap device counters SAMPLES times, 5 seconds apart.
sample_tap_stats() {
  local i
  for (( i = 0; i < SAMPLES; i++ )); do
    date
    echo "ip -s -s link ls dev $VM_TAP"
    ip -s -s link ls dev "$VM_TAP"
    sleep 5
  done
}

# pinning vCPU to last pCPU
echo "virsh vcpupin $VM_INSTANCE_ID 0 $LAST_CPU"
virsh vcpupin "$VM_INSTANCE_ID" 0 "$LAST_CPU"

# make sure that: nova secgroup-add-rule default udp 1 65535 0.0.0.0/0
# make sure that: nova secgroup-add-rule default tcp 1 65535 0.0.0.0/0
# make sure that: nova secgroup-add-rule default icmp -1 -1 0.0.0.0/0
# --fast, --faster or --flood can also be used
echo "hping3 -u -p 5000 $VM_IP --faster > /dev/null "
hping3 -u -p 5000 "$VM_IP" --faster > /dev/null &
HPING_PID=$!

echo "hping is running, but dd not yet:"
sample_tap_stats

echo "Starting dd and pinning it to the same pCPU as the instance"
echo "dd if=/dev/zero of=/dev/null"
dd if=/dev/zero of=/dev/null &
DD_PID=$!
echo "taskset -p $TASKSET_MASK $DD_PID"
taskset -p "$TASKSET_MASK" "$DD_PID"

sample_tap_stats

cleanup
[root@ibm-x3550m4-9 ~]# cat generate-tx-drops.sh
#!/bin/bash
# Run cleanup when the script is interrupted with Ctrl-C.
trap 'cleanup' INT

#######################################
# Stop the background load generators and end the script.
# Globals:   HPING_PID (read), DD_PID (read)
# Outputs:   progress messages on stdout
# Returns:   never returns; always exits the shell with status 0
#######################################
cleanup() {
  echo "Cleanup ..."
  # Each PID is empty until the matching process has been started.
  [[ -z "$HPING_PID" ]] || {
    echo "Killing hping3 with PID $HPING_PID"
    kill "$HPING_PID"
  }
  [[ -z "$DD_PID" ]] || {
    echo "Killing dd with PID $DD_PID"
    kill "$DD_PID"
  }
  exit 0
}
VM_IP=10.0.0.20
VM_TAP=tapc18eb09e-01
VM_INSTANCE_ID=instance-00000012
# Number of 5-second samples taken in each phase (before and after dd starts).
SAMPLES=5

# Index of the last physical CPU. LC_ALL=C keeps the "CPU(s):" label
# untranslated so the awk pattern matches regardless of the system locale.
LAST_CPU=$( LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $NF - 1 }' )
if ! [[ "$LAST_CPU" =~ ^[0-9]+$ ]]; then
  echo "Could not determine the last CPU from lscpu" >&2
  exit 1
fi

# Hexadecimal taskset mask that selects only LAST_CPU, so that dd lands on
# the same pCPU as the instance. One hex digit covers four CPUs: the leading
# digit is 1, 2, 4 or 8 and every lower group of four CPUs adds a trailing
# zero (on a 12 core system LAST_CPU is 11 and the mask is 800).
mask_zeros=$(printf '%*s' $(( LAST_CPU / 4 )) '')
TASKSET_MASK="$(( 1 << (LAST_CPU % 4) ))${mask_zeros// /0}"

# Print a timestamp and the tap device counters SAMPLES times, 5 seconds apart.
sample_tap_stats() {
  local i
  for (( i = 0; i < SAMPLES; i++ )); do
    date
    echo "ip -s -s link ls dev $VM_TAP"
    ip -s -s link ls dev "$VM_TAP"
    sleep 5
  done
}

# pinning vCPU to last pCPU
echo "virsh vcpupin $VM_INSTANCE_ID 0 $LAST_CPU"
virsh vcpupin "$VM_INSTANCE_ID" 0 "$LAST_CPU"

# make sure that: nova secgroup-add-rule default udp 1 65535 0.0.0.0/0
# make sure that: nova secgroup-add-rule default tcp 1 65535 0.0.0.0/0
# make sure that: nova secgroup-add-rule default icmp -1 -1 0.0.0.0/0
# --fast, --faster or --flood can also be used
echo "hping3 -u -p 5000 $VM_IP --faster > /dev/null "
hping3 -u -p 5000 "$VM_IP" --faster > /dev/null &
HPING_PID=$!

echo "hping is running, but dd not yet:"
sample_tap_stats

echo "Starting dd and pinning it to the same pCPU as the instance"
echo "dd if=/dev/zero of=/dev/null"
dd if=/dev/zero of=/dev/null &
DD_PID=$!
echo "taskset -p $TASKSET_MASK $DD_PID"
taskset -p "$TASKSET_MASK" "$DD_PID"

sample_tap_stats

cleanup
登录实例并启动 dd if=/dev/zero of=/dev/null,在其唯一的 vCPU 上生成额外负载。请注意,这仅用于演示目的。您也可以在虚拟机内不加负载的情况下重复相同的测试。只有当虚拟机监控程序上的另一个进程窃取实例 vCPU 的时间时,才会发生 TX 丢包。
以下示例显示了测试前的一个实例:
%Cpu(s): 22.3 us, 77.7 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st KiB Mem : 1884108 total, 1445636 free, 90536 used, 347936 buff/cache KiB Swap: 0 total, 0 free, 0 used. 1618720 avail Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 30172 root 20 0 107936 620 528 R 99.9 0.0 0:05.89 dd
%Cpu(s): 22.3 us, 77.7 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
KiB Mem : 1884108 total, 1445636 free, 90536 used, 347936 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 1618720 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
30172 root 20 0 107936 620 528 R 99.9 0.0 0:05.89 dd
运行以下脚本,并观察 TX 队列中丢弃的数据包。只有当 dd 进程占用实例 CPU 的大量处理时间时,才会发生丢包。
./generate-tx-drops.sh
[root@ibm-x3550m4-9 ~]# ./generate-tx-drops.sh
virsh vcpupin instance-00000012 0 11
hping3 -u -p 5000 10.0.0.20 --faster > /dev/null
hping is running, but dd not yet:
Tue Nov 29 12:28:22 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500034259301 132047795 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5481296464 81741449 0 11155280 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:28:27 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500055729011 132445382 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5502766282 82139038 0 11155280 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:28:32 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500077122125 132841551 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5524159396 82535207 0 11155280 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:28:37 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500098181033 133231531 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5545218358 82925188 0 11155280 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:28:42 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500119152685 133619793 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5566184804 83313451 0 11155280 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Starting dd and pinning it to the same pCPU as the instance
dd if=/dev/zero of=/dev/null
taskset -p 800 8763
pid 8763's current affinity mask: fff
pid 8763's new affinity mask: 800
Tue Nov 29 12:28:47 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500140267091 134010698 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5587300452 83704477 0 11155280 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:28:52 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500159822749 134372711 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5606853168 84066563 0 11188074 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:28:57 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500179161241 134730729 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5626179144 84424451 0 11223096 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:29:02 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500198344463 135085948 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5645365410 84779752 0 11260740 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Tue Nov 29 12:29:07 EST 2016
ip -s -s link ls dev tapc18eb09e-01
69: tapc18eb09e-01: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master qbrc18eb09e-01 state UNKNOWN mode DEFAULT qlen 1000
link/ether fe:16:3e:a5:17:c0 brd ff:ff:ff:ff:ff:ff
RX: bytes packets errors dropped overrun mcast
5500217014275 135431570 0 0 0 0
RX errors: length crc frame fifo missed
0 0 0 0 0
TX: bytes packets errors dropped carrier collsns
5664031398 85125418 0 11302179 0 0
TX errors: aborted fifo window heartbeat transns
0 0 0 0 0
Cleanup ...
Killing hping3 with PID 8722
Killing dd with PID 8763
[root@ibm-x3550m4-9 ~]#
--- 10.0.0.20 hping statistic ---
3919615 packets transmitted, 0 packets received, 100% packet loss
round-trip min/avg/max = 0.0/0.0/0.0 ms
以下示例显示了测试过程中 dd 对虚拟机监控程序的影响。st 标签表示被虚拟机监控程序窃取的 CPU 时间百分比。
%Cpu(s): 7.0 us, 27.5 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 20.2 si, 45.4 st KiB Mem : 1884108 total, 1445484 free, 90676 used, 347948 buff/cache KiB Swap: 0 total, 0 free, 0 used. 1618568 avail Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 30172 root 20 0 107936 620 528 R 54.3 0.0 1:00.50 dd
%Cpu(s): 7.0 us, 27.5 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 20.2 si, 45.4 st
KiB Mem : 1884108 total, 1445484 free, 90676 used, 347948 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 1618568 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
30172 root 20 0 107936 620 528 R 54.3 0.0 1:00.50 dd
请注意,在测试的后半段,与实例的 ssh 连接可能会变得缓慢;如果测试运行时间过长,甚至可能超时。