Commit ad9b58b3 by 刘航

【ADD】增加diagnose_tx1,诊断脚本,目前只有诊断MediaServer是否正常工作的功能

1 parent 7e816d7c
...@@ -46,3 +46,6 @@ echo "05 2 * * * root elasticsearch_clean.py" >> /etc/crontab ...@@ -46,3 +46,6 @@ echo "05 2 * * * root elasticsearch_clean.py" >> /etc/crontab
/etc/init.d/cron reload /etc/init.d/cron reload
``` ```
## diagnose
诊断脚本,详细说明见diagnose_tx1目录中的README文档
# diagnose诊断脚本
## 检测MediaServer是否正常工作。
部署说明:
- 部署在运行MediaServer的主节点上
- 路径:/root/UserApp/diagnose_tx1
- 添加守护:将diagnose.conf拷贝到/etc/supervisor/conf.d 然后执行supervisorctl reload
日志:
- 日志路径：/root/Log/diagnose.log，每隔1秒输出一次当前状态。
工作步骤简介:
- 默认每900秒执行一次检测动作，可修改diagnose.py中的check_mediaserver_period_seconds变量来调整检测周期
- 启动rtsp_decode_encode_test_app(rtsp1),利用当前目录的jiaotong4.264搭建rtspserver
- 启动另一个rtsp_decode_encode_test_app(rtsp2)作为rtspclient,点播经本机MediaServer转发的rtsp1的视频。
- 分析rtsp2的输出判断是否点播成功,如果**连续3次**均点播失败(此状态下间隔120秒一次),重启MediaServer服务
import shlex
import subprocess
import re
import time
import os
import threading
import commands
# Configuration.
# Seconds that must elapse between two MediaServer checks in the normal case.
check_mediaserver_period_seconds = 900
# Shorter interval (seconds) used for the next check after a failed check.
if_failed_next_check_period_seconds = 120
# Last recorded state: set to 1 after a successful check and to 0 after a
# MediaServer restart was triggered; reported in the periodic tick log line.
mediaserver_last_status = 1
def check_mediaserver():
    """Run one end-to-end playback test and report whether it succeeded.

    Starts ``rtsp_decode_encode_test_app`` as an RTSP server for
    ``./jiaotong4.264`` on port 18554, waits 5 seconds for it to come up,
    then runs ``python try_play.py`` and uses its exit status as the verdict.
    Afterwards all ``rtsp_decode_encode_test_app`` processes are killed.

    Returns:
        bool: True when ``try_play.py`` exited with status 0, False otherwise
        (including when the play command could not be launched).

    Raises:
        OSError: if the RTSP server executable cannot be started.
    """
    rtspserver_cmd = shlex.split(
        './rtsp_decode_encode_test_app --url0=./jiaotong4.264 --serverport=18554')
    play_cmd = shlex.split('python try_play.py')

    # The children's output is never inspected here. Sending it to the null
    # device (instead of an unread pipe) keeps a chatty child from blocking
    # once the pipe buffer is full.
    with open(os.devnull, 'w') as devnull:
        p1 = subprocess.Popen(rtspserver_cmd, shell=False,
                              stdout=devnull, stderr=subprocess.STDOUT)
        try:
            time.sleep(5)
            try:
                status = subprocess.call(play_cmd, stdout=devnull,
                                         stderr=subprocess.STDOUT)
            except OSError:
                # The play command could not be launched: count as a failure.
                status = -1
        finally:
            # Sweep every test-app instance by name, then make sure the
            # server started above is really gone so wait() cannot hang.
            os.system('kill -9 `ps aux|grep rtsp_decode_encode_test_app| grep -v grep | awk \'{print $2}\'`')
            if p1.poll() is None:
                p1.kill()
            p1.wait()
    return status == 0
if __name__ == '__main__':
    # Watchdog loop: wake up once per second, run the playback check when the
    # current period has elapsed, and restart MediaServer after three
    # consecutive failed checks.
    last_check_media = time.time()
    try_count = 0
    checkperiod = check_mediaserver_period_seconds
    while True:
        now = time.time()
        if now - last_check_media <= checkperiod:
            # Not due yet: just report the current state.
            print('dianose working tick, now-last_check_media:{}, try_count:{}, mediaserver_last_status:{}'.format(now-last_check_media, try_count, mediaserver_last_status))
        else:
            last_check_media = now
            if check_mediaserver():
                print('mediaserver is working')
                checkperiod = check_mediaserver_period_seconds
                try_count = 0
                mediaserver_last_status = 1
            else:
                try_count += 1
                if try_count < 3:
                    # Retry sooner while failures are accumulating.
                    checkperiod = if_failed_next_check_period_seconds
                else:
                    print('maybe mediaserver has some problem, restart!')
                    # Kill every process whose command line matches
                    # "MediaServer".
                    os.system('kill -9 `ps aux|grep MediaServer| grep -v grep | awk \'{print $2}\'`')
                    checkperiod = check_mediaserver_period_seconds
                    try_count = 0
                    mediaserver_last_status = 0
        time.sleep(1)
This file is too large to display.
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
No preview for this file type
import shlex
import subprocess
import re
import time
import os
import threading
# Configuration.
# Regular expressions; a client output line matching any of them counts as a
# successful playback.
keywords = [r'rtsp client working']
# Command line of the RTSP client under test.
# NOTE(review): presumably 127.0.0.1:8555 is the local MediaServer relaying
# the stream served on port 18554 -- confirm against the deployment.
shell_cmd = './rtsp_decode_encode_test_app --url0=rtsp://127.0.0.1:8555/rtsp://127.0.0.1:18554/0 --serverport=18654'
# Reference time for the 10 second timeout applied in the main loop.
starttm = time.time()
# Set to True by tryplay() once a keyword has been seen in the output.
has_keyword = False
cmd = shlex.split(shell_cmd)
# The client is launched here, at module level, with stderr merged into the
# stdout pipe that tryplay() reads.
p = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# Every line read from the client is copied into this log file.
f = open('./current.log', 'w')
def tryplay():
    """Scan the client's output for a success keyword.

    While the client process is running, reads its stdout line by line,
    copies each line to the log file, and sets the module-level
    ``has_keyword`` flag as soon as a line matches one of ``keywords``.

    Returns:
        True in every case: right after a keyword match, or once the
        process is no longer running.
    """
    global has_keyword
    while p.poll() is None:
        raw = p.stdout.readline()
        f.write(raw)
        text = raw.strip()
        if not text:
            continue
        if any(re.search(pattern, text) is not None for pattern in keywords):
            has_keyword = True
            print('play ok')
            return True
    return True
if __name__ == '__main__':
    # Read the client output in a background thread while this loop polls
    # the result once per second, giving up 10 seconds after start.
    reader = threading.Thread(target=tryplay)
    reader.start()
    kill_cmd = 'kill -9 {}'.format(p.pid)
    while True:
        print('has_keyword:{}'.format(has_keyword))
        if has_keyword:
            # Playback confirmed: stop the client and report success.
            print(kill_cmd)
            os.system(kill_cmd)
            exitcode = 0
            break
        if time.time() - starttm <= 10:
            print('time.sleep(1)')
            time.sleep(1)
            continue
        # Timed out; re-check the flag in case the reader set it meanwhile.
        if not has_keyword:
            print('play failed')
            print(kill_cmd)
            os.system(kill_cmd)
            exitcode = -1
            break
    print('breaked')
    reader.join(timeout=10)
    print('joined')
    f.close()
    print('fclosed')
    exit(exitcode)
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
watchmen在繁星的应用与开发 watchmen在繁星的应用与开发
********************************** **********************************
20181128整理需求以及实现 20181128整理需求以及设计
=============================== ===============================
原始需求以及应对方案 原始需求以及应对方案
...@@ -223,3 +223,10 @@ OperateServer ...@@ -223,3 +223,10 @@ OperateServer
^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^
- 长连接状态 - 长连接状态
20181128整理需求实现
===============================
tcp长连接状态
--------------------
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!