优化pm2的检测机制

This commit is contained in:
jingrow 2025-08-08 07:21:05 +08:00
parent a0fd4e5bc4
commit 6168a488fa

View File

@ -783,59 +783,36 @@ start_project_with_pm2() {
pm2 list | grep -w '$SITE_NAME' | grep -c 'online' || echo '0' pm2 list | grep -w '$SITE_NAME' | grep -c 'online' || echo '0'
") ")
if [ "$PM2_STATUS" != "0" ] && [ -n "$PM2_STATUS" ] && [ "$PM2_RUNNING" != "0" ]; then # 如果项目存在,先删除它(不管状态如何)
if [ "$FORCE_UPDATE" = true ]; then if [ "$PM2_STATUS" != "0" ] && [ -n "$PM2_STATUS" ]; then
log_warning "项目已在PM2中运行重新启动..." log_warning "项目已存在于PM2中删除旧进程..."
su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
cd /home/jingrow/jsite/$SITE_NAME
pm2 delete $SITE_NAME 2>/dev/null || true
pm2 start ecosystem.config.cjs
"
if [ $? -ne 0 ]; then
log_error "PM2启动失败请检查项目配置和依赖"
log_info "检查PM2日志"
su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 logs $SITE_NAME --lines 10
"
return 1
fi
else
log_warning "项目已在PM2中运行跳过启动"
log_success "现有PM2进程运行正常"
return 0
fi
else
# 如果项目存在但不在运行状态,先删除再重新启动
if [ "$PM2_STATUS" != "0" ] && [ -n "$PM2_STATUS" ]; then
log_warning "项目存在于PM2中但未正常运行重新启动..."
su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 delete $SITE_NAME 2>/dev/null || true
"
fi
log_info "启动新项目..."
su - jingrow -c " su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\" export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\" [ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
cd /home/jingrow/jsite/$SITE_NAME pm2 delete $SITE_NAME 2>/dev/null || true
pm2 start ecosystem.config.cjs
" "
if [ $? -ne 0 ]; then # 等待一下确保删除完成
log_error "PM2启动失败请检查项目配置和依赖" sleep 2
log_info "检查PM2日志" fi
su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\" # 启动新项目
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\" log_info "启动项目: $SITE_NAME..."
pm2 logs $SITE_NAME --lines 10 su - jingrow -c "
" export NVM_DIR=\"\$HOME/.nvm\"
return 1 [ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
fi cd /home/jingrow/jsite/$SITE_NAME
pm2 start ecosystem.config.cjs
"
if [ $? -ne 0 ]; then
log_error "PM2启动失败请检查项目配置和依赖"
log_info "检查PM2日志"
su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 logs $SITE_NAME --lines 10
"
return 1
fi fi
# 等待PM2启动完成最多等待60秒 # 等待PM2启动完成最多等待60秒
@ -843,11 +820,14 @@ start_project_with_pm2() {
local wait_time=0 local wait_time=0
local max_wait=60 local max_wait=60
local pm2_running="0" local pm2_running="0"
local max_retries=3
local retry_count=0
while [ $wait_time -lt $max_wait ] && [ "$pm2_running" = "0" ]; do while [ $wait_time -lt $max_wait ] && [ "$pm2_running" = "0" ]; do
sleep 2 sleep 2
wait_time=$((wait_time + 2)) wait_time=$((wait_time + 2))
# 检查PM2进程状态
pm2_running=$(su - jingrow -c " pm2_running=$(su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\" export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\" [ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
@ -856,6 +836,35 @@ start_project_with_pm2() {
if [ "$pm2_running" = "0" ]; then if [ "$pm2_running" = "0" ]; then
log_info "等待PM2启动... (${wait_time}s/${max_wait}s)" log_info "等待PM2启动... (${wait_time}s/${max_wait}s)"
# 如果等待时间超过30秒检查是否有错误
if [ $wait_time -gt 30 ] && [ $retry_count -lt $max_retries ]; then
log_warning "启动时间较长检查PM2状态和日志..."
# 检查PM2状态
local pm2_status=$(su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 list | grep -w '$SITE_NAME' || echo 'not_found'
")
if [ "$pm2_status" != "not_found" ]; then
log_info "PM2状态: $pm2_status"
fi
# 检查PM2日志
local pm2_logs=$(su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 logs $SITE_NAME --lines 5 --nostream 2>/dev/null || echo 'no_logs'
")
if [ "$pm2_logs" != "no_logs" ]; then
log_info "PM2日志: $pm2_logs"
fi
retry_count=$((retry_count + 1))
fi
fi fi
done done
@ -877,20 +886,34 @@ start_project_with_pm2() {
return 1 return 1
fi fi
# 额外检查:验证项目是否真的在监听端口 log_success "PM2进程已启动开始健康检查..."
local project_port=$(get_or_assign_port "$SITE_NAME")
log_info "验证项目是否在端口 $project_port 上正常运行..."
# 等待一下让应用完全启动 # 获取项目端口
local project_port=$(get_or_assign_port "$SITE_NAME")
# 等待应用完全启动
sleep 3 sleep 3
# 检查端口是否被监听 # 1. 检查端口是否被监听(使用多种方法)
log_info "检查端口 $project_port 是否被监听..."
local port_check=0 local port_check=0
local max_port_checks=10 local max_port_checks=10
local port_check_count=0 local port_check_count=0
while [ $port_check_count -lt $max_port_checks ] && [ $port_check -eq 0 ]; do while [ $port_check_count -lt $max_port_checks ] && [ $port_check -eq 0 ]; do
# 方法1: 使用netstat
port_check=$(netstat -tlnp 2>/dev/null | grep ":$project_port " | wc -l || echo "0") port_check=$(netstat -tlnp 2>/dev/null | grep ":$project_port " | wc -l || echo "0")
# 方法2: 如果netstat失败使用ss
if [ $port_check -eq 0 ]; then
port_check=$(ss -tlnp 2>/dev/null | grep ":$project_port " | wc -l || echo "0")
fi
# 方法3: 如果ss也失败使用lsof
if [ $port_check -eq 0 ]; then
port_check=$(lsof -i :$project_port 2>/dev/null | wc -l || echo "0")
fi
if [ $port_check -eq 0 ]; then if [ $port_check -eq 0 ]; then
sleep 1 sleep 1
port_check_count=$((port_check_count + 1)) port_check_count=$((port_check_count + 1))
@ -915,6 +938,71 @@ start_project_with_pm2() {
return 1 return 1
fi fi
log_success "端口 $project_port 已被监听"
# 2. HTTP健康检查如果应用支持
log_info "执行HTTP健康检查..."
local health_check_success=false
local health_check_attempts=0
local max_health_checks=5
# 获取本地IP地址
local local_ip=$(get_optimal_host_ip)
while [ $health_check_attempts -lt $max_health_checks ] && [ "$health_check_success" = false ]; do
# 尝试HTTP健康检查
local http_response=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 --max-time 10 "http://$local_ip:$project_port/" 2>/dev/null || echo "000")
if [ "$http_response" = "200" ] || [ "$http_response" = "302" ] || [ "$http_response" = "404" ]; then
health_check_success=true
log_success "HTTP健康检查通过 (状态码: $http_response)"
else
health_check_attempts=$((health_check_attempts + 1))
log_info "HTTP健康检查失败 (状态码: $http_response),重试 ${health_check_attempts}/${max_health_checks}"
sleep 2
fi
done
# 如果HTTP健康检查失败但端口已监听仍然认为启动成功
if [ "$health_check_success" = false ]; then
log_warning "HTTP健康检查失败但端口已监听认为启动成功"
fi
# 3. 检查PM2进程状态和资源使用情况
log_info "检查PM2进程状态和资源使用情况..."
local pm2_status=$(su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 list | grep -w '$SITE_NAME' || echo 'not_found'
")
if [ "$pm2_status" != "not_found" ]; then
log_success "PM2进程状态正常: $pm2_status"
else
log_error "PM2进程状态异常"
return 1
fi
# 4. 检查应用是否在正常运行通过进程ID
local app_pid=$(su - jingrow -c "
export NVM_DIR=\"\$HOME/.nvm\"
[ -s \"\$NVM_DIR/nvm.sh\" ] && \. \"\$NVM_DIR/nvm.sh\"
pm2 list | grep -w '$SITE_NAME' | awk '{print \$6}' || echo '0'
")
if [ "$app_pid" != "0" ] && [ -n "$app_pid" ]; then
# 检查进程是否真的在运行
if kill -0 "$app_pid" 2>/dev/null; then
log_success "应用进程正在运行 (PID: $app_pid)"
else
log_error "应用进程不存在 (PID: $app_pid)"
return 1
fi
else
log_error "无法获取应用进程ID"
return 1
fi
log_success "项目已成功启动并监听端口 $project_port" log_success "项目已成功启动并监听端口 $project_port"
# 保存PM2配置 # 保存PM2配置