修复 Restart Agent processes 导致 CPU 占用 100% 并卡住不动的问题;拆分 Restart Agent processes 步骤,并优化 Clone Agent Repository 步骤
This commit is contained in:
parent
2c20d4eec6
commit
5235bbbcc3
@ -6,6 +6,40 @@
|
||||
repo: '{{ agent_repository_url }}'
|
||||
dest: /home/jingrow/agent/repo
|
||||
remote: upstream
|
||||
depth: 1
|
||||
force: yes
|
||||
clone: yes
|
||||
update: no
|
||||
register: clone_result
|
||||
retries: 3
|
||||
delay: 10
|
||||
until: clone_result is success
|
||||
ignore_errors: yes
|
||||
|
||||
# Remove the checkout directory when the clone task ended in failure.
- name: Clean up failed clone
  when: clone_result is failed
  become: true
  become_user: jingrow
  file:
    state: absent
    path: /home/jingrow/agent/repo
  # Best effort: a failed cleanup must not abort the play at this point.
  ignore_errors: true
|
||||
|
||||
# Poll for the .git directory to confirm that the clone produced a checkout.
# Only runs when the clone task itself reported success.
- name: Verify clone success
  become: true
  become_user: jingrow
  stat:
    path: /home/jingrow/agent/repo/.git
  register: git_check
  retries: 3
  delay: 5
  until: git_check.stat.exists
  when: clone_result is success
  # When the retries are exhausted Ansible marks this task as failed, which
  # would abort the play before "Fail if clone not successful" can evaluate
  # git_check.stat.exists and report its dedicated message. Ignore the
  # failure here and let that task make the final decision.
  ignore_errors: true
|
||||
|
||||
# Abort the play when the clone failed, or when it reported success but the
# .git directory was not found by the verification task.
- name: Fail if clone not successful
  when: >-
    clone_result is failed
    or (clone_result is success and not git_check.stat.exists)
  fail:
    msg: 'Failed to clone agent repository after multiple attempts'
|
||||
|
||||
- name: Install Agent
|
||||
become: yes
|
||||
|
||||
@ -81,7 +81,68 @@
|
||||
# Run `docker info`; the task fails if the command exits non-zero.
- name: Get Docker Info
  command: docker info
  # Read-only query: never report the host as changed.
  changed_when: false
|
||||
|
||||
# Stop every process in the "agent:" supervisor group. This replaces the
# former single "Restart Agent processes" step (state: restarted); stopping
# and starting are now separate tasks.
- name: Stop Agent processes
  supervisorctl:
    name: "agent:"
    state: stopped
  register: stop_result
  retries: 3
  delay: 5
  until: stop_result is success
  # Keep going on failure; later tasks check stop_result before acting on it.
  ignore_errors: true
|
||||
|
||||
# Poll supervisor once per second, up to 30 times, until no process in the
# "agent:" group is reported as RUNNING. Exits 1 (task failure) on timeout.
- name: Wait for Agent processes to stop
  when: stop_result is success
  shell: |
    attempt=0
    while [ "$attempt" -lt 30 ]; do
      # Done as soon as the status output no longer contains RUNNING.
      supervisorctl status agent: | grep -q "RUNNING" || exit 0
      sleep 1
      attempt=$((attempt + 1))
    done
    exit 1
  register: wait_result
  # Pure status polling; nothing on the host is modified.
  changed_when: false
|
||||
|
||||
# Start the agent programs one at a time, in the order listed. Each item is
# retried up to 3 times with a 5 second pause between attempts.
- name: Start Agent processes in order
  loop:
    - 'agent:redis'
    - 'agent:web'
    - 'agent:worker-0'
    - 'agent:worker-1'
  supervisorctl:
    state: started
    name: "{{ item }}"
  register: start_result
  until: start_result is success
  retries: 3
  delay: 5
  # Do not abort here; later tasks inspect the outcome.
  ignore_errors: true
|
||||
|
||||
# Poll supervisor once per second, up to 30 times, until every process in
# the "agent:" group reports RUNNING. Exits 1 (task failure) on timeout.
- name: Wait for Agent processes to be ready
  shell: |
    for i in $(seq 1 30); do
      # Take a single snapshot per iteration so both checks below look at
      # the same state instead of two separate supervisorctl calls.
      status="$(supervisorctl status agent:)"
      # Ready only when there is output and no line lacks RUNNING. This
      # rejects STOPPED/FATAL/EXITED as before, and also transitional
      # states such as STARTING, BACKOFF and STOPPING.
      if [ -n "$status" ] && ! printf '%s\n' "$status" | grep -qv "RUNNING"; then
        exit 0
      fi
      sleep 1
    done
    exit 1
  register: wait_ready_result
  # Pure status polling; nothing on the host is modified.
  changed_when: false
  when: start_result is success
|
||||
|
||||
# Ask supervisor for the whole "agent:" group to be in the started state and
# record the outcome in verify_result for the final failure check.
- name: Verify Agent processes status
  supervisorctl:
    state: started
    name: "agent:"
  register: verify_result
  until: verify_result is success
  retries: 3
  delay: 5
  # The failure is reported by the next task with an explicit message.
  ignore_errors: true
|
||||
|
||||
# Abort the play with an explicit message when the verification task failed.
- name: Fail if Agent processes not running
  when: verify_result is failed
  fail:
    msg: 'Agent processes failed to start properly'
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user