修复 Restart Agent processes 导致 CPU 100% 卡住不动的问题，拆分 Restart Agent processes 并优化 Clone Agent Repository 步骤
This commit is contained in:
parent
2c20d4eec6
commit
5235bbbcc3
@ -6,6 +6,40 @@
|
|||||||
repo: '{{ agent_repository_url }}'
|
repo: '{{ agent_repository_url }}'
|
||||||
dest: /home/jingrow/agent/repo
|
dest: /home/jingrow/agent/repo
|
||||||
remote: upstream
|
remote: upstream
|
||||||
|
depth: 1
|
||||||
|
force: yes
|
||||||
|
clone: yes
|
||||||
|
update: no
|
||||||
|
register: clone_result
|
||||||
|
retries: 3
|
||||||
|
delay: 10
|
||||||
|
until: clone_result is success
|
||||||
|
ignore_errors: yes
|
||||||
|
|
||||||
|
- name: Clean up failed clone
|
||||||
|
become: yes
|
||||||
|
become_user: jingrow
|
||||||
|
file:
|
||||||
|
path: /home/jingrow/agent/repo
|
||||||
|
state: absent
|
||||||
|
when: clone_result is failed
|
||||||
|
ignore_errors: yes
|
||||||
|
|
||||||
|
- name: Verify clone success
|
||||||
|
become: yes
|
||||||
|
become_user: jingrow
|
||||||
|
stat:
|
||||||
|
path: /home/jingrow/agent/repo/.git
|
||||||
|
register: git_check
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
|
until: git_check.stat.exists
|
||||||
|
when: clone_result is success
|
||||||
|
|
||||||
|
- name: Fail if clone not successful
|
||||||
|
fail:
|
||||||
|
msg: "Failed to clone agent repository after multiple attempts"
|
||||||
|
when: clone_result is failed or (clone_result is success and not git_check.stat.exists)
|
||||||
|
|
||||||
- name: Install Agent
|
- name: Install Agent
|
||||||
become: yes
|
become: yes
|
||||||
|
|||||||
@ -81,7 +81,68 @@
|
|||||||
- name: Get Docker Info
|
- name: Get Docker Info
|
||||||
command: docker info
|
command: docker info
|
||||||
|
|
||||||
- name: Restart Agent processes
|
- name: Stop Agent processes
|
||||||
supervisorctl:
|
supervisorctl:
|
||||||
name: "agent:"
|
name: "agent:"
|
||||||
state: restarted
|
state: stopped
|
||||||
|
register: stop_result
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
|
until: stop_result is success
|
||||||
|
ignore_errors: yes
|
||||||
|
|
||||||
|
- name: Wait for Agent processes to stop
|
||||||
|
shell: |
|
||||||
|
for i in $(seq 1 30); do
|
||||||
|
if ! supervisorctl status agent: | grep -q "RUNNING"; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
exit 1
|
||||||
|
register: wait_result
|
||||||
|
changed_when: false
|
||||||
|
when: stop_result is success
|
||||||
|
|
||||||
|
- name: Start Agent processes in order
|
||||||
|
supervisorctl:
|
||||||
|
name: "{{ item }}"
|
||||||
|
state: started
|
||||||
|
loop:
|
||||||
|
- agent:redis
|
||||||
|
- agent:web
|
||||||
|
- agent:worker-0
|
||||||
|
- agent:worker-1
|
||||||
|
register: start_result
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
|
until: start_result is success
|
||||||
|
ignore_errors: yes
|
||||||
|
|
||||||
|
- name: Wait for Agent processes to be ready
|
||||||
|
shell: |
|
||||||
|
for i in $(seq 1 30); do
|
||||||
|
if supervisorctl status agent: | grep -q "RUNNING" && ! supervisorctl status agent: | grep -q "STOPPED\|FATAL\|EXITED"; then
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
done
|
||||||
|
exit 1
|
||||||
|
register: wait_ready_result
|
||||||
|
changed_when: false
|
||||||
|
when: start_result is success
|
||||||
|
|
||||||
|
- name: Verify Agent processes status
|
||||||
|
supervisorctl:
|
||||||
|
name: "agent:"
|
||||||
|
state: started
|
||||||
|
register: verify_result
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
|
until: verify_result is success
|
||||||
|
ignore_errors: yes
|
||||||
|
|
||||||
|
- name: Fail if Agent processes not running
|
||||||
|
fail:
|
||||||
|
msg: "Agent processes failed to start properly"
|
||||||
|
when: verify_result is failed
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user