Patch summary (free text before the first "diff --git" line is ignored by patch tools):
  * roles/agent: the repository checkout gains depth/force/clone/update options and is
    retried up to 3 times (10 s apart) with errors ignored; follow-up tasks remove the
    checkout directory when the clone failed, check that repo/.git exists, and fail the
    play with an explicit message otherwise.
  * roles/docker: the single "Restart Agent processes" task is replaced by a
    stop -> wait -> ordered start -> wait -> verify -> fail sequence driven through
    supervisorctl.
NOTE(review): line breaks and indentation were reconstructed from a whitespace-collapsed
copy of this patch. Nesting follows the usual Ansible task layout (module arguments under
the module key, task keywords at task level) - confirm against the target files before
applying.

diff --git a/jcloud/playbooks/roles/agent/tasks/main.yml b/jcloud/playbooks/roles/agent/tasks/main.yml
index 5de232a..d231b17 100644
--- a/jcloud/playbooks/roles/agent/tasks/main.yml
+++ b/jcloud/playbooks/roles/agent/tasks/main.yml
@@ -6,6 +6,40 @@
     repo: '{{ agent_repository_url }}'
     dest: /home/jingrow/agent/repo
     remote: upstream
+    depth: 1
+    force: yes
+    clone: yes
+    update: no
+  register: clone_result
+  retries: 3
+  delay: 10
+  until: clone_result is success
+  ignore_errors: yes
+
+- name: Clean up failed clone
+  become: yes
+  become_user: jingrow
+  file:
+    path: /home/jingrow/agent/repo
+    state: absent
+  when: clone_result is failed
+  ignore_errors: yes
+
+- name: Verify clone success
+  become: yes
+  become_user: jingrow
+  stat:
+    path: /home/jingrow/agent/repo/.git
+  register: git_check
+  retries: 3
+  delay: 5
+  until: git_check.stat.exists
+  when: clone_result is success
+
+- name: Fail if clone not successful
+  fail:
+    msg: "Failed to clone agent repository after multiple attempts"
+  when: clone_result is failed or (clone_result is success and not git_check.stat.exists)
 
 - name: Install Agent
   become: yes
diff --git a/jcloud/playbooks/roles/docker/tasks/main.yml b/jcloud/playbooks/roles/docker/tasks/main.yml
index b894186..7823f76 100644
--- a/jcloud/playbooks/roles/docker/tasks/main.yml
+++ b/jcloud/playbooks/roles/docker/tasks/main.yml
@@ -81,7 +81,68 @@
 - name: Get Docker Info
   command: docker info
 
-- name: Restart Agent processes
+- name: Stop Agent processes
   supervisorctl:
     name: "agent:"
-    state: restarted
+    state: stopped
+  register: stop_result
+  retries: 3
+  delay: 5
+  until: stop_result is success
+  ignore_errors: yes
+
+- name: Wait for Agent processes to stop
+  shell: |
+    for i in $(seq 1 30); do
+      if ! supervisorctl status agent: | grep -q "RUNNING"; then
+        exit 0
+      fi
+      sleep 1
+    done
+    exit 1
+  register: wait_result
+  changed_when: false
+  when: stop_result is success
+
+- name: Start Agent processes in order
+  supervisorctl:
+    name: "{{ item }}"
+    state: started
+  loop:
+    - agent:redis
+    - agent:web
+    - agent:worker-0
+    - agent:worker-1
+  register: start_result
+  retries: 3
+  delay: 5
+  until: start_result is success
+  ignore_errors: yes
+
+- name: Wait for Agent processes to be ready
+  shell: |
+    for i in $(seq 1 30); do
+      if supervisorctl status agent: | grep -q "RUNNING" && ! supervisorctl status agent: | grep -q "STOPPED\|FATAL\|EXITED"; then
+        exit 0
+      fi
+      sleep 1
+    done
+    exit 1
+  register: wait_ready_result
+  changed_when: false
+  when: start_result is success
+
+- name: Verify Agent processes status
+  supervisorctl:
+    name: "agent:"
+    state: started
+  register: verify_result
+  retries: 3
+  delay: 5
+  until: verify_result is success
+  ignore_errors: yes
+
+- name: Fail if Agent processes not running
+  fail:
+    msg: "Agent processes failed to start properly"
+  when: verify_result is failed