"""Post-deployment sanity checks: crawl the site for broken assets and
links, and (eventually) exercise the signup flow."""

import contextlib
import os
import platform
import re
import subprocess
import urllib.request
from urllib.parse import urlsplit, urlunsplit

import click
import frappe
import requests
from bs4 import BeautifulSoup, SoupStrainer
from frappe.core.utils import find
from selenium import webdriver
from selenium.common import WebDriverException
from selenium.webdriver.chrome.service import Service as ChromeService

CHROMEDRIVER_PATH = os.path.expanduser("~/chromedriver")

# Selenium driver handle, set up by initialize_webdriver()
chrome = None

try:
    WEBSITE = frappe.utils.get_url()
except Exception:
    WEBSITE = "https://jcloud.jingrow.com"


def checks():
    # Skip the browser-based checks on CI runners
    if os.environ.get("CI"):
        return

    print("Running sanity checks...")
    try:
        if not initialize_webdriver():
            return
        test_browser_assets()
        test_signup_flow()
    except Exception as e:
        click.secho(f"An error occurred: {e}", fg="yellow")
        return
    finally:
        with contextlib.suppress(Exception):
            chrome.quit()


def initialize_webdriver():
    if not os.path.exists(CHROMEDRIVER_PATH):
        click.secho(
            f"Chromedriver not found at {CHROMEDRIVER_PATH}, downloading the latest stable build",
            fg="yellow",
        )
        download_chromedriver()

    global chrome
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-setuid-sandbox")
    service = ChromeService(executable_path=CHROMEDRIVER_PATH)
    try:
        chrome = webdriver.Chrome(service=service, options=options)
    except WebDriverException as e:
        # On a driver/browser version mismatch, Selenium's error message
        # contains the installed browser version; download a matching
        # driver and retry.
        version = re.search(r"is (\d+\.\d+\.\d+\.\d+) with", e.msg).group(1)
        download_chromedriver(version=version)
        chrome = webdriver.Chrome(service=service, options=options)
    return True


def download_chromedriver(version=None):
    if version:
        # Fetch the latest patch release for the installed browser's build,
        # e.g. build "120.0.6099" for Chrome 120.0.6099.109
        build_version = version.rsplit(".", 1)[0]
        release_url = "https://googlechromelabs.github.io/chrome-for-testing/latest-patch-versions-per-build-with-downloads.json"
        releases = requests.get(release_url).json()
        builds = releases["builds"][build_version]["downloads"]["chromedriver"]
    else:
        release_url = "https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json"
        releases = requests.get(release_url).json()
        builds = releases["channels"]["Stable"]["downloads"]["chromedriver"]

    target_platform = get_platform()
    download_url = find(builds, lambda x: x["platform"] == target_platform)["url"]
    subprocess.check_output(f"curl -o chromedriver.zip {download_url}".split())
    subprocess.check_output(
        f"unzip -o -j chromedriver.zip chromedriver-{target_platform}/chromedriver -d {os.path.expanduser('~')}".split()
    )


def get_platform():
    # Platform keys as used by the Chrome for Testing download manifests
    if platform.system().lower() == "linux":
        return "linux64"
    if platform.system().lower() == "darwin":
        if platform.machine().lower() == "arm64":
            return "mac-arm64"
        return "mac-x64"
    return None


def test_browser_assets():
    print(f"\nChecking health of assets and links for {WEBSITE}")
    hyperlinks = extract_hyperlinks(WEBSITE)
    for url in hyperlinks:
        Link(url).check()


def test_signup_flow():
    print(f"\nTesting signup flow for {WEBSITE}")
    click.secho("NOT IMPLEMENTED!", fg="yellow")


class Link:
    def __init__(self, address):
        self.address = address

    def check(self, address=None):
        if not address:
            address = self.address
        try:
            request = urllib.request.Request(
                url=address,
                headers={
                    "user-agent": (
                        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6)"
                        " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90"
                        " Safari/537.36"
                    )
                },
            )
            # Note: urlopen raises HTTPError for most 4xx/5xx responses,
            # which is reported by the except branch below
            response = urllib.request.urlopen(request)
            if response.status in [400, 403, 404, 408, 409, 501, 502, 503]:
                click.secho(f"{address} ❌ ({response.status}: {response.reason})", fg="red")
            else:
                click.secho(f"{address} ✅", fg="green")
        except Exception as err:
            click.secho(f"{address} ⚠️ ({err})", fg="yellow")


def pattern_adjust(a, address):
    """Normalize an extracted link to an absolute URL.

    Returns 0 for in-page fragments (so they get skipped) and None for
    links that cannot be normalized.
    """
    if a.startswith("/"):
        return f"{WEBSITE}{a}"
    try:
        if re.match("^#", a):
            return 0
        r = urlsplit(a)
        if r.scheme == "" and (r.netloc != "" or r.path != ""):
            d = urlunsplit(r)
            # Protocol-relative links ("//host/path") get an explicit scheme
            if re.match("^//", d):
                m = re.search(r"(?<=//)\S+", d)
                d = m.group(0)
                return "https://" + d
        elif r.scheme == "" and r.netloc == "":
            return address + a
        else:
            return a
    except Exception:
        pass


def extract_hyperlinks(address):
    chrome.get(address)
    chrome.implicitly_wait(5)
    response = chrome.page_source
    hyperlinks = set()
    # Tags to scan, mapped to the attribute that holds the URL
    tags = {"a": "href", "img": "src", "script": "src", "link": "href"}

    for tag, attribute in tags.items():
        try:
            for link in BeautifulSoup(response, "html.parser", parse_only=SoupStrainer(tag)):
                if link.has_attr(attribute):
                    adjusted = pattern_adjust(link[attribute], address)
                    if adjusted:
                        hyperlinks.add(adjusted)
        except Exception as err:
            click.secho(f"{address} ⚠️ ({err})", fg="yellow")
    return hyperlinks


if __name__ == "__main__":
    checks()