# jcloude/press/sanity.py

import contextlib
import os
import platform
import re
import subprocess
import urllib.error
import urllib.request
from urllib.parse import urljoin, urlsplit, urlunsplit
import click
import frappe
import requests
from bs4 import BeautifulSoup, SoupStrainer
from frappe.core.utils import find
from selenium import webdriver
from selenium.common import WebDriverException
from selenium.webdriver.chrome.service import Service as ChromeService

CHROMEDRIVER_PATH = os.path.expanduser("~/chromedriver")
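
# Resolve the site URL from the active Frappe site; fall back to the
# public address when no site context is available.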
try:
    WEBSITE = frappe.utils.get_url()
except Exception:
    WEBSITE = "https://jcloud.jingrow.com"


def checks():
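    """Run browser-based sanity checks against the current site (skipped in CI)."""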
    if os.environ.get("CI"):
        return
    print("Running sanity checks...")
    try:
        if not initialize_webdriver():
            return
        test_browser_assets()
        test_signup_flow()
    except Exception as e:
        click.secho(f"An error occurred: {e}", fg="yellow")
        return
    finally:
        # `chrome` may never have been created if initialization failed;
        # suppress the resulting NameError along with quit() failures.
        with contextlib.suppress(Exception):
            chrome.quit()


def initialize_webdriver():
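    """Start a headless Chrome session, fetching chromedriver on demand.

    Returns True once the global `chrome` driver is ready.
    """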
    if not os.path.exists(CHROMEDRIVER_PATH):
        click.secho(
            f"Chromedriver not found at {CHROMEDRIVER_PATH}, downloading from https://chromedriver.chromium.org/",
            fg="yellow",
        )
        download_chromedriver()

    global chrome
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-setuid-sandbox")
    service = ChromeService(executable_path=CHROMEDRIVER_PATH)
    try:
        chrome = webdriver.Chrome(service=service, options=options)
    except WebDriverException as e:
        # Selenium's error message includes the installed browser version
        # ("... is X.Y.Z.W with ..."); parse it, fetch the matching
        # chromedriver build, and retry once.
        version = re.search(r"is (\d+\.\d+\.\d+\.\d+) with", e.msg).group(1)
        download_chromedriver(version=version)
        chrome = webdriver.Chrome(service=service, options=options)
    return True


def download_chromedriver(version=None):
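    """Download chromedriver into the home directory.

    With `version`, fetch the driver matching that browser build from the
    Chrome for Testing feed; otherwise fetch the latest stable driver.
    """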
    if version:
        build_version = version.rsplit(".", 1)[0]
        release_url = "https://googlechromelabs.github.io/chrome-for-testing/latest-patch-versions-per-build-with-downloads.json"
        releases = requests.get(release_url).json()
        builds = releases["builds"][build_version]["downloads"]["chromedriver"]
    else:
        release_url = "https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json"
        releases = requests.get(release_url).json()
        builds = releases["channels"]["Stable"]["downloads"]["chromedriver"]

    # Named target_platform to avoid shadowing the platform module.
    target_platform = get_platform()
    download_url = find(builds, lambda x: x["platform"] == target_platform)["url"]
    subprocess.check_output(f"curl -o chromedriver.zip {download_url}".split())
    subprocess.check_output(
        f"unzip -o -j chromedriver.zip chromedriver-{target_platform}/chromedriver -d {os.path.expanduser('~')}".split()
    )


def get_platform():
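    """Map the host OS/architecture to a Chrome for Testing platform key."""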
    if platform.system().lower() == "linux":
        return "linux64"
    if platform.system().lower() == "darwin":
        if platform.machine().lower() == "arm64":
            return "mac-arm64"
        return "mac-x64"
    return None


def test_browser_assets():
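    """Health-check every asset and hyperlink found on the site's front page."""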
print(f"\nChecking health of assets and links for {WEBSITE}")
hyperlinks = extract_hyperlinks(WEBSITE)
for url in hyperlinks:
Link(url).check()
def test_signup_flow():
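    """Exercise the signup flow end to end (not implemented yet)."""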
print(f"\nTesting signup flow for {WEBSITE}")
click.secho("NOT IMPLEMENTED!", fg="yellow")
class Link:
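    """A hyperlink or asset URL whose availability can be checked."""
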
    def __init__(self, address):
        self.address = address

    def check(self, address=None):
        if not address:
            address = self.address
        request = urllib.request.Request(
            url=address,
            headers={
                "user-agent": (
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6)"
                    " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90"
                    " Safari/537.36"
                )
            },
        )
        try:
            urllib.request.urlopen(request)
            click.secho(f"{address}", fg="green")
        except urllib.error.HTTPError as err:
            # urlopen raises HTTPError for 4xx/5xx responses, so broken
            # links are reported here rather than via a status check on a
            # successful-response object (which would never see those codes).
            click.secho(f"{address} ❌ ({err.code}: {err.reason})", fg="red")
        except Exception as err:
            click.secho(f"{address} ⚠️ ({err})", fg="yellow")


def pattern_adjust(a, address):
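    """Normalize a raw href/src attribute into an absolute URL.

    Returns a falsy value (0 or None) for links that cannot or need not be
    checked, such as in-page anchors, so callers can skip them.
    """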
    if a.startswith("/") and not a.startswith("//"):
        # Site-relative path: prefix the site root.
        return f"{WEBSITE}{a}"
    try:
        if a.startswith("#"):
            # In-page anchor; nothing to fetch.
            return 0
        r = urlsplit(a)
        if r.scheme == "" and r.netloc != "":
            # Protocol-relative link ("//host/path"): default to https.
            d = urlunsplit(r)
            m = re.search(r"(?<=//)\S+", d)
            return "https://" + m.group(0)
        elif r.scheme == "":
            # Relative link: resolve against the page it was found on.
            return urljoin(address, a)
        else:
            return a
    except Exception:
        pass


def extract_hyperlinks(address):
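    """Render `address` in Chrome and collect link/asset URLs from its HTML."""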
    chrome.get(address)
    chrome.implicitly_wait(5)
    response = chrome.page_source
    hyperlinks = set()
    # Tags that can reference a page or asset, with the attribute to read.
    tags = {"a": "href", "img": "src", "script": "src", "link": "href"}
    for key, value in tags.items():
        try:
            for link in BeautifulSoup(response, "html.parser", parse_only=SoupStrainer(key)):
                if link.has_attr(value):
                    p = pattern_adjust(link[value], address)
                    if p and (p not in hyperlinks):
                        hyperlinks.add(p)
        except Exception as err:
            click.secho(f"{address} ⚠️ ({err})", fg="yellow")
    return hyperlinks


if __name__ == "__main__":
    checks()