mirror of
https://github.com/frappe/gunicorn.git
synced 2026-01-14 11:09:11 +08:00
This replaces the very old sitemap generator, which was over 2 kLOC and only compatible with Python 2. According to the stored lastmod, the generator hasn't been used since 2010. The minimal replacement script scans the static site for HTML files and uses git to deduce the last modification date of each page. The sitemap xmlns version was updated to the latest, 0.9, from sitemaps.org. The index page was given a higher priority since the other pages are just redirects to the index with anchors. The output file is pretty-printed to help with diffs. Static assets (CSS, images...) aren't listed in the sitemap anymore.
42 lines
2.0 KiB
Python
42 lines
2.0 KiB
Python
import os
|
|
import subprocess
|
|
from xml.etree import ElementTree
|
|
|
|
|
|
def main():
    """Generate the sitemap for the ``site`` directory next to this script.

    The index page is given priority 1.0; all other pages fall back to the
    default priority applied by generate().
    """
    script_directory = os.path.dirname(__file__)
    site_directory = os.path.join(script_directory, 'site')
    priorities = {'index.html': 1.0}
    generate(site_path=site_directory, special_priorities=priorities)
|
|
|
|
|
|
def _git_last_modification(directory, filename):
    """Return the committer date of the last commit that touched a file.

    git is run inside *directory* so that the repository is discovered from
    the file's own location rather than from the caller's working directory.

    Returns the strict ISO 8601 date printed by git's ``%cI`` placeholder, or
    an empty string when git reports no commit for the file.
    Raises subprocess.CalledProcessError when git exits with an error.
    """
    output = subprocess.check_output(
        # '--' keeps a filename from being parsed as an option or revision.
        ['git', 'log', '-1', '--format=%cI', '--', filename], cwd=directory)
    return output.decode('ascii').strip()


def generate(site_path, special_priorities, directory_index='index.html',
             base_url='http://gunicorn.org/'):
    """Write ``sitemap.xml`` into *site_path*, listing every HTML page under it.

    Parameters:
        site_path: directory that is walked recursively for ``*.html`` files
            and that receives the generated ``sitemap.xml``.
        special_priorities: mapping from a page path relative to *site_path*
            (as produced by os.path.relpath) to its sitemap priority; pages
            that are not listed get 0.5.
        directory_index: filename that is published under the URL of its
            directory instead of under its own name.
        base_url: prefix prepended to every relative page URL.

    The last modification date of each page comes from ``git log``; pages for
    which git knows no commit are listed without a ``<lastmod>`` element.

    Raises subprocess.CalledProcessError when git fails (for example when
    *site_path* is not inside a git repository).
    """
    pages = []
    for root, _dirs, filenames in os.walk(site_path):
        for filename in filenames:
            if not filename.endswith('.html'):
                continue
            relative_path = os.path.relpath(os.path.join(root, filename), site_path)
            if filename == directory_index:
                relative_url = os.path.dirname(relative_path)
            else:
                relative_url = relative_path
            # URLs always use forward slashes, whatever the OS separator is.
            relative_url = relative_url.replace(os.sep, '/')
            pages.append((
                base_url + relative_url,
                _git_last_modification(root, filename),
                str(special_priorities.get(relative_path, 0.5)),
            ))

    # Sort on the final URL rather than on the filename, because the
    # directory_index option may have altered the published path.
    pages.sort(key=lambda page: page[0])

    # Whitespace-only text/tail values put every element on its own line so
    # that the generated file diffs cleanly.
    url_indent = '\n '
    child_indent = '\n '

    urlset = ElementTree.Element('urlset', xmlns='http://www.sitemaps.org/schemas/sitemap/0.9')
    for location, last_modification, priority in pages:
        url_element = ElementTree.SubElement(urlset, 'url')
        url_element.text = child_indent
        children = [('loc', location)]
        if last_modification:
            # An empty <lastmod/> is not a valid date, so leave it out.
            children.append(('lastmod', last_modification))
        children.append(('priority', priority))
        for tag, text in children:
            child = ElementTree.SubElement(url_element, tag)
            child.text = text
            child.tail = child_indent
        url_element[-1].tail = url_indent
        url_element.tail = url_indent

    if len(urlset):
        urlset.text = url_indent
        urlset[-1].tail = '\n'
    else:
        # No page found: still emit a well-formed, empty urlset.
        urlset.text = '\n'
    urlset.tail = '\n'

    with open(os.path.join(site_path, 'sitemap.xml'), 'wb') as sitemap_file:
        ElementTree.ElementTree(urlset).write(sitemap_file, encoding='UTF-8', xml_declaration=True)
|
|
|
|
|
|
# Only build the sitemap when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()
|