strict HTTP header field name validation

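Validate the original header name, not the result of Unicode uppercasing. Background: Latin-1 0xDF (ß) is traditionally uppercased to 0x53 0x53 ("SS"), which puts it back in the ASCII range, so a non-ASCII name could pass an ASCII-only check if it were uppercased first.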
2026-07-02 10:41:30 +08:00 · 2023-12-07 09:41:10 +01:00 · 2023-12-07 09:41:10 +01:00 · f5501111a2
commit f5501111a2
parent fd67112f40
3 changed files with 17 additions and 3 deletions
--- a/gunicorn/http/message.py
+++ b/gunicorn/http/message.py
@ -98,12 +98,16 @@ class Message(object):
                raise InvalidHeader(curr)
            name, value = curr.split(":", 1)
            if self.cfg.strip_header_spaces:
-                name = name.rstrip(" \t").upper()
-            else:
-                name = name.upper()
+                name = name.rstrip(" \t")
            if not TOKEN_RE.fullmatch(name):
                raise InvalidHeaderName(name)

+            # Uppercasing here is still risky, but it is more correct than
+            # uppercasing before the pattern match: once the bytes have been
+            # decoded to str, an 8-bit character can case-shift into ASCII, e.g.
+            # b"\xDF".decode("latin-1").upper().encode("ascii") == b"SS"
+            name = name.upper()
+
            value = [value.lstrip(" \t")]

            # Consume value continuation lines
--- a/tests/requests/invalid/nonascii_03.http
+++ b/tests/requests/invalid/nonascii_03.http
@ -0,0 +1,5 @@
+GET /germans.. HTTP/1.1\r\n
+Content-Lengthß: 3\r\n
+Content-Length: 3\r\n
+\r\n
+ÄÄÄ
--- a/tests/requests/invalid/nonascii_03.py
+++ b/tests/requests/invalid/nonascii_03.py
@ -0,0 +1,5 @@
+from gunicorn.config import Config
+from gunicorn.http.errors import InvalidHeaderName
+
+cfg = Config()
+request = InvalidHeaderName