# -*- test-case-name: twisted.web.test.test_http -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
HyperText Transfer Protocol implementation.

This is the basic server-side protocol implementation used by the Twisted
Web server. It can parse HTTP 1.0 requests and supports many HTTP 1.1
features as well. Additionally, some functionality implemented here is
also useful for HTTP clients (such as the chunked encoding parser).

@var CACHED: A marker value to be returned from cache-related request methods
    to indicate to the caller that a cached response will be usable and no
    response body should be generated.

@var FOUND: An HTTP response code indicating a temporary redirect.

@var NOT_MODIFIED: An HTTP response code indicating that a requested
    pre-condition (for example, the condition represented by an
    I{If-Modified-Since} header is present in the request) has succeeded. This
    indicates a response body cached by the client can be used.

@var PRECONDITION_FAILED: An HTTP response code indicating that a requested
    pre-condition (for example, the condition represented by an I{If-None-Match}
    header is present in the request) has failed. This should typically
    indicate that the server has not taken the requested action.

@var maxChunkSizeLineLength: Maximum allowable length of the CRLF-terminated
    line that indicates the size of a chunk and the extensions associated with
    it, as in the HTTP 1.1 chunked I{Transfer-Encoding} (RFC 7230 section 4.1).
    This limits how much data may be buffered when decoding the line.
"""

from __future__ import annotations

__all__ = [
    "SWITCHING",
    "OK",
    "CREATED",
    "ACCEPTED",
    "NON_AUTHORITATIVE_INFORMATION",
    "NO_CONTENT",
    "RESET_CONTENT",
    "PARTIAL_CONTENT",
    "MULTI_STATUS",
    "MULTIPLE_CHOICE",
    "MOVED_PERMANENTLY",
    "FOUND",
    "SEE_OTHER",
    "NOT_MODIFIED",
    "USE_PROXY",
    "TEMPORARY_REDIRECT",
    "PERMANENT_REDIRECT",
    "BAD_REQUEST",
    "UNAUTHORIZED",
    "PAYMENT_REQUIRED",
    "FORBIDDEN",
    "NOT_FOUND",
    "NOT_ALLOWED",
    "NOT_ACCEPTABLE",
    "PROXY_AUTH_REQUIRED",
    "REQUEST_TIMEOUT",
    "CONFLICT",
    "GONE",
    "LENGTH_REQUIRED",
    "PRECONDITION_FAILED",
    "REQUEST_ENTITY_TOO_LARGE",
    "REQUEST_URI_TOO_LONG",
    "UNSUPPORTED_MEDIA_TYPE",
    "REQUESTED_RANGE_NOT_SATISFIABLE",
    "EXPECTATION_FAILED",
    "IM_A_TEAPOT",
    "INTERNAL_SERVER_ERROR",
    "NOT_IMPLEMENTED",
    "BAD_GATEWAY",
    "SERVICE_UNAVAILABLE",
    "GATEWAY_TIMEOUT",
    "HTTP_VERSION_NOT_SUPPORTED",
    "INSUFFICIENT_STORAGE_SPACE",
    "NOT_EXTENDED",
    "RESPONSES",
    "CACHED",
    "urlparse",
    "parse_qs",
    "datetimeToString",
    "datetimeToLogString",
    "timegm",
    "stringToDatetime",
    "toChunk",
    "fromChunk",
    "parseContentRange",
    "StringTransport",
    "HTTPClient",
    "NO_BODY_CODES",
    "Request",
    "PotentialDataLoss",
    "HTTPChannel",
    "HTTPFactory",
]


import base64
import binascii
import calendar
import math
import os
import re
import tempfile
import warnings
from email import message_from_bytes
from email.message import EmailMessage, Message
from io import BufferedIOBase, BytesIO, TextIOWrapper
from time import gmtime, time
from typing import (
    AnyStr,
    Callable,
    Dict,
    List,
    Optional,
    Protocol as TypingProtocol,
    Tuple,
)
from urllib.parse import (
    ParseResultBytes,
    unquote_to_bytes as unquote,
    urlparse as _urlparse,
)

from zope.interface import Attribute, Interface, implementer, provider

from incremental import Version

from twisted.internet import address, interfaces, protocol
from twisted.internet._producer_helpers import _PullToPush
from twisted.internet.defer import Deferred
from twisted.internet.interfaces import (
    IAddress,
    IDelayedCall,
    IProtocol,
    IReactorTime,
    ITCPTransport,
)
from twisted.internet.protocol import Protocol
from twisted.logger import Logger
from twisted.protocols import basic, policies
from twisted.python import log
from twisted.python.compat import nativeString, networkString
from twisted.python.components import proxyForInterface
from twisted.python.deprecate import deprecated, deprecatedModuleAttribute
from twisted.python.failure import Failure
from twisted.web._abnf import _hexint, _istoken
from twisted.web._responses import (
    ACCEPTED,
    BAD_GATEWAY,
    BAD_REQUEST,
    CONFLICT,
    CREATED,
    EXPECTATION_FAILED,
    FORBIDDEN,
    FOUND,
    GATEWAY_TIMEOUT,
    GONE,
    HTTP_VERSION_NOT_SUPPORTED,
    IM_A_TEAPOT,
    INSUFFICIENT_STORAGE_SPACE,
    INTERNAL_SERVER_ERROR,
    LENGTH_REQUIRED,
    MOVED_PERMANENTLY,
    MULTI_STATUS,
    MULTIPLE_CHOICE,
    NO_CONTENT,
    NON_AUTHORITATIVE_INFORMATION,
    NOT_ACCEPTABLE,
    NOT_ALLOWED,
    NOT_EXTENDED,
    NOT_FOUND,
    NOT_IMPLEMENTED,
    NOT_MODIFIED,
    OK,
    PARTIAL_CONTENT,
    PAYMENT_REQUIRED,
    PERMANENT_REDIRECT,
    PRECONDITION_FAILED,
    PROXY_AUTH_REQUIRED,
    REQUEST_ENTITY_TOO_LARGE,
    REQUEST_TIMEOUT,
    REQUEST_URI_TOO_LONG,
    REQUESTED_RANGE_NOT_SATISFIABLE,
    RESET_CONTENT,
    RESPONSES,
    SEE_OTHER,
    SERVICE_UNAVAILABLE,
    SWITCHING,
    TEMPORARY_REDIRECT,
    UNAUTHORIZED,
    UNSUPPORTED_MEDIA_TYPE,
    USE_PROXY,
)
from twisted.web.http_headers import (
    Headers,
    InvalidHeaderName,
    _nameEncoder,
    _sanitizeLinearWhitespace,
)
from twisted.web.iweb import IAccessLogFormatter, INonQueuedRequestFactory, IRequest

try:
    from twisted.web._http2 import H2Connection

    H2_ENABLED = True
except ImportError:
    H2_ENABLED = False


# A common request timeout -- 1 minute. This is roughly what nginx uses, and
# so it seems to be a good choice for us too.
_REQUEST_TIMEOUT = 1 * 60

protocol_version = "HTTP/1.1"

CACHED = """Magic constant returned by http.Request methods to set cache
validation headers when the request is conditional and the value fails
the condition."""

# backwards compatibility
responses = RESPONSES


# datetime parsing and formatting
weekdayname = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
_months = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]
monthname = [None] + _months
_weekdaynameBytes = [s.encode("ascii") for s in weekdayname]
_monthnameBytes = [None] + [s.encode("ascii") for s in _months]
weekdayname_lower = [name.lower() for name in weekdayname]
monthname_lower = [name and name.lower() for name in monthname]


def _parseRequestLine(line: bytes) -> tuple[bytes, bytes, bytes]:
    """
    Parse an HTTP request line, which looks like:

        GET /foo/bar HTTP/1.1

    This function attempts to validate the well-formedness of
    the line. RFC 9112 section 3 provides this ABNF:

        request-line = method SP request-target SP HTTP-version

    We allow any method that is a valid token:

        method = token
        token = 1*tchar
        tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
              / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
              / DIGIT / ALPHA

    We allow any non-empty request-target that contains only printable
    ASCII characters (no whitespace).

    The RFC defines HTTP-version like this:

        HTTP-version = HTTP-name "/" DIGIT "." DIGIT
        HTTP-name = %s"HTTP"

    However, this function is more strict than the RFC: we only allow
    HTTP versions of 1.0 and 1.1, as later versions of HTTP don't use
    a request line.

    @returns: C{(method, request, version)} three-tuple

    @raises: L{ValueError} when malformed
    """
    method, request, version = line.split(b" ")

    if not _istoken(method):
        raise ValueError("Invalid method")

    for c in request:
        if c <= 32 or c > 126:
            raise ValueError("Invalid request-target")
    if request == b"":
        raise ValueError("Empty request-target")

    if version != b"HTTP/1.1" and version != b"HTTP/1.0":
        raise ValueError("Invalid version")

    return method, request, version
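
# Illustrative example (added for clarity; not part of the original module):
# how _parseRequestLine treats well-formed and malformed request lines, per
# the validation rules in its docstring.
#
#   >>> _parseRequestLine(b"GET /foo/bar HTTP/1.1")
#   (b'GET', b'/foo/bar', b'HTTP/1.1')
#   >>> _parseRequestLine(b"GET /foo/bar HTTP/2.0")
#   Traceback (most recent call last):
#       ...
#   ValueError: Invalid version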


def _parseContentType(line: bytes) -> bytes:
    """
    Parse the Content-Type header.
    """
    msg = EmailMessage()
    msg["content-type"] = line.decode("charmap")
    key = msg.get_content_type()
    encodedKey = key.encode("charmap")
    return encodedKey


class _MultiPartParseException(Exception):
    """
    Failed to parse the multipart/form-data payload.
    """


def _getMultiPartArgs(content: bytes, ctype: bytes) -> dict[bytes, list[bytes]]:
    """
    Parse the content of a multipart/form-data request.
    """
    result = {}
    multiPartHeaders = b"MIME-Version: 1.0\r\n" + b"Content-Type: " + ctype + b"\r\n"
    msg = message_from_bytes(multiPartHeaders + content)
    if not msg.is_multipart():
        raise _MultiPartParseException("Not a multipart.")

    part: Message
    # "per Python docs, a list of Message objects when is_multipart() is True,
    # or a string when is_multipart() is False"
    for part in msg.get_payload():  # type:ignore[assignment]
        name: str | None = part.get_param(
            "name", header="content-disposition"
        )  # type:ignore[assignment]
        if not name:
            continue
        payload: bytes = part.get_payload(decode=True)  # type:ignore[assignment]
        result[name.encode("utf8")] = [payload]
    return result
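
# Illustrative example (added for clarity; not part of the original module):
# a minimal multipart/form-data body with one field, assuming a hypothetical
# boundary of "b". The exact bytes here are illustrative test input, not
# fixtures from the test suite.
#
#   >>> body = (
#   ...     b'--b\r\n'
#   ...     b'Content-Disposition: form-data; name="field"\r\n'
#   ...     b'\r\n'
#   ...     b'value\r\n'
#   ...     b'--b--\r\n'
#   ... )
#   >>> _getMultiPartArgs(body, b"multipart/form-data; boundary=b")
#   {b'field': [b'value']}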


def urlparse(url):
    """
    Parse a URL into six components.

    This is similar to C{urlparse.urlparse}, but rejects C{str} input
    and always produces C{bytes} output.

    @type url: C{bytes}

    @raise TypeError: The given url was a C{str} string instead of a
        C{bytes}.

    @return: The scheme, net location, path, params, query string, and fragment
        of the URL - all as C{bytes}.
    @rtype: C{ParseResultBytes}
    """
    if isinstance(url, str):
        raise TypeError("url must be bytes, not unicode")
    scheme, netloc, path, params, query, fragment = _urlparse(url)
    if isinstance(scheme, str):
        scheme = scheme.encode("ascii")
        netloc = netloc.encode("ascii")
        path = path.encode("ascii")
        query = query.encode("ascii")
        fragment = fragment.encode("ascii")
    return ParseResultBytes(scheme, netloc, path, params, query, fragment)
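
# Illustrative example (added for clarity; not part of the original module):
#
#   >>> urlparse(b"http://example.com/a?q=v")  # doctest: +NORMALIZE_WHITESPACE
#   ParseResultBytes(scheme=b'http', netloc=b'example.com', path=b'/a',
#                    params=b'', query=b'q=v', fragment=b'')
#   >>> urlparse("http://example.com/")
#   Traceback (most recent call last):
#       ...
#   TypeError: url must be bytes, not unicode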


def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """
    Like C{cgi.parse_qs}, but with support for parsing byte strings on Python 3.

    This was created to help with Python 2 to Python 3 migration.
    Consider using L{urllib.parse.parse_qs}.

    @type qs: C{bytes}
    """
    d = {}
    items = [s2 for s1 in qs.split(b"&") for s2 in s1.split(b";")]
    for item in items:
        try:
            k, v = item.split(b"=", 1)
        except ValueError:
            if strict_parsing:
                raise
            continue
        if v or keep_blank_values:
            k = unquote(k.replace(b"+", b" "))
            v = unquote(v.replace(b"+", b" "))
            if k in d:
                d[k].append(v)
            else:
                d[k] = [v]
    return d
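
# Illustrative example (added for clarity; not part of the original module):
# repeated keys accumulate, and blank values are dropped unless
# keep_blank_values is set.
#
#   >>> parse_qs(b"foo=bar&foo=baz&quux=spam")
#   {b'foo': [b'bar', b'baz'], b'quux': [b'spam']}
#   >>> parse_qs(b"empty=&x=1")
#   {b'x': [b'1']}
#   >>> parse_qs(b"empty=&x=1", keep_blank_values=1)
#   {b'empty': [b''], b'x': [b'1']}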


def datetimeToString(msSinceEpoch=None):
    """
    Convert seconds since epoch to HTTP datetime string.

    @rtype: C{bytes}
    """
    year, month, day, hh, mm, ss, wd, _, _ = (
        gmtime() if msSinceEpoch is None else gmtime(msSinceEpoch)
    )
    return b"%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        _weekdaynameBytes[wd],
        day,
        _monthnameBytes[month],
        year,
        hh,
        mm,
        ss,
    )
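
# Illustrative example (added for clarity; not part of the original module).
# Despite the parameter name, the argument is seconds since the epoch:
#
#   >>> datetimeToString(784111777)
#   b'Sun, 06 Nov 1994 08:49:37 GMT'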


def datetimeToLogString(msSinceEpoch=None):
    """
    Convert seconds since epoch to log datetime string.

    @rtype: C{str}
    """
    if msSinceEpoch is None:
        # This code path is apparently never used in practice inside Twisted.
        msSinceEpoch = time()  # pragma: no cover
    year, month, day, hh, mm, ss, wd, y, z = gmtime(msSinceEpoch)
    s = "[%02d/%3s/%4d:%02d:%02d:%02d +0000]" % (
        day,
        monthname[month],
        year,
        hh,
        mm,
        ss,
    )
    return s


def timegm(year, month, day, hour, minute, second):
    """
    Convert time tuple in GMT to seconds since epoch, GMT
    """
    EPOCH = 1970
    if year < EPOCH:
        raise ValueError("Years prior to %d not supported" % (EPOCH,))
    assert 1 <= month <= 12
    days = 365 * (year - EPOCH) + calendar.leapdays(EPOCH, year)
    for i in range(1, month):
        days = days + calendar.mdays[i]
    if month > 2 and calendar.isleap(year):
        days = days + 1
    days = days + day - 1
    hours = days * 24 + hour
    minutes = hours * 60 + minute
    seconds = minutes * 60 + second
    return seconds
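
# Illustrative example (added for clarity; not part of the original module):
# the epoch itself, and the classic RFC 1123 example date.
#
#   >>> timegm(1970, 1, 1, 0, 0, 0)
#   0
#   >>> timegm(1994, 11, 6, 8, 49, 37)
#   784111777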


def stringToDatetime(dateString):
    """
    Convert an HTTP date string (one of three formats) to seconds since epoch.

    @type dateString: C{bytes}
    """
    parts = dateString.decode("ascii").split()

    if not parts[0][0:3].lower() in weekdayname_lower:
        # Weekday is stupid. Might have been omitted.
        try:
            return stringToDatetime(b"Sun, " + dateString)
        except ValueError:
            # Guess not.
            pass

    partlen = len(parts)
    if (partlen == 5 or partlen == 6) and parts[1].isdigit():
        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # This is the normal format
        day = parts[1]
        month = parts[2]
        year = parts[3]
        time = parts[4]
    elif (partlen == 3 or partlen == 4) and parts[1].find("-") != -1:
        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # Two digit year, yucko.
        day, month, year = parts[1].split("-")
        time = parts[2]
        year = int(year)
        if year < 69:
            year = year + 2000
        elif year < 100:
            year = year + 1900
    elif len(parts) == 5:
        # 3rd date format: Sun Nov  6 08:49:37 1994
        # ANSI C asctime() format.
        day = parts[2]
        month = parts[1]
        year = parts[4]
        time = parts[3]
    else:
        raise ValueError("Unknown datetime format %r" % dateString)

    day = int(day)
    month = int(monthname_lower.index(month.lower()))
    year = int(year)
    hour, min, sec = map(int, time.split(":"))
    return int(timegm(year, month, day, hour, min, sec))
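
# Illustrative example (added for clarity; not part of the original module):
# all three accepted formats parse to the same instant.
#
#   >>> stringToDatetime(b"Sun, 06 Nov 1994 08:49:37 GMT")
#   784111777
#   >>> stringToDatetime(b"Sunday, 06-Nov-94 08:49:37 GMT")
#   784111777
#   >>> stringToDatetime(b"Sun Nov  6 08:49:37 1994")
#   784111777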


def toChunk(data):
    """
    Convert string to a chunk.

    @type data: C{bytes}

    @returns: a tuple of C{bytes} representing the chunked encoding of data
    """
    return (networkString(f"{len(data):x}"), b"\r\n", data, b"\r\n")


def fromChunk(data: bytes) -> Tuple[bytes, bytes]:
    """
    Convert chunk to string.

    Note that this function is not specification compliant: it doesn't handle
    chunk extensions.

    @type data: C{bytes}

    @return: tuple of (result, remaining) - both C{bytes}.

    @raise ValueError: If the given data is not a correctly formatted chunked
        byte string.
    """
    prefix, rest = data.split(b"\r\n", 1)
    length = _hexint(prefix)
    if length < 0:
        raise ValueError("Chunk length must be >= 0, not %d" % (length,))
    if rest[length : length + 2] != b"\r\n":
        raise ValueError("chunk must end with CRLF")
    return rest[:length], rest[length + 2 :]
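
# Illustrative example (added for clarity; not part of the original module):
# a round trip through the chunked encoding.
#
#   >>> b"".join(toChunk(b"hello"))
#   b'5\r\nhello\r\n'
#   >>> fromChunk(b"5\r\nhello\r\nextra")
#   (b'hello', b'extra')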


def parseContentRange(header):
    """
    Parse a content-range header into (start, end, realLength).

    realLength might be None if real length is not known ('*').
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (start, end, realLength)
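
# Illustrative example (added for clarity; not part of the original module).
# Note that this function operates on native strings, not bytes:
#
#   >>> parseContentRange("bytes 0-499/1234")
#   (0, 499, 1234)
#   >>> parseContentRange("bytes 500-999/*")
#   (500, 999, None)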


class _IDeprecatedHTTPChannelToRequestInterface(Interface):
    """
    The interface L{HTTPChannel} expects of L{Request}.
    """

    requestHeaders = Attribute(
        "A L{http_headers.Headers} instance giving all received HTTP request "
        "headers."
    )

    responseHeaders = Attribute(
        "A L{http_headers.Headers} instance holding all HTTP response "
        "headers to be sent."
    )

    def connectionLost(reason):
        """
        The underlying connection has been lost.

        @param reason: A failure instance indicating the reason why
            the connection was lost.
        @type reason: L{twisted.python.failure.Failure}
        """

    def gotLength(length):
        """
        Called when L{HTTPChannel} has determined the length, if any,
        of the incoming request's body.

        @param length: The length of the request's body.
        @type length: L{int} if the request declares its body's length
            and L{None} if it does not.
        """

    def handleContentChunk(data):
        """
        Deliver a received chunk of body data to the request. Note
        this does not imply chunked transfer encoding.

        @param data: The received chunk.
        @type data: L{bytes}
        """

    def parseCookies():
        """
        Parse the request's cookies out of received headers.
        """

    def requestReceived(command, path, version):
        """
        Called when the entire request, including its body, has been
        received.

        @param command: The request's HTTP command.
        @type command: L{bytes}

        @param path: The request's path. Note: this is actually what
            RFC 7230 calls the URI.
        @type path: L{bytes}

        @param version: The request's HTTP version.
        @type version: L{bytes}
        """

    def __eq__(other: object) -> bool:
        """
        Determines if two requests are the same object.

        @param other: Another object whose identity will be compared
            to this instance's.

        @return: L{True} when the two are the same object and L{False}
            when not.
        """

    def __ne__(other: object) -> bool:
        """
        Determines if two requests are not the same object.

        @param other: Another object whose identity will be compared
            to this instance's.

        @return: L{True} when the two are not the same object and
            L{False} when they are.
        """

    def __hash__():
        """
        Generate a hash value for the request.

        @return: The request's hash value.
        @rtype: L{int}
        """


class StringTransport:
    """
    I am a BytesIO wrapper that conforms to the transport API. I support
    the C{writeSequence} method.
    """

    def __init__(self):
        self.s = BytesIO()

    def writeSequence(self, seq):
        self.s.write(b"".join(seq))

    def __getattr__(self, attr):
        return getattr(self.__dict__["s"], attr)


class HTTPClient(basic.LineReceiver):
    """
    A client for HTTP 1.0.

    Notes:
    You probably want to send a 'Host' header with the name of the site you're
    connecting to, in order to not break name based virtual hosting.

    @ivar length: The length of the request body in bytes.
    @type length: C{int}

    @ivar firstLine: Are we waiting for the first header line?
    @type firstLine: C{bool}

    @ivar __buffer: The buffer that stores the response to the HTTP request.
    @type __buffer: A C{BytesIO} object.

    @ivar _header: Part or all of an HTTP request header.
    @type _header: C{bytes}
    """

    length = None
    firstLine = True
    __buffer = None
    _header = b""

    def sendCommand(self, command, path):
        self.transport.writeSequence([command, b" ", path, b" HTTP/1.0\r\n"])

    def sendHeader(self, name, value):
        if not isinstance(value, bytes):
            # XXX Deprecate this case
            value = networkString(str(value))
        sanitizedName = _sanitizeLinearWhitespace(name)
        sanitizedValue = _sanitizeLinearWhitespace(value)
        self.transport.writeSequence([sanitizedName, b": ", sanitizedValue, b"\r\n"])

    def endHeaders(self):
        self.transport.write(b"\r\n")

    def extractHeader(self, header):
        """
        Given a complete HTTP header, extract the field name and value and
        process the header.

        @param header: a complete HTTP request header of the form
            'field-name: value'.
        @type header: C{bytes}
        """
        key, val = header.split(b":", 1)
        val = val.lstrip()
        self.handleHeader(key, val)
        if key.lower() == b"content-length":
            self.length = int(val)

    def lineReceived(self, line):
        """
        Parse the status line and headers for an HTTP request.

        @param line: Part of an HTTP request header. Request bodies are parsed
            in L{HTTPClient.rawDataReceived}.
        @type line: C{bytes}
        """
        if self.firstLine:
            self.firstLine = False
            l = line.split(None, 2)
            version = l[0]
            status = l[1]
            try:
                message = l[2]
            except IndexError:
                # sometimes there is no message
                message = b""
            self.handleStatus(version, status, message)
            return
        if not line:
            if self._header != b"":
                # Only extract headers if there are any
                self.extractHeader(self._header)
            self.__buffer = BytesIO()
            self.handleEndHeaders()
            self.setRawMode()
            return

        if line.startswith(b"\t") or line.startswith(b" "):
            # This line is part of a multiline header. According to RFC 822, in
            # "unfolding" multiline headers you do not strip the leading
            # whitespace on the continuing line.
            self._header = self._header + line
        elif self._header:
            # This line starts a new header, so process the previous one.
            self.extractHeader(self._header)
            self._header = line
        else:  # First header
            self._header = line

    def connectionLost(self, reason):
        self.handleResponseEnd()

    def handleResponseEnd(self):
        """
        The response has been completely received.

        This callback may be invoked more than once per request.
        """
        if self.__buffer is not None:
            b = self.__buffer.getvalue()
            self.__buffer = None
            self.handleResponse(b)

    def handleResponsePart(self, data):
        self.__buffer.write(data)

    def connectionMade(self):
        pass

    def handleStatus(self, version, status, message):
        """
        Called when the status-line is received.

        @param version: e.g. 'HTTP/1.0'
        @param status: e.g. '200'
        @type status: C{bytes}
        @param message: e.g. 'OK'
        """

    def handleHeader(self, key, val):
        """
        Called every time a header is received.
        """

    def handleEndHeaders(self):
        """
        Called when all headers have been received.
        """

    def rawDataReceived(self, data):
        if self.length is not None:
            data, rest = data[: self.length], data[self.length :]
            self.length -= len(data)
        else:
            rest = b""
        self.handleResponsePart(data)
        if self.length == 0:
            self.handleResponseEnd()
            self.setLineMode(rest)


deprecatedModuleAttribute(
    Version("Twisted", 24, 7, 0),
    "Use twisted.web.client.Agent instead.",
    __name__,
    HTTPClient.__name__,
)


# response codes that must have empty bodies
NO_BODY_CODES = (204, 304)


# Sentinel object that detects people explicitly passing `queued` to Request.
_QUEUED_SENTINEL = object()


def _getContentFile(length):
    """
    Get a writeable file-like object to which request content can be written.
    """
    if length is not None and length < 100000:
        return BytesIO()
    return tempfile.TemporaryFile()


_hostHeaderExpression = re.compile(rb"^\[?(?P<host>.*?)\]?(:\d+)?$")


@implementer(interfaces.IConsumer, _IDeprecatedHTTPChannelToRequestInterface)
class Request:
    """
    An HTTP request.

    Subclasses should override the process() method to determine how
    the request will be processed.

    @ivar method: The HTTP method that was used, e.g. C{b'GET'}.
    @type method: L{bytes}

    @ivar uri: The full encoded URI which was requested (including query
        arguments), e.g. C{b'/a/b%20/c?q=v'}.
    @type uri: L{bytes}

    @ivar path: The encoded path of the request URI (not including query
        arguments), e.g. C{b'/a/b%20/c'}.
    @type path: L{bytes}

    @ivar args: A mapping of decoded query argument names as L{bytes} to
        corresponding query argument values as L{list}s of L{bytes}.
        For example, for a URI with C{foo=bar&foo=baz&quux=spam}
        as its query part C{args} will be C{{b'foo': [b'bar', b'baz'],
        b'quux': [b'spam']}}.
    @type args: L{dict} of L{bytes} to L{list} of L{bytes}

    @ivar content: A file-like object giving the request body. This may be
        a file on disk, an L{io.BytesIO}, or some other type. The
        implementation is free to decide on a per-request basis.
    @type content: L{typing.BinaryIO}

    @ivar cookies: The cookies that will be sent in the response.
    @type cookies: L{list} of L{bytes}

    @type requestHeaders: L{http_headers.Headers}
    @ivar requestHeaders: All received HTTP request headers.

    @type responseHeaders: L{http_headers.Headers}
    @ivar responseHeaders: All HTTP response headers to be sent.

    @ivar notifications: A L{list} of L{Deferred}s which are waiting for
        notification that the response to this request has been finished
        (successfully or with an error). Don't use this attribute directly,
        instead use the L{Request.notifyFinish} method.

    @ivar _disconnected: A flag which is C{False} until the connection over
        which this request was received is closed and which is C{True} after
        that.
    @type _disconnected: L{bool}

    @ivar _log: A logger instance for request related messages.
    @type _log: L{twisted.logger.Logger}
    """

    producer = None
    finished = 0
    code = OK
    code_message = RESPONSES[OK]
    method = b"(no method yet)"
    clientproto = b"(no clientproto yet)"
    uri = b"(no uri yet)"
    startedWriting = 0
    chunked = 0
    sentLength = 0  # content-length of response, or total bytes sent via chunking
    etag = None
    lastModified = None
    args = None
    path = None
    content = None
    _forceSSL = 0
    _disconnected = False
    _log = Logger()

    def __init__(self, channel: HTTPChannel, queued: object = _QUEUED_SENTINEL) -> None:
        """
        @param channel: the channel we're connected to.
        @param queued: (deprecated) are we in the request queue, or can we
            start writing to the transport?
        """
        self.notifications: List[Deferred[None]] = []
        self.channel = channel

        # Cache the client and server information, we'll need this
        # later to be serialized and sent with the request so CGIs
        # will work remotely
        self.client = self.channel.getPeer()
        self.host = self.channel.getHost()

        self.requestHeaders: Headers = Headers()
        self.received_cookies: Dict[bytes, bytes] = {}
        self.responseHeaders: Headers = Headers()
        self.cookies: List[bytes] = []  # outgoing cookies
        self.transport = self.channel.transport

        if queued is _QUEUED_SENTINEL:
            queued = False

        self.queued = queued

    def _cleanup(self):
        """
        Called when we have finished responding and are no longer queued.
        """
        if self.producer:
            self._log.failure(
                "",
                Failure(RuntimeError(f"Producer was not unregistered for {self.uri}")),
            )
            self.unregisterProducer()
        self.channel.requestDone(self)
        del self.channel
        if self.content is not None:
            try:
                self.content.close()
            except OSError:
                # win32 suckiness, no idea why it does this
                pass
            del self.content
        for d in self.notifications:
            d.callback(None)
        self.notifications = []

    # methods for channel - end users should not use these

    @deprecated(Version("Twisted", 16, 3, 0))
    def noLongerQueued(self):
        """
        Notify the object that it is no longer queued.

        We start writing whatever data we have to the transport, etc.

        This method is not intended for users.

        In 16.3 this method was changed to become a no-op, as L{Request}
        objects are now never queued.
        """
        pass

    def gotLength(self, length):
        """
        Called when HTTP channel got length of content in this request.

        This method is not intended for users.

        @param length: The length of the request body, as indicated by the
            request headers. L{None} if the request headers do not indicate a
            length.
        """
        self.content = _getContentFile(length)

    def parseCookies(self):
        """
        Parse cookie headers.

        This method is not intended for users.
        """
        cookieheaders = self.requestHeaders.getRawHeaders(b"Cookie")

        if cookieheaders is None:
            return

        for cookietxt in cookieheaders:
            if cookietxt:
                for cook in cookietxt.split(b";"):
                    cook = cook.lstrip()
                    try:
                        k, v = cook.split(b"=", 1)
                        self.received_cookies[k] = v
                    except ValueError:
                        pass

    def handleContentChunk(self, data):
        """
        Write a chunk of data.

        This method is not intended for users.
        """
        self.content.write(data)

    def requestReceived(self, command, path, version):
        """
        Called by channel when all data has been received.

        This method is not intended for users.

        @type command: C{bytes}
        @param command: The HTTP verb of this request. This has the case
            supplied by the client (e.g., it may be "get" rather than "GET").

        @type path: C{bytes}
        @param path: The URI of this request.

        @type version: C{bytes}
        @param version: The HTTP version of this request.
        """
        clength = self.content.tell()
        self.content.seek(0, 0)
        self.args = {}

        self.method, self.uri = command, path
        self.clientproto = version
        x = self.uri.split(b"?", 1)

        if len(x) == 1:
            self.path = self.uri
        else:
            self.path, argstring = x
            self.args = parse_qs(argstring, 1)

        # Argument processing
        args = self.args
        ctype = self.requestHeaders.getRawHeaders(b"Content-Type")
        if ctype is not None:
            ctype = ctype[0]

        if self.method == b"POST" and ctype and clength:
            mfd = b"multipart/form-data"
            key = _parseContentType(ctype)
            if key == b"application/x-www-form-urlencoded":
                args.update(parse_qs(self.content.read(), 1))
            elif key == mfd:
                try:
                    self.content.seek(0)
                    content = self.content.read()
                    self.args.update(_getMultiPartArgs(content, ctype))
                except _MultiPartParseException:
                    # It was a bad request.
                    self.channel._respondToBadRequestAndDisconnect()
                    return

        self.content.seek(0, 0)

        self.process()

    def __repr__(self) -> str:
        """
        Return a string description of the request including such information
        as the request method and request URI.

        @return: A string loosely describing this L{Request} object.
        @rtype: L{str}
        """
        return "<{} at 0x{:x} method={} uri={} clientproto={}>".format(
            self.__class__.__name__,
            id(self),
            nativeString(self.method),
            nativeString(self.uri),
            nativeString(self.clientproto),
        )

    def process(self):
        """
        Override in subclasses.

        This method is not intended for users.
        """
        pass

    # consumer interface

    def registerProducer(self, producer, streaming):
        """
        Register a producer.
        """
        if self.producer:
            raise ValueError(
                "registering producer %s before previous one (%s) was "
                "unregistered" % (producer, self.producer)
            )

        self.streamingProducer = streaming
        self.producer = producer
        self.channel.registerProducer(producer, streaming)

    def unregisterProducer(self):
        """
        Unregister the producer.
        """
        self.channel.unregisterProducer()
        self.producer = None

    # The following is the public interface that people should be
    # writing to.
    def getHeader(self, key: AnyStr) -> Optional[AnyStr]:
        """
        Get an HTTP request header.

        @type key: C{bytes} or C{str}
        @param key: The name of the header to get the value of.

        @rtype: C{bytes} or C{str} or L{None}
        @return: The value of the specified header, or L{None} if that header
            was not present in the request. The string type of the result
            matches the type of C{key}.
        """
        value = self.requestHeaders.getRawHeaders(key)
        if value is not None:
            return value[-1]
        return None

    def getCookie(self, key):
        """
        Get a cookie that was sent from the network.

        @type key: C{bytes}
        @param key: The name of the cookie to get.

        @rtype: C{bytes} or C{None}
        @returns: The value of the specified cookie, or L{None} if that cookie
            was not present in the request.
        """
        return self.received_cookies.get(key)

    def notifyFinish(self) -> Deferred[None]:
        """
        Notify when the response to this request has finished.

        @note: There are some caveats around the reliability of the delivery of
            this notification.

            1. If this L{Request}'s channel is paused, the notification
               will not be delivered. This can happen in one of two ways;
               either you can call C{request.transport.pauseProducing}
               yourself, or Twisted Web can pause the transport itself, as
               described in the next item.

            2. In order to deliver this notification promptly when a client
               disconnects, the reactor must continue reading from the
               transport, so that it can tell when the underlying network
               connection has gone away. Twisted Web will only keep
               reading up until a finite (small) maximum buffer size before
               it gives up and pauses the transport itself. If this
               occurs, you will not discover that the connection has gone
               away until a timeout fires or until the application attempts
               to send some data via L{Request.write}.

            3. It is theoretically impossible to distinguish between
               successfully I{sending} a response and the peer successfully
               I{receiving} it. There are several networking edge cases
               where the L{Deferred}s returned by C{notifyFinish} will
               indicate success, but the data will never be received.
               There are also edge cases where the connection will appear
               to fail, but in reality the response was delivered. As a
               result, the information provided by the result of the
               L{Deferred}s returned by this method should be treated as a
               guess; do not make critical decisions in your applications
               based upon it.

        @rtype: L{Deferred}
        @return: A L{Deferred} which will be triggered when the request is
            finished -- with a L{None} value if the request finishes
            successfully or with an error if the request is interrupted by an
            error (for example, the client closing the connection prematurely).
        """
        self.notifications.append(Deferred())
        return self.notifications[-1]

    def finish(self):
        """
        Indicate that all response data has been written to this L{Request}.
        """
        if self._disconnected:
            raise RuntimeError(
                "Request.finish called on a request after its connection was lost; "
                "use Request.notifyFinish to keep track of this."
            )
        if self.finished:
            warnings.warn("Warning! request.finish called twice.", stacklevel=2)
            return

        if not self.startedWriting:
            # write headers
            self.write(b"")

        if self.chunked:
            # write last chunk and closing CRLF
            self.channel.write(b"0\r\n\r\n")

        # log request
        if hasattr(self.channel, "factory") and self.channel.factory is not None:
            self.channel.factory.log(self)

        self.finished = 1
        if not self.queued:
            self._cleanup()

    def write(self, data):
        """
        Write some data as a result of an HTTP request. The first
        time this is called, it writes out response data.

        @type data: C{bytes}
        @param data: Some bytes to be sent as part of the response body.
        """
        if self.finished:
            raise RuntimeError(
                "Request.write called on a request after Request.finish was called."
            )

        if self._disconnected:
            # Don't attempt to write any data to a disconnected client.
            # The RuntimeError exception will be thrown as usual when
            # request.finish is called
            return

        if not self.startedWriting:
            self.startedWriting = 1
            version = self.clientproto
            code = b"%d" % (self.code,)
            reason = self.code_message

            # if we don't have a content length, we send data in
            # chunked mode, so that we can support pipelining in
            # persistent connections.
            if (
                (version == b"HTTP/1.1")
                and (self.responseHeaders.getRawHeaders(b"Content-Length") is None)
                and self.method != b"HEAD"
                and self.code not in NO_BODY_CODES
            ):
                self.responseHeaders.setRawHeaders(b"Transfer-Encoding", [b"chunked"])
                self.chunked = 1

            if self.lastModified is not None:
                if self.responseHeaders.hasHeader(b"Last-Modified"):
                    self._log.info(
                        "Warning: last-modified specified both in"
                        " header list and lastModified attribute."
                    )
                else:
                    self.responseHeaders.setRawHeaders(
                        b"Last-Modified", [datetimeToString(self.lastModified)]
                    )

            if self.etag is not None:
                self.responseHeaders.setRawHeaders(b"ETag", [self.etag])

            if self.cookies:
                self.responseHeaders.setRawHeaders(b"Set-Cookie", self.cookies)

            self.channel.writeHeaders(version, code, reason, self.responseHeaders)

            # if this is a "HEAD" request, we shouldn't return any data
            if self.method == b"HEAD":
                self.write = lambda data: None
                return

            # for certain result codes, we should never return any data
            if self.code in NO_BODY_CODES:
                self.write = lambda data: None
                return

        self.sentLength = self.sentLength + len(data)
        if data:
            if self.chunked:
                self.channel.writeSequence(toChunk(data))
            else:
                self.channel.write(data)

    def addCookie(
        self,
        k,
        v,
        expires=None,
        domain=None,
        path=None,
        max_age=None,
        comment=None,
        secure=None,
        httpOnly=False,
        sameSite=None,
    ):
        """
        Set an outgoing HTTP cookie.

        In general, you should consider using sessions instead of cookies, see
        L{twisted.web.server.Request.getSession} and the
        L{twisted.web.server.Session} class for details.

        @param k: cookie name
        @type k: L{bytes} or L{str}

        @param v: cookie value
        @type v: L{bytes} or L{str}

        @param expires: cookie expire attribute value in
            "Wdy, DD Mon YYYY HH:MM:SS GMT" format
        @type expires: L{bytes} or L{str}

        @param domain: cookie domain
        @type domain: L{bytes} or L{str}

        @param path: cookie path
        @type path: L{bytes} or L{str}

        @param max_age: cookie expiration in seconds from reception
        @type max_age: L{bytes} or L{str}

        @param comment: cookie comment
        @type comment: L{bytes} or L{str}

        @param secure: direct browser to send the cookie on encrypted
            connections only
        @type secure: L{bool}

        @param httpOnly: direct browser not to expose cookies through channels
            other than HTTP (and HTTPS) requests
        @type httpOnly: L{bool}

        @param sameSite: One of L{None} (default), C{'lax'} or C{'strict'}.
            Direct browsers not to send this cookie on cross-origin requests.
            Please see:
            U{https://tools.ietf.org/html/draft-west-first-party-cookies-07}
        @type sameSite: L{None}, L{bytes} or L{str}

        @raise ValueError: If the value for C{sameSite} is not supported.
        """

        def _ensureBytes(val):
            """
            Ensure that C{val} is bytes, encoding using UTF-8 if
            needed.

            @param val: L{bytes} or L{str}

            @return: L{bytes}
            """
            if val is None:
                # It's None, so we don't want to touch it
                return val

            if isinstance(val, bytes):
                return val
            else:
                return val.encode("utf8")

        def _sanitize(val):
            r"""
            Replace linear whitespace (C{\r}, C{\n}, C{\r\n}) and
            semicolons C{;} in C{val} with a single space.

            @param val: L{bytes}
            @return: L{bytes}
            """
            return _sanitizeLinearWhitespace(val).replace(b";", b" ")

        cookie = _sanitize(_ensureBytes(k)) + b"=" + _sanitize(_ensureBytes(v))
        if expires is not None:
            cookie = cookie + b"; Expires=" + _sanitize(_ensureBytes(expires))
        if domain is not None:
            cookie = cookie + b"; Domain=" + _sanitize(_ensureBytes(domain))
        if path is not None:
            cookie = cookie + b"; Path=" + _sanitize(_ensureBytes(path))
        if max_age is not None:
            cookie = cookie + b"; Max-Age=" + _sanitize(_ensureBytes(max_age))
        if comment is not None:
            cookie = cookie + b"; Comment=" + _sanitize(_ensureBytes(comment))
        if secure:
            cookie = cookie + b"; Secure"
        if httpOnly:
            cookie = cookie + b"; HttpOnly"
        if sameSite:
            sameSite = _ensureBytes(sameSite).lower()
            if sameSite not in [b"lax", b"strict"]:
                raise ValueError("Invalid value for sameSite: " + repr(sameSite))
            cookie += b"; SameSite=" + sameSite
        self.cookies.append(cookie)
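
    # Illustrative example (added for clarity; not part of the original
    # module): each call appends one Set-Cookie value to self.cookies;
    # `request` here stands for any Request instance.
    #
    #   >>> request.addCookie(b"session", b"abc123", path=b"/",
    #   ...                   secure=True, httpOnly=True, sameSite="lax")
    #   >>> request.cookies[-1]
    #   b'session=abc123; Path=/; Secure; HttpOnly; SameSite=lax'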

    def setResponseCode(self, code: int, message: Optional[bytes] = None) -> None:
        """
        Set the HTTP response code.

        @type code: L{int}
        @type message: L{bytes}
        """
        self.code = code
        if message is not None:
            self.code_message = message
        else:
            self.code_message = RESPONSES.get(code, b"Unknown Status")

    def setHeader(self, name, value):
        """
        Set an HTTP response header. Overrides any previously set values for
        this header.

        @type name: L{bytes} or L{str}
        @param name: The name of the header for which to set the value.

        @type value: L{bytes} or L{str}
        @param value: The value to set for the named header. A L{str} will be
            UTF-8 encoded, which may not be interoperable with other
            implementations. Avoid passing non-ASCII characters if possible.
        """
        self.responseHeaders.setRawHeaders(name, [value])

    def redirect(self, url):
        """
        Utility function that does a redirect.

        Set the response code to L{FOUND} and the I{Location} header to the
        given URL.

        The request should have C{finish()} called after this.

        @param url: I{Location} header value.
        @type url: L{bytes} or L{str}
        """
        self.setResponseCode(FOUND)
        self.setHeader(b"Location", url)

    def setLastModified(self, when):
        """
        Set the C{Last-Modified} time for the response to this request.

        If I am called more than once, I ignore attempts to set
        Last-Modified earlier, only replacing the Last-Modified time
        if it is to a later value.

        If I am a conditional request, I may modify my response code
        to L{NOT_MODIFIED} if appropriate for the time given.

        @param when: The last time the resource being returned was
            modified, in seconds since the epoch.
        @type when: number
        @return: If I am a I{If-Modified-Since} conditional request and
            the time given is not newer than the condition, I return
            L{http.CACHED<CACHED>} to indicate that you should write no
            body. Otherwise, I return a false value.
        """
        # time.time() may be a float, but the HTTP-date strings are
        # only good for whole seconds.
        when = int(math.ceil(when))
        if (not self.lastModified) or (self.lastModified < when):
            self.lastModified = when

        modifiedSince = self.getHeader(b"If-Modified-Since")
        if modifiedSince:
            firstPart = modifiedSince.split(b";", 1)[0]
            try:
                modifiedSince = stringToDatetime(firstPart)
            except ValueError:
                return None
            if modifiedSince >= self.lastModified:
                self.setResponseCode(NOT_MODIFIED)
                return CACHED
        return None

    def setETag(self, etag):
        """
        Set an C{entity tag} for the outgoing response.

        That's \"entity tag\" as in the HTTP/1.1 C{ETag} header, \"used
        for comparing two or more entities from the same requested
        resource.\"

        If I am a conditional request, I may modify my response code
        to L{NOT_MODIFIED} or L{PRECONDITION_FAILED}, if appropriate
        for the tag given.

        @param etag: The entity tag for the resource being returned.
        @type etag: string
        @return: If I am a C{If-None-Match} conditional request and
            the tag matches one in the request, I return
            L{http.CACHED<CACHED>} to indicate that you should write
            no body. Otherwise, I return a false value.
        """
        if etag:
            self.etag = etag

        tags = self.getHeader(b"If-None-Match")
        if tags:
            tags = tags.split()
            if (etag in tags) or (b"*" in tags):
                self.setResponseCode(
                    ((self.method in (b"HEAD", b"GET")) and NOT_MODIFIED)
                    or PRECONDITION_FAILED
                )
                return CACHED
        return None
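
    # Illustrative example (added for clarity; not part of the original
    # module): the usual conditional-request pattern in a resource's render
    # method, where `request` is any Request instance.
    #
    #   if request.setETag(b'"v1"') is CACHED:
    #       return b""  # the client's validator matched; send no body
    #   # ...otherwise generate and write the response body as usual.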

    def getAllHeaders(self):
        """
        Return dictionary mapping the names of all received headers to the last
        value received for each.

        Since this method does not return all header information,
        C{self.requestHeaders.getAllRawHeaders()} may be preferred.
        """
        headers = {}
        for k, v in self.requestHeaders.getAllRawHeaders():
            headers[k.lower()] = v[-1]
        return headers

    def getRequestHostname(self):
        """
        Get the hostname that the HTTP client passed in to the request.

        @see: L{IRequest.getRequestHostname}

        @returns: the requested hostname

        @rtype: C{bytes}
        """
        host = self.getHeader(b"Host")
        if host is not None:
            match = _hostHeaderExpression.match(host)
            if match is not None:
                return match.group("host")
        return networkString(self.getHost().host)

    def getHost(self):
        """
        Get my originally requesting transport's host.

        Don't rely on the 'transport' attribute, since Request objects may be
        copied remotely. For information on this method's return value, see
        L{twisted.internet.tcp.Port}.
        """
        return self.host

    def setHost(self, host, port, ssl=0):
        """
        Change the host and port the request thinks it's using.

        This method is useful for working with reverse HTTP proxies (e.g.
        both Squid and Apache's mod_proxy can do this), when the address
        the HTTP client is using is different than the one we're listening on.

        For example, Apache may be listening on https://www.example.com/, and
        then forwarding requests to http://localhost:8080/, but we don't want
        HTML produced by Twisted to say b'http://localhost:8080/', they should
        say b'https://www.example.com/', so we do::

            request.setHost(b'www.example.com', 443, ssl=1)

        @type host: C{bytes}
        @param host: The value to which to change the host header.

        @type ssl: C{bool}
        @param ssl: A flag which, if C{True}, indicates that the request is
            considered secure (if C{True}, L{isSecure} will return C{True}).
        """
        self._forceSSL = ssl  # set first so isSecure will work
        if self.isSecure():
            default = 443
        else:
            default = 80
        if port == default:
            hostHeader = host
        else:
            hostHeader = b"%b:%d" % (host, port)
        self.requestHeaders.setRawHeaders(b"Host", [hostHeader])
        self.host = address.IPv4Address("TCP", host, port)

    @deprecated(Version("Twisted", 18, 4, 0), replacement="getClientAddress")
    def getClientIP(self):
        """
        Return the IP address of the client who submitted this request.

        This method is B{deprecated}. Use L{getClientAddress} instead.

        @returns: the client IP address
        @rtype: C{str}
        """
        if isinstance(self.client, (address.IPv4Address, address.IPv6Address)):
            return self.client.host
        else:
            return None

    def getClientAddress(self):
        """
        Return the address of the client who submitted this request.

        This may not be a network address (e.g., a server listening on
        a UNIX domain socket will cause this to return
        L{UNIXAddress}). Callers must check the type of the returned
        address.

        @since: 18.4

        @return: the client's address.
        @rtype: L{IAddress}
        """
        return self.client

    def isSecure(self):
        """
        Return L{True} if this request is using a secure transport.

        Normally this method returns L{True} if this request's L{HTTPChannel}
        instance is using a transport that implements
        L{interfaces.ISSLTransport}.

        This will also return L{True} if L{Request.setHost} has been called
        with C{ssl=True}.

        @returns: L{True} if this request is secure
        @rtype: C{bool}
        """
        if self._forceSSL:
            return True
        channel = getattr(self, "channel", None)
        if channel is None:
            return False
        return channel.isSecure()

    def _authorize(self):
        # Authorization, (mostly) per the RFC
        try:
            authh = self.getHeader(b"Authorization")
            if not authh:
                self.user = self.password = b""
                return
            bas, upw = authh.split()
            if bas.lower() != b"basic":
                raise ValueError()
            upw = base64.b64decode(upw)
            self.user, self.password = upw.split(b":", 1)
        except (binascii.Error, ValueError):
            self.user = self.password = b""
        except BaseException:
            self._log.failure("")
            self.user = self.password = b""

    def getUser(self):
        """
        Return the HTTP user sent with this request, if any.

        If no user was supplied, return the empty string.

        @returns: the HTTP user, if any
        @rtype: C{bytes}
        """
        try:
            return self.user
        except BaseException:
            pass
        self._authorize()
        return self.user

    def getPassword(self):
        """
        Return the HTTP password sent with this request, if any.

        If no password was supplied, return the empty string.

        @returns: the HTTP password, if any
        @rtype: C{bytes}
        """
        try:
            return self.password
        except BaseException:
            pass
        self._authorize()
        return self.password

    def connectionLost(self, reason):
        """
        There is no longer a connection for this request to respond over.
        Clean up anything which can't be useful anymore.
        """
        self._disconnected = True
        self.channel = None
        if self.content is not None:
            self.content.close()
        for d in self.notifications:
            d.errback(reason)
        self.notifications = []

    def loseConnection(self):
        """
        Pass the loseConnection through to the underlying channel.
        """
        if self.channel is not None:
            self.channel.loseConnection()

    def __eq__(self, other: object) -> bool:
        """
        Determines if two requests are the same object.

        @param other: Another object whose identity will be compared
            to this instance's.

        @return: L{True} when the two are the same object and L{False}
            when not.
        @rtype: L{bool}
        """
        # When other is not an instance of request, return
        # NotImplemented so that Python uses other.__eq__ to perform
        # the comparison. This ensures that a Request proxy generated
        # by proxyForInterface compares equal to an actual Request
        # instance by turning request != proxy into proxy != request.
        if isinstance(other, Request):
            return self is other
        return NotImplemented

    def __hash__(self):
        """
        A C{Request} is hashable so that it can be used as a mapping key.

        @return: A C{int} based on the instance's identity.
        """
        return id(self)
||
|
|
||
|
|
||
|
class _DataLoss(Exception):
    """
    L{_DataLoss} indicates that not all of a message body was received. This
    is only one of several possible exceptions which may indicate that data
    was lost. Because of this, it should not be specifically checked for;
    any unexpected exception should be treated as having caused data loss.
    """


class PotentialDataLoss(Exception):
    """
    L{PotentialDataLoss} may be raised by a transfer encoding decoder's
    C{noMoreData} method to indicate that it cannot be determined if the
    entire response body has been delivered. This only occurs when making
    requests to HTTP servers which do not set I{Content-Length} or a
    I{Transfer-Encoding} in the response because in this case the end of the
    response is indicated by the connection being closed, an event which may
    also be due to a transient network problem or other error.
    """


class _MalformedChunkedDataError(Exception):
    """
    C{_ChunkedTransferDecoder} raises L{_MalformedChunkedDataError} from its
    C{dataReceived} method when it encounters malformed data. This exception
    indicates a client-side error. If this exception is raised, the connection
    should be dropped with a 400 error.
    """


class _IdentityTransferDecoder:
    """
    Protocol for accumulating bytes up to a specified length. This handles the
    case where no I{Transfer-Encoding} is specified.

    @ivar contentLength: Counter keeping track of how many more bytes there are
        to receive.

    @ivar dataCallback: A one-argument callable which will be invoked each
        time application data is received.

    @ivar finishCallback: A one-argument callable which will be invoked when
        the terminal chunk is received. It will be invoked with all bytes
        which were delivered to this protocol which came after the terminal
        chunk.
    """

    __slots__ = ["contentLength", "dataCallback", "finishCallback"]

    def __init__(self, contentLength, dataCallback, finishCallback):
        self.contentLength = contentLength
        self.dataCallback = dataCallback
        self.finishCallback = finishCallback

    def dataReceived(self, data):
        """
        Interpret the next chunk of bytes received. Either deliver them to the
        data callback or invoke the finish callback if enough bytes have been
        received.

        @raise RuntimeError: If the finish callback has already been invoked
            during a previous call to this method.
        """
        if self.dataCallback is None:
            raise RuntimeError(
                "_IdentityTransferDecoder cannot decode data after finishing"
            )

        if self.contentLength is None:
            self.dataCallback(data)
        elif len(data) < self.contentLength:
            self.contentLength -= len(data)
            self.dataCallback(data)
        else:
            # Make the state consistent before invoking any code belonging to
            # anyone else in case noMoreData ends up being called beneath this
            # stack frame.
            contentLength = self.contentLength
            dataCallback = self.dataCallback
            finishCallback = self.finishCallback
            self.dataCallback = self.finishCallback = None
            self.contentLength = 0

            dataCallback(data[:contentLength])
            finishCallback(data[contentLength:])

    def noMoreData(self):
        """
        All data which will be delivered to this decoder has been. Check to
        make sure as much data as was expected has been received.

        @raise PotentialDataLoss: If the content length is unknown.
        @raise _DataLoss: If the content length is known and fewer than that
            many bytes have been delivered.

        @return: L{None}
        """
        finishCallback = self.finishCallback
        self.dataCallback = self.finishCallback = None
        if self.contentLength is None:
            finishCallback(b"")
            raise PotentialDataLoss()
        elif self.contentLength != 0:
            raise _DataLoss()
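

# Illustrative sketch (not part of the module's API): driving
# _IdentityTransferDecoder by hand. With a Content-Length of 5, the first
# five bytes go to dataCallback and anything after them is handed to
# finishCallback, as described above.
def _exampleIdentityDecoding() -> None:
    received = []
    extra = []
    decoder = _IdentityTransferDecoder(5, received.append, extra.append)
    decoder.dataReceived(b"he")  # 2 of 5 expected bytes
    decoder.dataReceived(b"lloXY")  # remaining 3 bytes; b"XY" is surplus
    assert b"".join(received) == b"hello"
    assert extra == [b"XY"]
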
maxChunkSizeLineLength = 1024

_chunkExtChars = (
    b"\t !\"#$%&'()*+,-./0123456789:;<=>?@"
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
    b"abcdefghijklmnopqrstuvwxyz{|}~"
    b"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    b"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    b"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    b"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    b"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    b"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
)
"""
Characters that are valid in a chunk extension.

See RFC 7230 section 4.1.1::

    chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )

    chunk-ext-name = token
    chunk-ext-val  = token / quoted-string

And section 3.2.6::

    token          = 1*tchar

    tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
                   / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
                   / DIGIT / ALPHA
                   ; any VCHAR, except delimiters

    quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
    qdtext         = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
    obs-text       = %x80-FF

We don't check if chunk extensions are well-formed beyond validating that they
don't contain characters outside this range.
"""


class _ChunkedTransferDecoder:
    """
    Protocol for decoding I{chunked} Transfer-Encoding, as defined by RFC 7230,
    section 4.1. This protocol can interpret the contents of a request or
    response body which uses the I{chunked} Transfer-Encoding. It cannot
    interpret any of the rest of the HTTP protocol.

    It may make sense for _ChunkedTransferDecoder to be an actual IProtocol
    implementation. Currently, the only user of this class will only ever
    call dataReceived on it. However, it might be an improvement if the
    user could connect this to a transport and deliver connection lost
    notification. This way, `dataCallback` becomes `self.transport.write`
    and perhaps `finishCallback` becomes `self.transport.loseConnection()`
    (although I'm not sure where the extra data goes in that case). This
    could also allow this object to indicate to the receiver of data that
    the stream was not completely received, an error case which should be
    noticed. -exarkun

    @ivar dataCallback: A one-argument callable which will be invoked each
        time application data is received. This callback is not reentrant.

    @ivar finishCallback: A one-argument callable which will be invoked when
        the terminal chunk is received. It will be invoked with all bytes
        which were delivered to this protocol which came after the terminal
        chunk.

    @ivar length: Counter keeping track of how many more bytes in a chunk there
        are to receive.

    @ivar state: One of C{'CHUNK_LENGTH'}, C{'CRLF'}, C{'TRAILER'},
        C{'BODY'}, or C{'FINISHED'}. For C{'CHUNK_LENGTH'}, data for the
        chunk length line is currently being read. For C{'CRLF'}, the CR LF
        pair which follows each chunk is being read. For C{'TRAILER'}, the CR
        LF pair which follows the terminal 0-length chunk is currently being
        read. For C{'BODY'}, the contents of a chunk are being read. For
        C{'FINISHED'}, the last chunk has been completely read and no more
        input is valid.

    @ivar _buffer: Accumulated received data for the current state. At each
        state transition this is truncated at the front so that index 0 is
        where the next state shall begin.

    @ivar _start: While in the C{'CHUNK_LENGTH'} and C{'TRAILER'} states,
        tracks the index into the buffer at which search for CRLF should resume.
        Resuming the search at this position avoids doing quadratic work if the
        chunk length line arrives over many calls to C{dataReceived}.

    @ivar _trailerHeaders: Accumulates raw/unparsed trailer headers.
        See https://github.com/twisted/twisted/issues/12014

    @ivar _maxTrailerHeadersSize: Maximum bytes for trailer header from the
        response.
    @type _maxTrailerHeadersSize: C{int}

    @ivar _receivedTrailerHeadersSize: Bytes received so far for the trailer headers.
    @type _receivedTrailerHeadersSize: C{int}
    """

    state = "CHUNK_LENGTH"

    def __init__(
        self,
        dataCallback: Callable[[bytes], None],
        finishCallback: Callable[[bytes], None],
    ) -> None:
        self.dataCallback = dataCallback
        self.finishCallback = finishCallback
        self._buffer = bytearray()
        self._start = 0
        self._trailerHeaders: List[bytearray] = []
        self._maxTrailerHeadersSize = 2**16
        self._receivedTrailerHeadersSize = 0

    def _dataReceived_CHUNK_LENGTH(self) -> bool:
        """
        Read the chunk size line, ignoring any extensions.

        @returns: C{True} once the line has been read and removed from
            C{self._buffer}. C{False} when more data is required.

        @raises _MalformedChunkedDataError: when the chunk size cannot be
            decoded or the length of the line exceeds L{maxChunkSizeLineLength}.
        """
        eolIndex = self._buffer.find(b"\r\n", self._start)

        if eolIndex >= maxChunkSizeLineLength or (
            eolIndex == -1 and len(self._buffer) > maxChunkSizeLineLength
        ):
            raise _MalformedChunkedDataError(
                "Chunk size line exceeds maximum of {} bytes.".format(
                    maxChunkSizeLineLength
                )
            )

        if eolIndex == -1:
            # Restart the search upon receipt of more data at the start of the
            # new data, minus one in case the last character of the buffer is
            # CR.
            self._start = len(self._buffer) - 1
            return False

        endOfLengthIndex = self._buffer.find(b";", 0, eolIndex)
        if endOfLengthIndex == -1:
            endOfLengthIndex = eolIndex
        rawLength = self._buffer[0:endOfLengthIndex]
        try:
            length = _hexint(rawLength)
        except ValueError:
            raise _MalformedChunkedDataError("Chunk-size must be an integer.")

        ext = self._buffer[endOfLengthIndex + 1 : eolIndex]
        if ext and ext.translate(None, _chunkExtChars) != b"":
            raise _MalformedChunkedDataError(
                f"Invalid characters in chunk extensions: {ext!r}."
            )

        if length == 0:
            self.state = "TRAILER"
        else:
            self.state = "BODY"

        self.length = length
        del self._buffer[0 : eolIndex + 2]
        self._start = 0
        return True

    def _dataReceived_CRLF(self) -> bool:
        """
        Await the carriage return and line feed characters that are the end of
        chunk marker that follow the chunk data.

        @returns: C{True} when the CRLF have been read, otherwise C{False}.

        @raises _MalformedChunkedDataError: when anything other than CRLF are
            received.
        """
        if len(self._buffer) < 2:
            return False

        if not self._buffer.startswith(b"\r\n"):
            raise _MalformedChunkedDataError("Chunk did not end with CRLF")

        self.state = "CHUNK_LENGTH"
        del self._buffer[0:2]
        return True

    def _dataReceived_TRAILER(self) -> bool:
        """
        Collect trailer headers if received and finish at the terminal zero-length
        chunk. Then invoke C{finishCallback} and switch to state C{'FINISHED'}.

        @returns: C{False}, as there is either insufficient data to continue,
            or no data remains.
        """
        eolIndex = self._buffer.find(b"\r\n", self._start)

        if eolIndex == -1:
            # Still no end of network line marker found.
            #
            # Check if we've run up against the trailer size limit: if the next
            # read contains the terminating CRLF then we'll have this many bytes
            # of trailers (including the CRLFs).
            minTrailerSize = (
                self._receivedTrailerHeadersSize
                + len(self._buffer)
                + (1 if self._buffer.endswith(b"\r") else 2)
            )
            if minTrailerSize > self._maxTrailerHeadersSize:
                raise _MalformedChunkedDataError("Trailer headers data is too long.")
            # Continue processing more data.
            return False

        if eolIndex > 0:
            # A trailer header was detected.
            self._trailerHeaders.append(self._buffer[0:eolIndex])
            del self._buffer[0 : eolIndex + 2]
            self._start = 0
            self._receivedTrailerHeadersSize += eolIndex + 2
            if self._receivedTrailerHeadersSize > self._maxTrailerHeadersSize:
                raise _MalformedChunkedDataError("Trailer headers data is too long.")
            return True

        # eolIndex in this part of code is equal to 0

        data = memoryview(self._buffer)[2:].tobytes()

        del self._buffer[:]
        self.state = "FINISHED"
        self.finishCallback(data)
        return False

    def _dataReceived_BODY(self) -> bool:
        """
        Deliver any available chunk data to the C{dataCallback}. When all the
        remaining data for the chunk arrives, switch to state C{'CRLF'}.

        @returns: C{True} to continue processing of any buffered data.
        """
        if len(self._buffer) >= self.length:
            chunk = memoryview(self._buffer)[: self.length].tobytes()
            del self._buffer[: self.length]
            self.state = "CRLF"
            self.dataCallback(chunk)
        else:
            chunk = bytes(self._buffer)
            self.length -= len(chunk)
            del self._buffer[:]
            self.dataCallback(chunk)
        return True

    def _dataReceived_FINISHED(self) -> bool:
        """
        Once C{finishCallback} has been invoked receipt of additional data
        raises L{RuntimeError} because it represents a programming error in
        the caller.
        """
        raise RuntimeError(
            "_ChunkedTransferDecoder.dataReceived called after last "
            "chunk was processed"
        )

    def dataReceived(self, data: bytes) -> None:
        """
        Interpret data from a request or response body which uses the
        I{chunked} Transfer-Encoding.
        """
        self._buffer += data
        goOn = True
        while goOn and self._buffer:
            goOn = getattr(self, "_dataReceived_" + self.state)()

    def noMoreData(self) -> None:
        """
        Verify that all data has been received. If it has not been, raise
        L{_DataLoss}.
        """
        if self.state != "FINISHED":
            raise _DataLoss(
                "Chunked decoder in %r state, still expecting more data to "
                "get to 'FINISHED' state." % (self.state,)
            )
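

# Illustrative sketch (not part of the module's API): decoding a chunked body
# by hand. Two chunks (the second carrying a chunk extension) and the terminal
# zero-length chunk arrive split across two dataReceived calls; bytes after
# the terminal chunk are handed to finishCallback, as the docstrings above
# describe.
def _exampleChunkedDecoding() -> None:
    body = []
    leftover = []
    decoder = _ChunkedTransferDecoder(body.append, leftover.append)
    decoder.dataReceived(b"5\r\nhello\r\n7;ext=1\r\n world")
    decoder.dataReceived(b"!\r\n0\r\n\r\nNEXT")
    decoder.noMoreData()  # does not raise: the terminal chunk was seen
    assert b"".join(body) == b"hello world!"
    assert leftover == [b"NEXT"]
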
@implementer(interfaces.IPushProducer)
class _NoPushProducer:
    """
    A no-op version of L{interfaces.IPushProducer}, used to abstract over the
    possibility that a L{HTTPChannel} transport does not provide
    L{IPushProducer}.
    """

    def pauseProducing(self):
        """
        Pause producing data.

        Tells a producer that it has produced too much data to process for
        the time being, and to stop until resumeProducing() is called.
        """

    def resumeProducing(self):
        """
        Resume producing data.

        This tells a producer to re-add itself to the main loop and produce
        more data for its consumer.
        """

    def registerProducer(self, producer, streaming):
        """
        Register to receive data from a producer.

        @param producer: The producer to register.
        @param streaming: Whether this is a streaming producer or not.
        """

    def unregisterProducer(self):
        """
        Stop consuming data from a producer, without disconnecting.
        """

    def stopProducing(self):
        """
        IProducer.stopProducing
        """


@implementer(interfaces.ITransport, interfaces.IPushProducer, interfaces.IConsumer)
class HTTPChannel(basic.LineReceiver, policies.TimeoutMixin):
    """
    A receiver for HTTP requests.

    The L{HTTPChannel} provides L{interfaces.ITransport} and
    L{interfaces.IConsumer} to the L{Request} objects it creates. It also
    implements L{interfaces.IPushProducer} to C{self.transport}, allowing the
    transport to pause it.

    @ivar MAX_LENGTH: Maximum length for initial request line and each line
        from the header.

    @ivar _transferDecoder: L{None} or a decoder instance if the request body
        uses the I{chunked} Transfer-Encoding.
    @type _transferDecoder: L{_ChunkedTransferDecoder}

    @ivar maxHeaders: Maximum number of headers allowed per request.
    @type maxHeaders: C{int}

    @ivar totalHeadersSize: Maximum bytes for request line plus all headers
        from the request.
    @type totalHeadersSize: C{int}

    @ivar _receivedHeaderSize: Bytes received so far for the header.
    @type _receivedHeaderSize: C{int}

    @ivar _handlingRequest: Whether a request is currently being processed.
    @type _handlingRequest: L{bool}

    @ivar _dataBuffer: Any data that has been received from the connection
        while processing an outstanding request.
    @type _dataBuffer: L{list} of L{bytes}

    @ivar _networkProducer: Either the transport, if it provides
        L{interfaces.IPushProducer}, or a null implementation of
        L{interfaces.IPushProducer}. Used to attempt to prevent the transport
        from producing excess data when we're responding to a request.
    @type _networkProducer: L{interfaces.IPushProducer}

    @ivar _requestProducer: If the L{Request} object or anything it calls
        registers itself as an L{interfaces.IProducer}, it will be stored here.
        This is used to create a producing pipeline: pause/resume producing
        methods will be propagated from the C{transport}, through the
        L{HTTPChannel} instance, to the C{_requestProducer}.

        The reason we proxy through the producing methods rather than the old
        behaviour (where we literally just set the L{Request} object as the
        producer on the transport) is because we want to be able to exert
        backpressure on the client to prevent it from sending in arbitrarily
        many requests without ever reading responses. Essentially, if the
        client never reads our responses we will eventually stop reading its
        requests.

    @type _requestProducer: L{interfaces.IPushProducer}

    @ivar _requestProducerStreaming: A boolean that tracks whether the producer
        on the L{Request} side of this channel has registered itself as a
        L{interfaces.IPushProducer} or an L{interfaces.IPullProducer}.
    @type _requestProducerStreaming: L{bool} or L{None}

    @ivar _waitingForTransport: A boolean that tracks whether the transport has
        asked us to stop producing. This is used to keep track of what we're
        waiting for: if the transport has asked us to stop producing then we
        don't want to unpause the transport until it asks us to produce again.
    @type _waitingForTransport: L{bool}

    @ivar abortTimeout: The number of seconds to wait after we attempt to shut
        the transport down cleanly to give up and forcibly terminate it. This
        is only used when we time a connection out, to prevent errors causing
        the FD to get leaked. If this is L{None}, we will wait forever.
    @type abortTimeout: L{int}

    @ivar _abortingCall: The L{twisted.internet.base.DelayedCall} that will be
        used to forcibly close the transport if it doesn't close cleanly.
    @type _abortingCall: L{twisted.internet.base.DelayedCall}

    @ivar _optimisticEagerReadSize: When a resource takes a long time to answer
        a request (via L{twisted.web.server.NOT_DONE_YET}, hopefully one day by
        a L{Deferred}), we would like to be able to let that resource know
        about the underlying transport disappearing as promptly as possible,
        via L{Request.notifyFinish}, and therefore via
        C{self.requests[...].connectionLost()} on this L{HTTPChannel}.

        However, in order to simplify application logic, we implement
        head-of-line blocking, and do not relay pipelined requests to the
        application until the previous request has been answered. This means
        that said application cannot dispose of any entity-body that comes in
        from those subsequent requests, which may be arbitrarily large, and it
        may need to be buffered in memory.

        To implement this tradeoff between prompt notification when possible
        (in the most frequent case of non-pipelined requests) and correct
        behavior when not (say, if a client sends a very long-running GET
        request followed by a PUT request with a very large body) we will
        continue reading pipelined requests into C{self._dataBuffer} up to a
        given limit.

        C{_optimisticEagerReadSize} is the number of bytes we will accept from
        the client and buffer before pausing the transport.

        This behavior has been in place since Twisted 17.9.0.

    @type _optimisticEagerReadSize: L{int}
    """

    maxHeaders = 500
    totalHeadersSize = 16384
    abortTimeout = 15

    length: Optional[int] = 0
    persistent = 1
    __header = b""
    __first_line = 1
    __content = None

    # set in instances or subclasses
    requestFactory = Request

    _savedTimeOut = None
    _receivedHeaderCount = 0
    _receivedHeaderSize = 0
    _requestProducer = None
    _requestProducerStreaming = None
    _waitingForTransport = False
    _abortingCall = None
    _optimisticEagerReadSize = 0x4000
    _log = Logger()

    def __init__(self):
        # the request queue
        self.requests = []
        self._handlingRequest = False
        self._dataBuffer = []
        self._transferDecoder = None

    def connectionMade(self):
        if ITCPTransport.providedBy(self.transport):
            self.transport.setTcpNoDelay(True)
        self.setTimeout(self.timeOut)
        self._networkProducer = interfaces.IPushProducer(
            self.transport, _NoPushProducer()
        )
        self._networkProducer.registerProducer(self, True)

    def dataReceived(self, data):
        self.resetTimeout()
        basic.LineReceiver.dataReceived(self, data)

    def lineReceived(self, line):
        """
        Called for each line from request until the end of headers when
        it enters binary mode.
        """
        self._receivedHeaderSize += len(line)
        if self._receivedHeaderSize > self.totalHeadersSize:
            self._respondToBadRequestAndDisconnect()
            return

        if self.__first_line:
            # if this connection is not persistent, drop any data which
            # the client (illegally) sent after the last request.
            if not self.persistent:
                self.dataReceived = self.lineReceived = lambda *args: None
                return

            # IE sends an extraneous empty line (\r\n) after a POST request;
            # eat up such a line, but only ONCE
            if not line and self.__first_line == 1:
                self.__first_line = 2
                return

            # create a new Request object
            if INonQueuedRequestFactory.providedBy(self.requestFactory):
                request = self.requestFactory(self)
            else:
                request = self.requestFactory(self, len(self.requests))
            self.requests.append(request)

            self.__first_line = 0

            try:
                command, request, version = _parseRequestLine(line)
            except ValueError:
                self._respondToBadRequestAndDisconnect()
                return

            self._command = command
            self._path = request
            self._version = version
        elif line == b"":
            # End of headers.
            if self.__header:
                ok = self.headerReceived(self.__header)
                # If the last header we got is invalid, we MUST NOT proceed
                # with processing. We'll have sent a 400 anyway, so just stop.
                if not ok:
                    return
            self.__header = b""
            self.allHeadersReceived()
            if self.length == 0:
                self.allContentReceived()
            else:
                self.setRawMode()
        elif line[0] in b" \t":
            # Continuation of a multi line header.
            self.__header += b" " + line.lstrip(b" \t")
        # Regular header line.
        # Processing of header line is delayed to allow accumulating multi
        # line headers.
        else:
            if self.__header:
                self.headerReceived(self.__header)
            self.__header = line

    def _finishRequestBody(self, data):
        self._dataBuffer.append(data)
        self.allContentReceived()

    def _failChooseTransferDecoder(self) -> bool:
        """
        Utility to indicate failure to choose a decoder.
        """
        self._respondToBadRequestAndDisconnect()
        self.length = None
        return False

    def _maybeChooseTransferDecoder(self, header, data):
        """
        If the provided header is C{content-length} or
        C{transfer-encoding}, choose the appropriate decoder if any.

        Returns L{True} if the request can proceed and L{False} if not.
        """
        # Can this header determine the length?
        if header == b"Content-Length":
            if not data.isdigit():
                return self._failChooseTransferDecoder()
            length = int(data)
            newTransferDecoder = _IdentityTransferDecoder(
                length, self.requests[-1].handleContentChunk, self._finishRequestBody
            )
        elif header == b"Transfer-Encoding":
            # XXX Rather poorly tested code block, apparently only exercised by
            # test_chunkedEncoding
            if data.lower() == b"chunked":
                length = None
                newTransferDecoder = _ChunkedTransferDecoder(
                    self.requests[-1].handleContentChunk, self._finishRequestBody
                )
            elif data.lower() == b"identity":
                return True
            else:
                return self._failChooseTransferDecoder()
        else:
            # It's not a length related header, so exit
            return True

        if self._transferDecoder is not None:
            return self._failChooseTransferDecoder()
        else:
            self.length = length
            self._transferDecoder = newTransferDecoder
            return True

    def headerReceived(self, line):
        """
        Do pre-processing (for Content-Length) and store this header away.
        Enforce the per-request header limit.

        @type line: C{bytes}
        @param line: A line from the header section of a request, excluding the
            line delimiter.

        @return: A flag indicating whether the header was valid.
        @rtype: L{bool}
        """
        try:
            header, data = line.split(b":", 1)
        except ValueError:
            self._respondToBadRequestAndDisconnect()
            return False

        # Canonicalize the header name.
        try:
            header = _nameEncoder.encode(header)
        except InvalidHeaderName:
            self._respondToBadRequestAndDisconnect()
            return False

        data = data.strip(b" \t")
        if b"\x00" in data:
            self._respondToBadRequestAndDisconnect()
            return False

        if not self._maybeChooseTransferDecoder(header, data):
            return False

        self.requests[-1].requestHeaders.addRawHeader(header, data)

        self._receivedHeaderCount += 1
        if self._receivedHeaderCount > self.maxHeaders:
            self._respondToBadRequestAndDisconnect()
            return False

        return True

    def allContentReceived(self):
        command = self._command
        path = self._path
        version = self._version

        # reset ALL state variables, so we don't interfere with next request
        self.length = 0
        self._receivedHeaderCount = 0
        self._receivedHeaderSize = 0
        self.__first_line = 1
        self._transferDecoder = None
        del self._command, self._path, self._version

        # Disable the idle timeout, in case this request takes a long
        # time to finish generating output.
        if self.timeOut:
            self._savedTimeOut = self.setTimeout(None)

        self._handlingRequest = True

        # We go into raw mode here even though we will be receiving lines next
        # in the protocol; however, this data will be buffered and then passed
        # back to line mode in the setLineMode call in requestDone.
        self.setRawMode()

        req = self.requests[-1]
        req.requestReceived(command, path, version)

    def rawDataReceived(self, data: bytes) -> None:
        """
        This is called when this HTTP/1.1 parser is in raw mode rather than
        line mode.

        It may be in raw mode for one of two reasons:

        1. All the headers of a request have been received and this
           L{HTTPChannel} is currently receiving its body.

        2. The full content of a request has been received and is currently
           being processed asynchronously, and this L{HTTPChannel} is
           buffering the data of all subsequent requests to be parsed
           later.

        In the second state, the data will be played back later.

        @note: This isn't really a public API, and should be invoked only by
            L{LineReceiver}'s line parsing logic. If you wish to drive an
            L{HTTPChannel} from a custom data source, call C{dataReceived} on
            it directly.

        @see: L{LineReceiver.rawDataReceived}
        """
        if self._handlingRequest:
            self._dataBuffer.append(data)
            if (
                sum(map(len, self._dataBuffer)) > self._optimisticEagerReadSize
            ) and not self._waitingForTransport:
                # If we received more data than a small limit while processing
                # the head-of-line request, apply TCP backpressure to our peer
                # to get them to stop sending more request data until we're
                # ready. See docstring for _optimisticEagerReadSize above.
                self._networkProducer.pauseProducing()
            return

        try:
            self._transferDecoder.dataReceived(data)
        except _MalformedChunkedDataError:
            self._respondToBadRequestAndDisconnect()

    def allHeadersReceived(self):
        req = self.requests[-1]
        req.parseCookies()
        self.persistent = self.checkPersistence(req, self._version)
        req.gotLength(self.length)
        # Handle 'Expect: 100-continue' with automated 100 response code,
        # a simplistic implementation of RFC 2616 section 8.2.3:
        expectContinue = req.requestHeaders.getRawHeaders(b"Expect")
        if (
            expectContinue
            and expectContinue[0].lower() == b"100-continue"
            and self._version == b"HTTP/1.1"
        ):
            self._send100Continue()

    def checkPersistence(self, request, version):
        """
        Check if the channel should close or not.

        @param request: The request most recently received over this channel
            against which checks will be made to determine if this connection
            can remain open after a matching response is returned.

        @type version: C{bytes}
        @param version: The version of the request.

        @rtype: C{bool}
        @return: A flag which, if C{True}, indicates that this connection may
            remain open to receive another request; if C{False}, the connection
            must be closed in order to indicate the completion of the response
            to C{request}.
        """
        connection = request.requestHeaders.getRawHeaders(b"Connection")
        if connection:
            tokens = [t.lower() for t in connection[0].split(b" ")]
        else:
            tokens = []

        # Once any HTTP 0.9 or HTTP 1.0 request is received, the connection is
        # no longer allowed to be persistent. At this point in processing the
        # request, we don't yet know if it will be possible to set a
        # Content-Length in the response. If it is not, then the connection
        # will have to be closed to end an HTTP 0.9 or HTTP 1.0 response.

        # If the checkPersistence call happened later, after the Content-Length
        # has been determined (or determined not to be set), it would probably
        # be possible to have persistent connections with HTTP 0.9 and HTTP 1.0.
        # This may not be worth the effort, though. Just use HTTP 1.1, okay?

        if version == b"HTTP/1.1":
            if b"close" in tokens:
                request.responseHeaders.setRawHeaders(b"Connection", [b"close"])
                return False
            else:
                return True
        else:
            return False

    def requestDone(self, request):
        """
        Called by first request in queue when it is done.
        """
        if request != self.requests[0]:
            raise TypeError
        del self.requests[0]

        # We should only resume the producer if we're not waiting for the
        # transport.
        if not self._waitingForTransport:
            self._networkProducer.resumeProducing()

        if self.persistent:
            self._handlingRequest = False

            if self._savedTimeOut:
                self.setTimeout(self._savedTimeOut)

            # Receive our buffered data, if any.
            data = b"".join(self._dataBuffer)
            self._dataBuffer = []
            self.setLineMode(data)
        else:
            self.loseConnection()

    def timeoutConnection(self):
        self._log.info("Timing out client: {peer}", peer=str(self.transport.getPeer()))
        if self.abortTimeout is not None:
            # We use self.callLater because that's what TimeoutMixin does.
            self._abortingCall = self.callLater(
                self.abortTimeout, self.forceAbortClient
            )
        self.loseConnection()

    def forceAbortClient(self):
        """
        Called if C{abortTimeout} seconds have passed since the timeout fired,
        and the connection still hasn't gone away. This can really only happen
        on extremely bad connections or when clients are maliciously attempting
        to keep connections open.
        """
        self._log.info(
            "Forcibly timing out client: {peer}", peer=str(self.transport.getPeer())
        )
        # We want to lose track of the _abortingCall so that no-one tries to
        # cancel it.
        self._abortingCall = None
        self.transport.abortConnection()

    def connectionLost(self, reason):
        self.setTimeout(None)
        for request in self.requests:
            request.connectionLost(reason)

        # If we were going to force-close the transport, we don't have to now.
        if self._abortingCall is not None:
            self._abortingCall.cancel()
            self._abortingCall = None

    def isSecure(self):
        """
        Return L{True} if this channel is using a secure transport.

        Normally this method returns L{True} if this instance is using a
        transport that implements L{interfaces.ISSLTransport}.

        @returns: L{True} if this request is secure
        @rtype: C{bool}
        """
        if interfaces.ISSLTransport(self.transport, None) is not None:
            return True
        return False

    def writeHeaders(self, version, code, reason, headers):
        """Called by L{Request} objects to write a complete set of HTTP headers to
        a transport.

        @param version: The HTTP version in use.
        @type version: L{bytes}

        @param code: The HTTP status code to write.
        @type code: L{bytes}

        @param reason: The HTTP reason phrase to write.
        @type reason: L{bytes}

        @param headers: The headers to write to the transport.
        @type headers: L{twisted.web.http_headers.Headers}, or (for backwards
            compatibility purposes only) any iterable of two-tuples of
            L{bytes}, representing header names and header values. The latter
            option is not actually used by Twisted.

        """
        if not isinstance(headers, Headers):
            # Turn into Headers instance for security reasons, to make sure we
            # quote and sanitize everything. This variant should be removed
            # eventually, it's only here for backwards compatibility.
            sanitizedHeaders = Headers()
            for name, value in headers:
                sanitizedHeaders.addRawHeader(name, value)
            headers = sanitizedHeaders

        headerSequence = [version, b" ", code, b" ", reason, b"\r\n"]
        for name, values in headers.getAllRawHeaders():
            for value in values:
                headerSequence.extend((name, b": ", value, b"\r\n"))
        headerSequence.append(b"\r\n")
        self.transport.writeSequence(headerSequence)

    def write(self, data):
        """
        Called by L{Request} objects to write response data.

        @param data: The data chunk to write to the stream.
        @type data: L{bytes}

        @return: L{None}
        """
        self.transport.write(data)

    def writeSequence(self, iovec):
        """
        Write a list of strings to the HTTP response.

        @param iovec: A list of byte strings to write to the stream.
        @type iovec: L{list} of L{bytes}

        @return: L{None}
        """
        self.transport.writeSequence(iovec)

    def getPeer(self):
        """
        Get the remote address of this connection.

        @return: An L{IAddress} provider.
        """
        return self.transport.getPeer()

    def getHost(self):
        """
        Get the local address of this connection.

        @return: An L{IAddress} provider.
        """
        return self.transport.getHost()

    def loseConnection(self):
        """
        Closes the connection. Will write any data that is pending to be sent
        on the network, but if this response has not yet been written to the
        network, will not write anything.

        @return: L{None}
        """
        self._networkProducer.unregisterProducer()
        return self.transport.loseConnection()

    def registerProducer(self, producer, streaming):
        """
        Register to receive data from a producer.

        This sets self to be a consumer for a producer. When this object runs
        out of data (as when a send(2) call on a socket succeeds in moving the
        last data from a userspace buffer into a kernelspace buffer), it will
        ask the producer to resumeProducing().

        For L{IPullProducer} providers, C{resumeProducing} will be called once
        each time data is required.

        For L{IPushProducer} providers, C{pauseProducing} will be called
        whenever the write buffer fills up and C{resumeProducing} will only be
        called when it empties.

        @type producer: L{IProducer} provider
        @param producer: The L{IProducer} that will be producing data.

        @type streaming: L{bool}
        @param streaming: C{True} if C{producer} provides L{IPushProducer},
            C{False} if C{producer} provides L{IPullProducer}.

        @raise RuntimeError: If a producer is already registered.

        @return: L{None}
        """
        if self._requestProducer is not None:
            raise RuntimeError(
                "Cannot register producer %s, because producer %s was never "
                "unregistered." % (producer, self._requestProducer)
            )

        if not streaming:
            producer = _PullToPush(producer, self)

        self._requestProducer = producer
        self._requestProducerStreaming = streaming

        if not streaming:
            producer.startStreaming()

    def unregisterProducer(self):
        """
        Stop consuming data from a producer, without disconnecting.

        @return: L{None}
        """
        if self._requestProducer is None:
            return

        if not self._requestProducerStreaming:
            self._requestProducer.stopStreaming()

        self._requestProducer = None
        self._requestProducerStreaming = None

    def stopProducing(self):
        """
        Stop producing data.

        The HTTPChannel doesn't *actually* implement this, because the
        assumption is that it will only be called just before C{loseConnection}
        is called. There's nothing sensible we can do other than call
        C{loseConnection} anyway.
        """
        if self._requestProducer is not None:
            self._requestProducer.stopProducing()

    def pauseProducing(self):
        """
        Pause producing data.

        This will be called by the transport when the send buffers have been
        filled up. We want to simultaneously pause the producing L{Request}
        object and also pause our transport.

        The logic behind pausing the transport is specifically to avoid issues
        like https://twistedmatrix.com/trac/ticket/8868. In this case, our
        inability to send does not prevent us handling more requests, which
        means we increasingly queue up more responses in our send buffer
        without end. The easiest way to handle this is to ensure that if we are
        unable to send our responses, we will not read further data from the
        connection until the client pulls some data out. This is a bit of a
        blunt instrument, but it's ok.

        Note that this potentially interacts with timeout handling in a
        positive way. Once the transport is paused the client may run into a
        timeout which will cause us to tear the connection down. That's a good
        thing!
        """
        self._waitingForTransport = True

        # The first step is to tell any producer we might currently have
        # registered to stop producing. If we can slow our applications down
        # we should.
        if self._requestProducer is not None:
            self._requestProducer.pauseProducing()

        # The next step here is to pause our own transport, as discussed in the
        # docstring.
        if not self._handlingRequest:
            self._networkProducer.pauseProducing()

    def resumeProducing(self):
        """
        Resume producing data.

        This will be called by the transport when the send buffer has dropped
        enough to actually send more data. When this happens we can unpause any
        outstanding L{Request} producers we have, and also unpause our
        transport.
        """
        self._waitingForTransport = False

        if self._requestProducer is not None:
            self._requestProducer.resumeProducing()

        # We only want to resume the network producer if we're not currently
        # waiting for a response to show up.
        if not self._handlingRequest:
            self._networkProducer.resumeProducing()

    def _send100Continue(self):
        """
        Sends a 100 Continue response, used to signal to clients that further
        processing will be performed.
        """
        self.transport.write(b"HTTP/1.1 100 Continue\r\n\r\n")

    def _respondToBadRequestAndDisconnect(self):
        """
        This is a quick and dirty way of responding to bad requests.

        As described by the HTTP standard, we should be patient and accept the
        whole request from the client before sending a polite bad request
        response, even in the case when clients send tons of data.
        """
        self.transport.write(b"HTTP/1.1 400 Bad Request\r\n\r\n")
        self.loseConnection()
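

# Illustrative sketch (not part of the module's API): serving one request
# through an HTTPChannel entirely in memory. The resource logic lives in a
# Request subclass overriding process(), and StringTransport (from
# twisted.internet.testing) stands in for a real socket, roughly the way
# Twisted's own tests drive the channel. All names here are example choices.
def _exampleChannelRoundTrip() -> None:
    from twisted.internet.testing import StringTransport

    class _HelloRequest(Request):
        def process(self):
            # Called by requestReceived() once the full request is parsed.
            self.setResponseCode(OK)
            self.setHeader(b"Content-Length", b"5")
            self.write(b"hello")
            self.finish()

    channel = HTTPChannel()
    channel.requestFactory = _HelloRequest
    transport = StringTransport()
    channel.makeConnection(transport)
    channel.dataReceived(b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n")
    assert b"200 OK" in transport.value()
    assert transport.value().endswith(b"hello")
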
def _escape(s):
    """
    Return a string like Python repr, but always escaped as if surrounding
    quotes were double quotes.

    @param s: The string to escape.
    @type s: L{bytes} or L{str}

    @return: An escaped string.
    @rtype: L{str}
    """
    if not isinstance(s, bytes):
        s = s.encode("ascii")

    r = repr(s)
    if not isinstance(r, str):
        r = r.decode("ascii")
    if r.startswith("b"):
        r = r[1:]
    if r.startswith("'"):
        return r[1:-1].replace('"', '\\"').replace("\\'", "'")
    return r[1:-1]
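

# Illustrative sketch (not part of the module's API): _escape renders bytes
# the way repr() would, but escaped for placement inside double quotes, which
# is how combinedLogFormatter below quotes the referrer and user-agent fields.
def _exampleEscape() -> None:
    assert _escape(b'curl/8.0 "test"') == 'curl/8.0 \\"test\\"'
    assert _escape("plain") == "plain"
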
@provider(IAccessLogFormatter)
def combinedLogFormatter(timestamp, request):
    """
    @return: A combined log formatted log line for the given request.

    @see: L{IAccessLogFormatter}
    """
    clientAddr = request.getClientAddress()
    if isinstance(
        clientAddr, (address.IPv4Address, address.IPv6Address, _XForwardedForAddress)
    ):
        ip = clientAddr.host
    else:
        ip = b"-"
    referrer = _escape(request.getHeader(b"referer") or b"-")
    agent = _escape(request.getHeader(b"user-agent") or b"-")
    line = (
        '"%(ip)s" - - %(timestamp)s "%(method)s %(uri)s %(protocol)s" '
        '%(code)d %(length)s "%(referrer)s" "%(agent)s"'
        % dict(
            ip=_escape(ip),
            timestamp=timestamp,
            method=_escape(request.method),
            uri=_escape(request.uri),
            protocol=_escape(request.clientproto),
            code=request.code,
            length=request.sentLength or "-",
            referrer=referrer,
            agent=agent,
        )
    )
    return line


@implementer(interfaces.IAddress)
class _XForwardedForAddress:
    """
    L{IAddress} which represents the client IP to log for a request, as gleaned
    from an X-Forwarded-For header.

    @ivar host: An IP address or C{b"-"}.
    @type host: L{bytes}

    @see: L{proxiedLogFormatter}
    """

    def __init__(self, host):
        self.host = host


class _XForwardedForRequest(proxyForInterface(IRequest, "_request")):  # type: ignore[misc]
    """
    Add a layer on top of another request that only uses the value of an
    X-Forwarded-For header as the result of C{getClientAddress}.
    """

    def getClientAddress(self):
        """
        The client address (the first address) in the value of the
        I{X-Forwarded-For header}. If the header is not present, the IP is
        considered to be C{b"-"}.

        @return: L{_XForwardedForAddress} which wraps the client address as
            expected by L{combinedLogFormatter}.
        """
        host = (
            self._request.requestHeaders.getRawHeaders(b"X-Forwarded-For", [b"-"])[0]
            .split(b",")[0]
            .strip()
        )
        return _XForwardedForAddress(host)

    # These are missing from the interface. Forward them manually.
    @property
    def clientproto(self):
        """
        @return: The protocol version in the request.
        @rtype: L{bytes}
        """
        return self._request.clientproto

    @property
    def code(self):
        """
        @return: The response code for the request.
        @rtype: L{int}
        """
        return self._request.code

    @property
    def sentLength(self):
        """
        @return: The number of bytes sent in the response body.
        @rtype: L{int}
        """
        return self._request.sentLength


@provider(IAccessLogFormatter)
def proxiedLogFormatter(timestamp, request):
    """
    @return: A combined log formatted log line for the given request but using
        the value of the I{X-Forwarded-For} header as the value for the client
        IP address.

    @see: L{IAccessLogFormatter}
    """
    return combinedLogFormatter(timestamp, _XForwardedForRequest(request))


class _GenericHTTPChannelProtocol(proxyForInterface(IProtocol, "_channel")):  # type: ignore[misc]
    """
    A proxy object that wraps one of the HTTP protocol objects, and switches
    between them depending on TLS negotiated protocol.

    @ivar _negotiatedProtocol: The protocol negotiated with ALPN or NPN, if
        any.
    @type _negotiatedProtocol: Either a bytestring containing the ALPN token
        for the negotiated protocol, or L{None} if no protocol has yet been
        negotiated.

    @ivar _channel: The object capable of behaving like a L{HTTPChannel} that
        is backing this object. By default this is a L{HTTPChannel}, but if a
        HTTP protocol upgrade takes place this may be a different channel
        object. Must implement L{IProtocol}.
    @type _channel: L{HTTPChannel}

    @ivar _requestFactory: A callable to use to build L{IRequest} objects.
    @type _requestFactory: L{IRequest}

    @ivar _site: A reference to the creating L{twisted.web.server.Site} object.
    @type _site: L{twisted.web.server.Site}

    @ivar _factory: A reference to the creating L{HTTPFactory} object.
    @type _factory: L{HTTPFactory}

    @ivar _timeOut: A timeout value to pass to the backing channel.
    @type _timeOut: L{int} or L{None}

    @ivar _callLater: A value for the C{callLater} callback.
    @type _callLater: L{callable}
    """

    _negotiatedProtocol = None
    _requestFactory = Request
    _factory = None
    _site = None
    _timeOut = None
    _callLater = None

    @property
    def factory(self):
        """
        @see: L{_genericHTTPChannelProtocolFactory}
        """
        return self._channel.factory

    @factory.setter
    def factory(self, value):
        self._factory = value
        self._channel.factory = value

    @property
    def requestFactory(self):
        """
        A callable to use to build L{IRequest} objects.

        Retrieves the object from the current backing channel.
        """
        return self._channel.requestFactory

    @requestFactory.setter
    def requestFactory(self, value):
        """
        A callable to use to build L{IRequest} objects.

        Sets the object on the backing channel and also stores the value for
        propagation to any new channel.

        @param value: The new callable to use.
        @type value: A L{callable} returning L{IRequest}
        """
        self._requestFactory = value
        self._channel.requestFactory = value

    @property
    def site(self):
        """
        A reference to the creating L{twisted.web.server.Site} object.

        Returns the site object from the backing channel.
        """
        return self._channel.site

    @site.setter
    def site(self, value):
        """
        A reference to the creating L{twisted.web.server.Site} object.

        Sets the object on the backing channel and also stores the value for
        propagation to any new channel.

        @param value: The L{twisted.web.server.Site} object to set.
        @type value: L{twisted.web.server.Site}
        """
        self._site = value
        self._channel.site = value

    @property
    def timeOut(self):
        """
        The idle timeout for the backing channel.
        """
        return self._channel.timeOut

    @timeOut.setter
    def timeOut(self, value):
        """
        The idle timeout for the backing channel.

        Sets the idle timeout on the backing channel and stores it for
        propagation to any new backing channel.

        @param value: The timeout to set.
        @type value: L{int} or L{float}
        """
        self._timeOut = value
        self._channel.timeOut = value

    @property
    def callLater(self):
        """
        A value for the C{callLater} callback. This callback is used by the
        L{twisted.protocols.policies.TimeoutMixin} to handle timeouts.
        """
        return self._channel.callLater

    @callLater.setter
    def callLater(self, value):
        """
        Sets the value for the C{callLater} callback. This callback is used by
        the L{twisted.protocols.policies.TimeoutMixin} to handle timeouts.

        @param value: The new callback to use.
        @type value: L{callable}
        """
        self._callLater = value
        self._channel.callLater = value

    def dataReceived(self, data):
        """
        An override of L{IProtocol.dataReceived} that checks what protocol we're
        using.
        """
        if self._negotiatedProtocol is None:
            negotiatedProtocol = getattr(
                self._channel.transport, "negotiatedProtocol", b"http/1.1"
            )

            if negotiatedProtocol is None:
                negotiatedProtocol = b"http/1.1"

            if negotiatedProtocol == b"h2":
                if not H2_ENABLED:
                    raise ValueError("Negotiated HTTP/2 without support.")

                # We need to make sure that the HTTPChannel is unregistered
                # from the transport so that the H2Connection can register
                # itself if possible.
                networkProducer = self._channel._networkProducer
                networkProducer.unregisterProducer()

                # Cancel the old channel's timeout.
                self._channel.setTimeout(None)

                transport = self._channel.transport
                self._channel = H2Connection()
                self._channel.requestFactory = self._requestFactory
                self._channel.site = self._site
                self._channel.factory = self._factory
                self._channel.timeOut = self._timeOut
                self._channel.callLater = self._callLater
                self._channel.makeConnection(transport)

                # Register the H2Connection as the transport's
                # producer, so that the transport can apply back
                # pressure.
                networkProducer.registerProducer(self._channel, True)
            else:
                # Only HTTP/2 and HTTP/1.1 are supported right now.
                assert (
                    negotiatedProtocol == b"http/1.1"
                ), "Unsupported protocol negotiated"

            self._negotiatedProtocol = negotiatedProtocol

        return self._channel.dataReceived(data)


def _genericHTTPChannelProtocolFactory(self):
    """
    Returns an appropriately initialized _GenericHTTPChannelProtocol.
    """
    return _GenericHTTPChannelProtocol(HTTPChannel())


class _MinimalLogFile(TypingProtocol):
    def write(self, data: str, /) -> object:
        """
        Write some data.
        """

    def close(self) -> None:
        """
        Close the file.
        """


# Static conformance check: annotating this assignment has the type checker
# verify that TextIOWrapper satisfies the _MinimalLogFile protocol.
value: type[_MinimalLogFile] = TextIOWrapper
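

# Illustrative sketch (not part of the module's API): any object with
# compatible write(str) and close() methods satisfies _MinimalLogFile, so an
# in-memory sink can be assigned to HTTPFactory.logFile (defined below). The
# class and attribute names here are example choices.
def _exampleLogSink() -> None:
    class _ListLog:
        def __init__(self) -> None:
            self.entries: list[str] = []

        def write(self, data: str, /) -> object:
            self.entries.append(data)
            return None

        def close(self) -> None:
            pass

    factory = HTTPFactory()
    factory.logFile = _ListLog()
    factory.logFile.write("access log line\n")
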
|
||
|
|
||
|
|
||
|
class HTTPFactory(protocol.ServerFactory):
|
||
|
"""
|
||
|
Factory for HTTP server.
|
||
|
|
||
|
@ivar _logDateTime: A cached datetime string for log messages, updated by
|
||
|
C{_logDateTimeCall}.
|
||
|
@type _logDateTime: C{str}
|
||
|
|
||
|
@ivar _logDateTimeCall: A delayed call for the next update to the cached
|
||
|
log datetime string.
|
||
|
@type _logDateTimeCall: L{IDelayedCall} provided
|
||
|
|
||
|
@ivar _logFormatter: See the C{logFormatter} parameter to L{__init__}
|
||
|
|
||
|
@ivar _nativeize: A flag that indicates whether the log file being written
|
||
|
to wants native strings (C{True}) or bytes (C{False}). This is only to
|
||
|
support writing to L{twisted.python.log} which, unfortunately, works
|
||
|
with native strings.
|
||
|
|
||
|
@ivar reactor: An L{IReactorTime} provider used to manage connection
|
||
|
timeouts and compute logging timestamps.
|
||
|
"""
|
||
|
|
||
|
# We need to ignore the mypy error here, because
|
||
|
# _genericHTTPChannelProtocolFactory is a callable which returns a proxy
|
||
|
# to a Protocol, instead of a concrete Protocol object, as expected in
|
||
|
# the protocol.Factory interface
|
||
|
protocol = _genericHTTPChannelProtocolFactory # type: ignore[assignment]
|
||
|
|
||
|
logPath = None
|
||
|
_logFile: _MinimalLogFile | None = None
|
||
|
|
||
|
timeOut: int | float | None = _REQUEST_TIMEOUT
|
||
|
|
||
|
    def __init__(
        self,
        logPath: str | bytes | None = None,
        timeout: int | float | None = _REQUEST_TIMEOUT,
        logFormatter: IAccessLogFormatter | None = None,
        reactor: IReactorTime | None = None,
    ):
        """
        @param logPath: File path to which access log messages will be
            written, or C{None} to disable logging.
        @type logPath: L{str} or L{bytes}

        @param timeout: The initial value of L{timeOut}, which defines the
            idle connection timeout in seconds, or C{None} to disable the
            idle timeout.
        @type timeout: L{int} or L{float}

        @param logFormatter: An object to format requests into log lines for
            the access log.  L{combinedLogFormatter} is used when C{None} is
            passed.
        @type logFormatter: L{IAccessLogFormatter} provider

        @param reactor: An L{IReactorTime} provider used to manage connection
            timeouts and compute logging timestamps.  Defaults to the global
            reactor.
        """
        if reactor is None:
            # Import the global reactor lazily, only when one isn't supplied.
            from twisted.internet import reactor  # type:ignore[assignment]
        self.reactor: IReactorTime = reactor  # type:ignore[assignment]

        if logPath is not None:
            logPath = os.path.abspath(logPath)
        self.logPath = logPath
        self.timeOut = timeout
        if logFormatter is None:
            logFormatter = combinedLogFormatter
        self._logFormatter = logFormatter

        # For storing the cached log datetime and the callback to update it.
        self._logDateTime: str | None = None
        self._logDateTimeCall: IDelayedCall | None = None

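    # Usage sketch (illustrative; the port number and log file name are
    # hypothetical):
    #
    #     from twisted.internet import reactor
    #
    #     factory = HTTPFactory(logPath=b"access.log", timeout=60 * 10)
    #     reactor.listenTCP(8080, factory)
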
    logFile = property()
    """
    A file (an object with C{write(data: str)} and C{close()} methods) that
    will be used for logging HTTP requests and responses in the standard
    U{Combined Log Format <https://en.wikipedia.org/wiki/Common_Log_Format>}.

    @note: for backwards compatibility purposes, this may be I{set} to an
        object with a C{write(data: bytes)} method; such objects are detected
        (by checking whether they are instances of L{BufferedIOBase}) and
        wrapped in a L{TextIOWrapper} by the setter, so getting the attribute
        again returns a text-mode file.
    """

    @logFile.getter
    def _get_logFile(self) -> _MinimalLogFile:
        if self._logFile is None:
            raise AttributeError("no log file present")
        return self._logFile

    @_get_logFile.setter
    def _set_logFile(self, newLogFile: BufferedIOBase | _MinimalLogFile) -> None:
        if isinstance(newLogFile, BufferedIOBase):
            # Wrap byte-oriented files so that the rest of the factory can
            # always write text.
            newLogFile = TextIOWrapper(
                newLogFile,  # type:ignore[arg-type]
                "utf-8",
                write_through=True,
                newline="\n",
            )
        self._logFile = newLogFile

    # Each decorator above produced a new property object under a private
    # name; rebind the finished property (getter and setter) to logFile.
    logFile = _set_logFile

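    # Backwards-compatibility sketch (the file name is illustrative):
    # assigning a binary file triggers the TextIOWrapper wrapping performed
    # by the setter above.
    #
    #     factory.logFile = open("access.log", "ab")     # a BufferedIOBase
    #     factory.logFile.write("GET / HTTP/1.1 ...\n")  # wrapped: takes str
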
    def _updateLogDateTime(self) -> None:
        """
        Update the cached log datetime periodically, so we aren't always
        recalculating it.
        """
        self._logDateTime = datetimeToLogString(self.reactor.seconds())
        self._logDateTimeCall = self.reactor.callLater(1, self._updateLogDateTime)

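    # The cache trades at most one second of timestamp staleness for not
    # re-rendering the string on every request.  Illustrative output of
    # datetimeToLogString (the exact value depends on the reactor clock):
    #
    #     datetimeToLogString(1234567890.0)
    #     # -> '[13/Feb/2009:23:31:30 +0000]'
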
    def buildProtocol(self, addr: IAddress) -> Protocol | None:
        p = protocol.ServerFactory.buildProtocol(self, addr)

        # This is a bit of a hack to ensure that the HTTPChannel timeouts
        # occur on the same reactor as the one we're using here.  This could
        # ideally be resolved by passing the reactor more generally to the
        # HTTPChannel, but that won't work for the TimeoutMixin until we fix
        # https://twistedmatrix.com/trac/ticket/8488
        p.callLater = self.reactor.callLater  # type:ignore[union-attr]

        # timeOut needs to be on the Protocol instance, because TimeoutMixin
        # expects it there.
        p.timeOut = self.timeOut  # type:ignore[union-attr]
        return p

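    # Test-oriented sketch: driving the idle timeout from a deterministic
    # clock.  task.Clock is Twisted's standard fake IReactorTime; the rest of
    # the setup is elided:
    #
    #     from twisted.internet import task
    #
    #     clock = task.Clock()
    #     factory = HTTPFactory(reactor=clock)
    #     proto = factory.buildProtocol(None)
    #     # ... wire proto up to a transport, then:
    #     clock.advance(factory.timeOut)  # the idle timeout fires on `clock`
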
    def startFactory(self) -> None:
        """
        Set up request logging if necessary.
        """
        if self._logDateTimeCall is None:
            self._updateLogDateTime()

        self._logFile = self._openLogFile(self.logPath) if self.logPath else log.logfile

    def stopFactory(self) -> None:
        """
        Tear down request logging and cancel the pending update to the
        cached log datetime string.
        """
        if self._logFile is not None:
            # Close the log file only if we opened it ourselves; the global
            # twisted.python.log file is not ours to close.
            if self._logFile is not log.logfile:
                self._logFile.close()
            self._logFile = None

        if self._logDateTimeCall is not None and self._logDateTimeCall.active():
            self._logDateTimeCall.cancel()
            self._logDateTimeCall = None

    def _openLogFile(self, path: str | bytes) -> _MinimalLogFile:
        """
        Open a file to which access log lines will be written; override in
        subclasses, e.g. to use L{twisted.python.logfile}.
        """
        # Line-buffered text mode, so each access log line is flushed
        # promptly.
        return open(path, "a", 1, newline="\n")

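    # Override sketch: rotating the access log daily.  DailyLogFile and its
    # fromFullPath constructor are real twisted.python.logfile APIs, but
    # whether its write() accepts str (as _MinimalLogFile requires) is
    # assumed here rather than guaranteed:
    #
    #     from twisted.python.logfile import DailyLogFile
    #
    #     class RotatingHTTPFactory(HTTPFactory):
    #         def _openLogFile(self, path):
    #             return DailyLogFile.fromFullPath(path)
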
    def log(self, request: Request) -> None:
        """
        Write a line representing C{request} to the access log file.

        @param request: The request object about which to log.
        """
        logFile = self._logFile
        if logFile is not None:
            line = self._logFormatter(self._logDateTime, request) + "\n"
            logFile.write(line)
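
    # An illustrative access log line as produced via combinedLogFormatter
    # (every field value here is made up):
    #
    #     "192.0.2.1" - - [13/Feb/2009:23:31:30 +0000] "GET / HTTP/1.1" 200 1234 "-" "curl/8.0.1"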