123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418 |
- import asyncio
- import io
- import os
- import pathlib
- import sys
- from contextlib import suppress
- from enum import Enum, auto
- from mimetypes import MimeTypes
- from stat import S_ISREG
- from types import MappingProxyType
- from typing import ( # noqa
- IO,
- TYPE_CHECKING,
- Any,
- Awaitable,
- Callable,
- Final,
- Iterator,
- List,
- Optional,
- Set,
- Tuple,
- Union,
- cast,
- )
- from . import hdrs
- from .abc import AbstractStreamWriter
- from .helpers import ETAG_ANY, ETag, must_be_empty_body
- from .typedefs import LooseHeaders, PathLike
- from .web_exceptions import (
- HTTPForbidden,
- HTTPNotFound,
- HTTPNotModified,
- HTTPPartialContent,
- HTTPPreconditionFailed,
- HTTPRequestRangeNotSatisfiable,
- )
- from .web_response import StreamResponse
- __all__ = ("FileResponse",)
- if TYPE_CHECKING:
- from .web_request import BaseRequest
# Type of an optional async callback that is handed a chunk of bytes.
# It is declared here but not referenced elsewhere in this part of the file.
_T_OnChunkSent = Optional[Callable[[bytes], Awaitable[None]]]

# Any non-empty value of the AIOHTTP_NOSENDFILE environment variable turns
# this flag on; FileResponse then always uses its chunked fallback sender.
NOSENDFILE: Final[bool] = bool(os.getenv("AIOHTTP_NOSENDFILE"))

# Module-private MIME registry, kept separate from the process-wide one in
# ``mimetypes`` so that the adjustments made below stay local to this module.
CONTENT_TYPES: Final[MimeTypes] = MimeTypes()

# Maps a file extension to its IANA content-coding name. Iteration order is
# the order in which pre-compressed sibling files are probed, so ".br" is
# tried before ".gz". The values are captured from the registry here, before
# the registry's encodings map is cleared later in the module.
ENCODING_EXTENSIONS = MappingProxyType(
    {
        ".br": CONTENT_TYPES.encodings_map[".br"],
        ".gz": CONTENT_TYPES.encodings_map[".gz"],
    }
)

# Content type used when nothing can be guessed from the file name.
FALLBACK_CONTENT_TYPE = "application/octet-stream"

# Extra MIME type -> extension pairs registered with CONTENT_TYPES.
# https://en.wikipedia.org/wiki/List_of_archive_formats#Compression_only
ADDITIONAL_CONTENT_TYPES = MappingProxyType(
    dict(
        (
            ("application/gzip", ".gz"),
            ("application/x-brotli", ".br"),
            ("application/x-bzip2", ".bz2"),
            ("application/x-compress", ".Z"),
            ("application/x-xz", ".xz"),
        )
    )
)
class _FileResponseResult(Enum):
    """Outcome of inspecting the requested file before a response is sent."""

    # A regular file was found and should be sent.
    SEND_FILE = auto()
    # The path is a socket or some other non-regular file.
    NOT_ACCEPTABLE = auto()
    # A request precondition (e.g. If-Match) was not satisfied.
    PRE_CONDITION_FAILED = auto()
    # The client's cached copy is still current: 304 Not Modified.
    NOT_MODIFIED = auto()
# Strong references to executor futures that are still closing file objects;
# each future is dropped from the set again once it has completed.
_CLOSE_FUTURES: Set[asyncio.Future[None]] = set()

# Empty the registry's encodings map so that guessing a content type no
# longer treats ".gz"/".br" style suffixes as encodings, then register the
# additional MIME type/extension pairs as ordinary content types.
CONTENT_TYPES.encodings_map.clear()
for content_type, extension in ADDITIONAL_CONTENT_TYPES.items():
    CONTENT_TYPES.add_type(content_type, extension)  # type: ignore[attr-defined]
class FileResponse(StreamResponse):
    """A response object that can be used to send files.

    The file is located, stat-ed and opened in a thread executor. Conditional
    request headers (If-Match, If-None-Match, If-Modified-Since,
    If-Unmodified-Since), If-Range and Range are evaluated against the file's
    size and modification time. A pre-compressed sibling file (``.br`` or
    ``.gz``) is served instead when the client's Accept-Encoding mentions the
    matching encoding.
    """

    def __init__(
        self,
        path: PathLike,
        chunk_size: int = 256 * 1024,
        status: int = 200,
        reason: Optional[str] = None,
        headers: Optional[LooseHeaders] = None,
    ) -> None:
        """Create a response that serves the file at *path*.

        *chunk_size* is the read size used by the chunked fallback sender;
        *status*, *reason* and *headers* are forwarded to the base class.
        """
        super().__init__(status=status, reason=reason, headers=headers)
        self._path = pathlib.Path(path)
        self._chunk_size = chunk_size

    def _seek_and_read(self, fobj: IO[Any], offset: int, chunk_size: int) -> bytes:
        """Seek *fobj* to *offset* and read up to *chunk_size* bytes.

        Blocking; it is run in an executor by ``_sendfile_fallback``.
        """
        fobj.seek(offset)
        return fobj.read(chunk_size)  # type: ignore[no-any-return]

    async def _sendfile_fallback(
        self, writer: AbstractStreamWriter, fobj: IO[Any], offset: int, count: int
    ) -> AbstractStreamWriter:
        """Send *count* bytes of *fobj*, starting at *offset*, through *writer*.

        To keep memory usage low, the data is transferred in chunks whose
        size is controlled by the constructor's ``chunk_size`` argument.
        Every blocking read is performed in the default executor.
        """
        chunk_size = self._chunk_size
        loop = asyncio.get_running_loop()
        # Never read more than what was asked for. A negative size must not
        # reach read(), where it would mean "read everything".
        remaining = max(count, 0)
        chunk = await loop.run_in_executor(
            None, self._seek_and_read, fobj, offset, min(chunk_size, remaining)
        )
        while chunk:
            await writer.write(chunk)
            # Account for the bytes actually read, not the requested size.
            remaining -= len(chunk)
            if remaining <= 0:
                break
            chunk = await loop.run_in_executor(
                None, fobj.read, min(chunk_size, remaining)
            )

        await writer.drain()
        return writer

    async def _sendfile(
        self, request: "BaseRequest", fobj: IO[Any], offset: int, count: int
    ) -> AbstractStreamWriter:
        """Prepare the response and send *count* bytes of *fobj* from *offset*.

        ``loop.sendfile()`` is used unless NOSENDFILE is set or
        ``self.compression`` is truthy; in those cases, and when the loop
        raises NotImplementedError, the chunked fallback is used instead.
        """
        writer = await super().prepare(request)
        assert writer is not None

        if NOSENDFILE or self.compression:
            return await self._sendfile_fallback(writer, fobj, offset, count)

        loop = request._loop
        transport = request.transport
        assert transport is not None

        try:
            await loop.sendfile(transport, fobj, offset, count)
        except NotImplementedError:
            return await self._sendfile_fallback(writer, fobj, offset, count)

        await super().write_eof()
        return writer

    @staticmethod
    def _make_etag(st: os.stat_result) -> str:
        """Build the ETag value for a file from its mtime (ns) and size, in hex."""
        return f"{st.st_mtime_ns:x}-{st.st_size:x}"

    @staticmethod
    def _etag_match(etag_value: str, etags: Tuple[ETag, ...], *, weak: bool) -> bool:
        """Return True if *etag_value* matches one of the request's *etags*.

        A single ETAG_ANY entry matches unconditionally. With ``weak=False``
        entries flagged as weak are skipped.
        """
        if len(etags) == 1 and etags[0].value == ETAG_ANY:
            return True
        return any(
            etag.value == etag_value for etag in etags if weak or not etag.is_weak
        )

    async def _not_modified(
        self, request: "BaseRequest", etag_value: str, last_modified: float
    ) -> Optional[AbstractStreamWriter]:
        """Prepare a 304 Not Modified response carrying the file's validators."""
        self.set_status(HTTPNotModified.status_code)
        # HTTP 304 should always have an empty response body.
        # NOTE(review): no Content-Length header is removed here; presumably
        # clearing _length_check lets the base class cope with a
        # user-supplied Content-Length -- confirm against StreamResponse.
        self._length_check = False
        self.etag = etag_value  # type: ignore[assignment]
        self.last_modified = last_modified  # type: ignore[assignment]
        return await super().prepare(request)

    async def _precondition_failed(
        self, request: "BaseRequest"
    ) -> Optional[AbstractStreamWriter]:
        """Prepare an empty 412 Precondition Failed response."""
        self.set_status(HTTPPreconditionFailed.status_code)
        self.content_length = 0
        return await super().prepare(request)

    def _make_response(
        self, request: "BaseRequest", accept_encoding: str
    ) -> Tuple[
        _FileResponseResult, Optional[io.BufferedReader], os.stat_result, Optional[str]
    ]:
        """Return the response result, io object, stat result, and encoding.

        If an uncompressed file is returned, the encoding is set to
        :py:data:`None`. The io object is only present for
        ``_FileResponseResult.SEND_FILE``; the caller is then responsible
        for closing it.

        This method should be called from a thread executor
        since it calls os.stat which may block.
        """
        file_path, st, file_encoding = self._get_file_path_stat_encoding(
            accept_encoding
        )
        if not file_path:
            return _FileResponseResult.NOT_ACCEPTABLE, None, st, None

        etag_value = self._make_etag(st)

        # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2
        if (ifmatch := request.if_match) is not None and not self._etag_match(
            etag_value, ifmatch, weak=False
        ):
            return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding

        # If-Unmodified-Since is only considered when If-Match is absent.
        if (
            (unmodsince := request.if_unmodified_since) is not None
            and ifmatch is None
            and st.st_mtime > unmodsince.timestamp()
        ):
            return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding

        # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2
        if (ifnonematch := request.if_none_match) is not None and self._etag_match(
            etag_value, ifnonematch, weak=True
        ):
            return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding

        # If-Modified-Since is only considered when If-None-Match is absent.
        if (
            (modsince := request.if_modified_since) is not None
            and ifnonematch is None
            and st.st_mtime <= modsince.timestamp()
        ):
            return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding

        fobj = file_path.open("rb")
        with suppress(OSError):
            # fstat() may not be available on all platforms
            # Once we open the file, we want the fstat() to ensure
            # the file has not changed between the first stat()
            # and the open().
            st = os.stat(fobj.fileno())
        return _FileResponseResult.SEND_FILE, fobj, st, file_encoding

    def _get_file_path_stat_encoding(
        self, accept_encoding: str
    ) -> Tuple[Optional[pathlib.Path], os.stat_result, Optional[str]]:
        """Pick the file to serve and return ``(path, stat result, encoding)``.

        Pre-compressed siblings (the original name plus an extension from
        ENCODING_EXTENSIONS) are tried in order. If none is usable the
        uncompressed file is stat-ed; the returned path is ``None`` when it
        is not a regular file. An OSError from that final ``stat()``
        propagates to the caller.
        """
        file_path = self._path
        for file_extension, file_encoding in ENCODING_EXTENSIONS.items():
            # NOTE(review): this is a substring test on the raw header value,
            # so q-values (e.g. "gzip;q=0") are not interpreted.
            if file_encoding not in accept_encoding:
                continue

            compressed_path = file_path.with_suffix(file_path.suffix + file_extension)
            with suppress(OSError):
                # Do not follow symlinks and ignore any non-regular files.
                st = compressed_path.lstat()
                if S_ISREG(st.st_mode):
                    return compressed_path, st, file_encoding

        # Fallback to the uncompressed file
        st = file_path.stat()
        return file_path if S_ISREG(st.st_mode) else None, st, None

    async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]:
        """Evaluate the request against the file and send the response.

        Sets 403 on PermissionError or for non-regular files, 404 on any
        other OSError, 412 or 304 when a conditional header decides so, and
        otherwise streams the (possibly partial) file content.
        """
        loop = asyncio.get_running_loop()
        # Encoding comparisons should be case-insensitive
        # https://www.rfc-editor.org/rfc/rfc9110#section-8.4.1
        accept_encoding = request.headers.get(hdrs.ACCEPT_ENCODING, "").lower()
        try:
            response_result, fobj, st, file_encoding = await loop.run_in_executor(
                None, self._make_response, request, accept_encoding
            )
        except PermissionError:
            self.set_status(HTTPForbidden.status_code)
            return await super().prepare(request)
        except OSError:
            # Most likely to be FileNotFoundError or OSError for circular
            # symlinks in python >= 3.13, so respond with 404.
            self.set_status(HTTPNotFound.status_code)
            return await super().prepare(request)

        # Forbid special files like sockets, pipes, devices, etc.
        if response_result is _FileResponseResult.NOT_ACCEPTABLE:
            self.set_status(HTTPForbidden.status_code)
            return await super().prepare(request)

        if response_result is _FileResponseResult.PRE_CONDITION_FAILED:
            return await self._precondition_failed(request)

        if response_result is _FileResponseResult.NOT_MODIFIED:
            return await self._not_modified(request, self._make_etag(st), st.st_mtime)

        assert fobj is not None
        try:
            return await self._prepare_open_file(request, fobj, st, file_encoding)
        finally:
            # We do not await here because we do not want to wait
            # for the executor to finish before returning the response
            # so the connection can begin servicing another request
            # as soon as possible.
            close_future = loop.run_in_executor(None, fobj.close)
            # Hold a strong reference to the future to prevent it from being
            # garbage collected before it completes.
            _CLOSE_FUTURES.add(close_future)
            close_future.add_done_callback(_CLOSE_FUTURES.remove)

    async def _prepare_open_file(
        self,
        request: "BaseRequest",
        fobj: io.BufferedReader,
        st: os.stat_result,
        file_encoding: Optional[str],
    ) -> Optional[AbstractStreamWriter]:
        """Set the response headers for the opened file and send its content.

        Handles If-Range and Range (206 / 416), Content-Type guessing,
        Content-Encoding for pre-compressed files, and the ETag,
        Last-Modified, Content-Length and Accept-Ranges headers. The caller
        keeps ownership of *fobj* and closes it.
        """
        status = self._status
        file_size: int = st.st_size
        file_mtime: float = st.st_mtime
        count: int = file_size
        start: Optional[int] = None
        end: Optional[int] = None

        if (ifrange := request.if_range) is None or file_mtime <= ifrange.timestamp():
            # If-Range header check:
            # condition = cached date >= last modification date
            # return 206 if True else 200.
            # if False:
            #   Range header would not be processed, return 200
            # if True but Range header missing
            #   return 200
            try:
                rng = request.http_range
                start = rng.start
                end = rng.stop
            except ValueError:
                # https://tools.ietf.org/html/rfc7233:
                # A server generating a 416 (Range Not Satisfiable) response to
                # a byte-range request SHOULD send a Content-Range header field
                # with an unsatisfied-range value.
                # The complete-length in a 416 response indicates the current
                # length of the selected representation.
                #
                # Will do the same below. Many servers ignore this and do not
                # send a Content-Range header with HTTP 416
                self._headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}"
                self.set_status(HTTPRequestRangeNotSatisfiable.status_code)
                return await super().prepare(request)

            # If a range request has been made, convert start, end slice
            # notation into file pointer offset and count
            if start is not None:
                if start < 0 and end is None:  # return tail of file
                    start += file_size
                    if start < 0:
                        # if Range:bytes=-1000 in request header but file size
                        # is only 200, there would be trouble without this
                        start = 0
                    count = file_size - start
                else:
                    # rfc7233:If the last-byte-pos value is
                    # absent, or if the value is greater than or equal to
                    # the current length of the representation data,
                    # the byte range is interpreted as the remainder
                    # of the representation (i.e., the server replaces the
                    # value of last-byte-pos with a value that is one less than
                    # the current length of the selected representation).
                    count = (
                        min(end if end is not None else file_size, file_size) - start
                    )

                if start >= file_size:
                    # HTTP 416 should be returned in this case.
                    #
                    # According to https://tools.ietf.org/html/rfc7233:
                    # If a valid byte-range-set includes at least one
                    # byte-range-spec with a first-byte-pos that is less than
                    # the current length of the representation, or at least one
                    # suffix-byte-range-spec with a non-zero suffix-length,
                    # then the byte-range-set is satisfiable. Otherwise, the
                    # byte-range-set is unsatisfiable.
                    self._headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}"
                    self.set_status(HTTPRequestRangeNotSatisfiable.status_code)
                    return await super().prepare(request)

                status = HTTPPartialContent.status_code
                # Even though you are sending the whole file, you should still
                # return a HTTP 206 for a Range request.
                self.set_status(status)

        # If the Content-Type header is not already set, guess it based on the
        # extension of the request path. The encoding returned by guess_type
        # can be ignored since the map was cleared above.
        if hdrs.CONTENT_TYPE not in self._headers:
            if sys.version_info >= (3, 13):
                guesser = CONTENT_TYPES.guess_file_type
            else:
                guesser = CONTENT_TYPES.guess_type
            self.content_type = guesser(self._path)[0] or FALLBACK_CONTENT_TYPE

        if file_encoding:
            self._headers[hdrs.CONTENT_ENCODING] = file_encoding
            self._headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING
            # Disable compression if we are already sending
            # a compressed file since we don't want to double
            # compress.
            self._compression = False

        self.etag = self._make_etag(st)  # type: ignore[assignment]
        self.last_modified = file_mtime  # type: ignore[assignment]
        self.content_length = count

        self._headers[hdrs.ACCEPT_RANGES] = "bytes"

        if status == HTTPPartialContent.status_code:
            real_start = start
            assert real_start is not None
            last_byte = real_start + count - 1
            self._headers[hdrs.CONTENT_RANGE] = (
                f"bytes {real_start}-{last_byte}/{file_size}"
            )

        # If we are sending 0 bytes calling sendfile() will throw a ValueError
        if count == 0 or must_be_empty_body(request.method, status):
            return await super().prepare(request)

        # be aware that start could be None or int=0 here.
        offset = start or 0

        return await self._sendfile(request, fobj, offset, count)
|