123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- from __future__ import annotations
- from functools import lru_cache
- from typing import Callable
- from ._cell_widths import CELL_WIDTHS
# Inclusive (first, last) codepoint ranges whose characters each occupy exactly
# one terminal cell. The list is deliberately incomplete: it only aims to cover
# the characters that show up most often in Western text.
_SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [
    (0x20, 0x7E),  # Latin (excluding non-printable)
    (0xA0, 0xAC),
    (0xAE, 0x2FF),  # resumes after 0xAD, which is left out of the table
    (0x370, 0x482),  # Greek / Cyrillic
    (0x2500, 0x25FC),  # Box drawing, box elements, geometric shapes
    (0x2800, 0x28FF),  # Braille
]

# Every character covered by the ranges above, gathered into one immutable set.
_SINGLE_CELLS = frozenset(
    chr(codepoint)
    for first, last in _SINGLE_CELL_UNICODE_RANGES
    for codepoint in range(first, last + 1)  # +1 because `last` is inclusive
)

# Predicate over a whole string: True when each of its characters is known to
# be one cell wide (so its cell width equals its length), False otherwise.
_is_single_cell_widths: Callable[[str], bool] = _SINGLE_CELLS.issuperset
@lru_cache(4096)
def cached_cell_len(text: str) -> int:
    """Measure how many terminal cells ``text`` occupies, memoising the result.

    Results are kept in an LRU cache of up to 4096 entries, which can hold on
    to a lot of memory. Prefer `cell_len` over calling this directly.

    Args:
        text (str): Text to measure.

    Returns:
        int: The number of cells required to display ``text``.
    """
    if not _is_single_cell_widths(text):
        # At least one character is outside the known single-cell set, so
        # every character's width has to be looked up and added together.
        return sum(get_character_cell_size(character) for character in text)
    # All characters are one cell wide: the width is simply the length.
    return len(text)
def cell_len(text: str, _cell_len: Callable[[str], int] = cached_cell_len) -> int:
    """Measure how many terminal cells ``text`` occupies.

    Strings shorter than 512 characters are measured through ``_cell_len``
    (the caching implementation by default); longer strings are measured
    directly so they never enter the cache.

    Args:
        text (str): Text to measure.
        _cell_len (Callable[[str], int]): Measuring function used for short
            strings. Defaults to `cached_cell_len`.

    Returns:
        int: The number of cells required to display ``text``.
    """
    if len(text) >= 512:
        # Long string: compute without going through the cache.
        if _is_single_cell_widths(text):
            return len(text)
        return sum(get_character_cell_size(character) for character in text)
    return _cell_len(text)
@lru_cache(maxsize=4096)
def get_character_cell_size(character: str) -> int:
    """Look up how many terminal cells a single character occupies.

    Args:
        character (str): A single character.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    point = ord(character)
    # Each table entry unpacks as (first codepoint, last codepoint, width).
    # The binary search below assumes the entries are ordered by codepoint.
    widths = CELL_WIDTHS
    low = 0
    high = len(widths) - 1
    while True:
        middle = (low + high) // 2
        first, last, width = widths[middle]
        if first <= point <= last:
            # A stored width of -1 is reported to callers as zero cells.
            if width == -1:
                return 0
            return width
        if point < first:
            high = middle - 1
        else:
            low = middle + 1
        if low > high:
            # No range contains the codepoint: treat it as one cell wide.
            return 1
def set_cell_size(text: str, total: int) -> str:
    """Pad or crop a string so that it occupies exactly ``total`` cells.

    Text narrower than ``total`` is right-padded with spaces. Wider text is
    cropped; when the crop point falls in the middle of a double-width
    character, that character is dropped and replaced by a single space so
    the result still fills ``total`` cells.

    Args:
        text (str): Text to resize.
        total (int): Desired width in cells. Zero or a negative number
            produces an empty string.

    Returns:
        str: ``text`` padded or cropped to ``total`` cells.
    """
    # This guard must run before the single-cell fast path: with a negative
    # ``total`` the slice ``text[:total]`` would count from the end of the
    # string and return a non-empty result instead of "".
    if total <= 0:
        return ""

    if _is_single_cell_widths(text):
        # Fast path: one cell per character, so plain length arithmetic works.
        size = len(text)
        if size < total:
            return text + " " * (total - size)
        return text[:total]

    cell_size = cell_len(text)
    if cell_size == total:
        return text
    if cell_size < total:
        return text + " " * (total - cell_size)

    # The text is too wide. Binary search for the longest prefix that fits.
    start = 0
    end = len(text)
    while True:
        pos = (start + end) // 2
        before = text[: pos + 1]
        before_len = cell_len(before)
        if before_len == total + 1 and cell_len(before[-1]) == 2:
            # The prefix overshoots by one cell because it ends in a
            # double-width character: swap that character for a space.
            return before[:-1] + " "
        if before_len == total:
            return before
        if before_len > total:
            end = pos
        else:
            start = pos
def chop_cells(
    text: str,
    width: int,
) -> list[str]:
    """Break text into pieces that each fit within a given cell width.

    Characters are consumed in order; whenever the next character would push
    the current piece past ``width``, a new piece is started with that
    character.

    Args:
        text: The text to fold such that it fits in the given width.
        width: The width available (number of cells).

    Returns:
        A list of strings such that each string in the list has cell width
        less than or equal to the available width.
    """
    pieces: list[str] = []
    current: list[str] = []
    used = 0
    for character in text:
        character_width = get_character_cell_size(character)
        if used + character_width <= width:
            current.append(character)
            used += character_width
            continue
        # The character does not fit: close the current piece (even if it is
        # still empty) and open a new one that starts with this character.
        pieces.append("".join(current))
        current = [character]
        used = character_width
    # The piece being built is always emitted, so empty text yields [""].
    pieces.append("".join(current))
    return pieces
if __name__ == "__main__":  # pragma: no cover
    # Manual smoke test: prints the width of an emoji, then folds and resizes
    # a sentence made of double-width characters.
    sample = """这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。"""

    print(get_character_cell_size("😽"))

    for chunk in chop_cells(sample, 8):
        print(chunk)

    for size in range(80, 1, -1):
        # The ruler of "x" characters printed underneath shows the target width.
        print(set_cell_size(sample, size) + "|")
        print("x" * size)
|