Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## Version 6.4.0 (unreleased)

- Enhanced `monospaced_width()` to support emoji ZWJ sequences, flags, variation selectors,
skin tones, and many more languages and terminal sequences, like OSC 8 hyperlinks. It no longer
returns -1 for control characters; instead, they are either parsed for their horizontal
effect (e.g. BACKSPACE, `\b`) or ignored (e.g. BEL, `\a`).
- `display_ljust()`, `display_rjust()`, and `display_center()` now delegate directly to `wcwidth.ljust()`, `wcwidth.rjust()`, and `wcwidth.center()`, so they gain the same improvements.
- Updated `display_center()` to match standard python `str.center()` "parity-odd" spacing.

## Version 6.3.1 (October 25, 2024)

- Fixed `license` metadata field in pyproject.toml.
Expand Down
53 changes: 15 additions & 38 deletions ftfy/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
the 'wcwidth' library.
"""

from unicodedata import normalize

from wcwidth import wcswidth, wcwidth

from ftfy.fixes import remove_terminal_escapes
import wcwidth


def character_width(char: str) -> int:
Expand All @@ -31,7 +27,7 @@ def character_width(char: str) -> int:
>>> character_width('\n')
-1
"""
return int(wcwidth(char))
return wcwidth.wcwidth(char)
Copy link
Author

@jquast jquast Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `int()` wrapper was probably needed for an earlier version of wcwidth that was not typed; wcwidth is typed now, so the wrapper is no longer necessary.



def monospaced_width(text: str) -> int:
Expand All @@ -43,16 +39,12 @@ def monospaced_width(text: str) -> int:
This can be useful for formatting text that may contain non-spacing
characters, or CJK characters that take up two character cells.

Returns -1 if the string contains a non-printable or control character.

>>> monospaced_width('ちゃぶ台返し')
12
>>> len('ちゃぶ台返し')
6
>>> monospaced_width('owl\N{SOFT HYPHEN}flavored')
11
>>> monospaced_width('example\x80')
-1
12
Copy link
Author

@jquast jquast Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to SOFT HYPHEN: #226 (comment)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`'example\x80'` now returns a width of 7 instead of -1, but I removed the example from this spot anyway.


A more complex example: The Korean word 'ibnida' can be written with 3
pre-composed characters or 7 jamo. Either way, it *looks* the same and
Expand All @@ -67,13 +59,16 @@ def monospaced_width(text: str) -> int:
4 characters, when shown as intended.
>>> monospaced_width('\x1b[34mblue\x1b[m')
4

Emoji ZWJ sequences are treated as single grapheme clusters with width 2.
>>> monospaced_width('👨‍👩‍👧')
2

Control characters are parsed and treated as zero-width.
>>> monospaced_width('example\x80')
7
"""
# NFC-normalize the text first, so that we don't need special cases for
# Hangul jamo.
#
# Remove terminal escapes before calculating width, because if they are
# displayed as intended, they will have zero width.
return int(wcswidth(remove_terminal_escapes(normalize("NFC", text))))
return wcwidth.width(text, control_codes="parse")


def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
Expand Down Expand Up @@ -102,13 +97,7 @@ def display_ljust(text: str, width: int, fillchar: str = " ") -> str:
msg = "The padding character must have display width 1"
raise ValueError(msg)

text_width = monospaced_width(text)
if text_width == -1:
# There's a control character here, so just don't add padding
return text

padding = max(0, width - text_width)
return text + fillchar * padding
return wcwidth.ljust(text, width, fillchar=fillchar)


def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
Expand All @@ -133,12 +122,7 @@ def display_rjust(text: str, width: int, fillchar: str = " ") -> str:
msg = "The padding character must have display width 1"
raise ValueError(msg)

text_width = monospaced_width(text)
if text_width == -1:
return text

padding = max(0, width - text_width)
return fillchar * padding + text
return wcwidth.rjust(text, width, fillchar=fillchar)


def display_center(text: str, width: int, fillchar: str = " ") -> str:
Expand All @@ -159,11 +143,4 @@ def display_center(text: str, width: int, fillchar: str = " ") -> str:
msg = "The padding character must have display width 1"
raise ValueError(msg)

text_width = monospaced_width(text)
if text_width == -1:
return text

padding = max(0, width - text_width)
left_padding = padding // 2
right_padding = padding - left_padding
return fillchar * left_padding + text + fillchar * right_padding
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a bug here: the padding split did not match `str.center()`. See https://jazcap53.github.io/pythons-eccentric-strcenter.html

You never would have guessed it, but I made the same mistake myself (jquast/wcwidth#188).

return wcwidth.center(text, width, fillchar=fillchar)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "Fixes mojibake and other problems with Unicode, after the fact"
authors = [{ name = "Robyn Speer", email = "rspeer@arborelia.net" }]
license = { text = "Apache-2.0" }
readme = "README.md"
dependencies = ["wcwidth"]
dependencies = ["wcwidth>=0.4"]
requires-python = ">=3.9"

[project.scripts]
Expand Down
80 changes: 80 additions & 0 deletions tests/test_formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import pytest

from ftfy.formatting import (
character_width,
display_center,
display_ljust,
display_rjust,
monospaced_width,
)


class TestMonospacedWidth:
    """Expected results of ``monospaced_width()``, grouped by kind of input."""

    def test_ascii_and_cjk(self):
        """Plain ASCII, CJK text, and a mixed string containing an emoji."""
        expected_widths = {
            "hello": 5,
            "中文": 4,
            "ちゃぶ台返し": 12,
            "Hello 中文 👍": 13,
        }
        for sample, cells in expected_widths.items():
            assert monospaced_width(sample) == cells

    def test_grapheme_clusters(self):
        """Combining marks, a lone joiner, and multi-codepoint emoji sequences."""
        expected_widths = {
            "cafe\u0301": 4,
            "\u200d": 0,
            "👨‍👩‍👧": 2,
            "👩🏻‍💻": 2,
            "🇨🇦": 2,
            "❤️": 2,
        }
        for sample, cells in expected_widths.items():
            assert monospaced_width(sample) == cells

    def test_ansi_escape_sequences(self):
        """Only the visible text between SGR color sequences is counted."""
        expected_widths = {
            "\x1b[31mred\x1b[0m": 3,
            "\x1b[34mblue\x1b[m": 4,
            "\x1b[31;1mBold Red\x1b[0m": 8,
        }
        for sample, cells in expected_widths.items():
            assert monospaced_width(sample) == cells

    def test_osc8_hyperlinks(self):
        """Only the link text of an OSC 8 hyperlink is counted."""
        plain_link = "\x1b]8;;https://example.com\x07Click here\x1b]8;;\x07"
        colored_link = (
            "\x1b]8;;https://example.com\x07\x1b[34mBlue Link\x1b[0m\x1b]8;;\x07"
        )
        assert monospaced_width(plain_link) == 10
        assert monospaced_width(colored_link) == 9

    def test_control_characters(self):
        """Control characters no longer yield -1; backspaces are accounted for."""
        expected_widths = {
            "example\x80": 7,
            "aaa\b\b\bxxx": 3,
            "hello\b\bXX": 5,
        }
        for sample, cells in expected_widths.items():
            assert monospaced_width(sample) == cells


class TestCharacterWidth:
    """Expected results of ``character_width()`` for single characters."""

    def test_character_widths(self):
        """One narrow, one wide, one zero-width, and one control character."""
        expected_widths = {
            "A": 1,
            "車": 2,
            "\N{ZERO WIDTH JOINER}": 0,
            "\n": -1,
        }
        for char, cells in expected_widths.items():
            assert character_width(char) == cells


class TestDisplayJustify:
    """Expected results of ``display_ljust()``, ``display_rjust()`` and ``display_center()``.

    Padding is written as ``" " * n`` rather than as literal runs of spaces, so
    the intended number of fill cells is explicit and cannot be lost if
    whitespace in this file is ever collapsed.
    """

    def test_ljust(self):
        """Text is padded on the right up to the requested display width."""
        assert display_ljust("hello", 10) == "hello" + " " * 5
        # Wide characters occupy two cells, so only two cells of padding remain.
        assert display_ljust("中", 4) == "中" + " " * 2
        assert display_ljust("👍", 4) == "👍" + " " * 2
        # Text already wider than the target is returned unchanged.
        assert display_ljust("hello", 3) == "hello"
        assert display_ljust("hi", 5, ".") == "hi..."

    def test_rjust(self):
        """Text is padded on the left up to the requested display width."""
        assert display_rjust("hello", 10) == " " * 5 + "hello"
        assert display_rjust("中", 4) == " " * 2 + "中"
        assert display_rjust("👍", 4) == " " * 2 + "👍"

    def test_center(self):
        """Text is padded on both sides up to the requested display width."""
        assert display_center("hi", 6) == " " * 2 + "hi" + " " * 2
        assert display_center("中", 6) == " " * 2 + "中" + " " * 2
        # Odd padding: the extra cell goes on the left here, which is what
        # ``"hi".center(5)`` produces.
        assert display_center("hi", 5) == " " * 2 + "hi" + " "

    def test_invalid_fillchar(self):
        """A fill character whose display width is not 1 raises ``ValueError``."""
        with pytest.raises(ValueError, match="display width 1"):
            display_ljust("hi", 10, "中")
        with pytest.raises(ValueError, match="display width 1"):
            display_ljust("hi", 10, "\u200d")
        with pytest.raises(ValueError, match="display width 1"):
            display_rjust("hi", 10, "中")
        with pytest.raises(ValueError, match="display width 1"):
            display_center("hi", 10, "中")