How to use ftfy - 10 common examples

To help you get started, we’ve selected a few ftfy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

mholtzscher / spacy_readability / tests / test_books.py View on GitHub

def test_dale_chall(text, expected, nlp):
    """Check the Dale-Chall score that the pipeline attaches to a document.

    The input is first repaired with ``ftfy.fix_text`` and every run of
    whitespace is collapsed to a single space before it is handed to ``nlp``.
    The resulting ``doc._.dale_chall`` must equal ``expected`` within a
    relative tolerance of 1e-2.
    """
    repaired = ftfy.fix_text(text)
    # split() with no arguments drops leading/trailing whitespace and treats
    # any whitespace run as one separator, so the join yields single spaces.
    words = repaired.split()
    doc = nlp(" ".join(words))
    assert pytest.approx(expected, rel=1e-2) == doc._.dale_chall

mholtzscher / spacy_readability / tests / test_books.py View on GitHub

def test_linsear_write(text, expected, nlp):
    """Check the Linsear Write score that the pipeline attaches to a document.

    The input is first repaired with ``ftfy.fix_text`` and every run of
    whitespace is collapsed to a single space before it is handed to ``nlp``.
    The resulting ``doc._.linsear_write`` must equal ``expected`` within a
    relative tolerance of 1e-2.
    """
    repaired = ftfy.fix_text(text)
    # split() with no arguments drops leading/trailing whitespace and treats
    # any whitespace run as one separator, so the join yields single spaces.
    words = repaired.split()
    doc = nlp(" ".join(words))
    assert pytest.approx(expected, rel=1e-2) == doc._.linsear_write

LuminosoInsight / python-ftfy / tests / test_entities.py View on GitHub

def test_entities():
    """Exercise entity handling in fix_text, fix_text_segment and unescape_html."""
    # NOTE(review): several literals below look as if HTML entities were
    # decoded (and possibly markup dropped) when this listing was extracted:
    # the 'broken' input does not contain the '\x81' its expected value ends
    # with, and the two 'ellipsis…' assertions are identical. The strings are
    # reproduced exactly as listed -- confirm them against the upstream ftfy
    # test before relying on them.
    example = '&amp;\n\n&amp;'

    # Default entity handling.
    assert fix_text(example) == '&amp;\n\n&amp;'
    assert fix_text_segment(example) == '&amp;\n\n&amp;'

    # Entity fixing explicitly switched on.
    assert fix_text(example, fix_entities=True) == '&amp;\n\n&amp;'
    assert fix_text_segment(example, fix_entities=True) == '&amp;\n\n&amp;'

    # Entity fixing explicitly switched off.
    assert fix_text(example, fix_entities=False) == '&amp;\n\n&amp;'
    assert fix_text_segment(example, fix_entities=False) == '&amp;\n\n&amp;'

    # Angle-bracket entities under each setting.
    assert fix_text_segment('&lt;&gt;', fix_entities=False) == '&lt;&gt;'
    assert fix_text_segment('&lt;&gt;', fix_entities=True) == '&lt;&gt;'
    assert fix_text_segment('&lt;&gt;') == '&lt;&gt;'

    # Non-ASCII letters, lower and upper case.
    assert fix_text_segment('jednocześnie') == 'jednocześnie'
    assert fix_text_segment('JEDNOCZEŚNIE') == 'JEDNOCZEŚNIE'

    # With NFKC normalization the ellipsis character becomes three dots.
    assert fix_text_segment('ellipsis…', normalization='NFKC') == 'ellipsis...'
    assert fix_text_segment('ellipsis…', normalization='NFKC') == 'ellipsis...'

    assert fix_text_segment('broken') == 'broken\x81'

    # unescape_html called directly.
    assert unescape_html('euro €') == 'euro €'
    assert unescape_html('not an entity x6;') == 'not an entity x6;'

LuminosoInsight / python-ftfy / tests / test_entities.py View on GitHub

# Excerpt of entity-handling assertions; `example` is used here but is not
# assigned anywhere inside this excerpt.
# NOTE(review): several literals look as if HTML entities were decoded when
# this listing was extracted (the 'broken' input does not contain the '\x81'
# its expected value ends with; the two 'ellipsis…' assertions are identical).
# The strings are reproduced exactly as listed -- confirm against the
# upstream ftfy test before relying on them.

# Entity fixing explicitly switched on.
assert fix_text(example, fix_entities=True) == '&amp;\n\n&amp;'
assert fix_text_segment(example, fix_entities=True) == '&amp;\n\n&amp;'

# Entity fixing explicitly switched off.
assert fix_text(example, fix_entities=False) == '&amp;\n\n&amp;'
assert fix_text_segment(example, fix_entities=False) == '&amp;\n\n&amp;'

# Angle-bracket entities under each setting.
assert fix_text_segment('&lt;&gt;', fix_entities=False) == '&lt;&gt;'
assert fix_text_segment('&lt;&gt;', fix_entities=True) == '&lt;&gt;'
assert fix_text_segment('&lt;&gt;') == '&lt;&gt;'

# Non-ASCII letters, lower and upper case.
assert fix_text_segment('jednocześnie') == 'jednocześnie'
assert fix_text_segment('JEDNOCZEŚNIE') == 'JEDNOCZEŚNIE'

# With NFKC normalization the ellipsis character becomes three dots.
assert fix_text_segment('ellipsis…', normalization='NFKC') == 'ellipsis...'
assert fix_text_segment('ellipsis…', normalization='NFKC') == 'ellipsis...'

assert fix_text_segment('broken') == 'broken\x81'

# unescape_html called directly.
assert unescape_html('euro €') == 'euro €'
assert unescape_html('not an entity x6;') == 'not an entity x6;'

LuminosoInsight / python-ftfy / ftfy / streamtester / __init__.py View on GitHub

def check_ftfy(self, text, encoding_only=True): """ Given a single text input, check whether `ftfy.fix_text_encoding` would change it. If so, display the change. """ self.count += 1  text = unescape_html(text) if not possible_encoding(text, 'ascii'): if encoding_only: fixed = fix_encoding(text) else: fixed = fix_text(text, uncurl_quotes=False, fix_character_width=False) if text != fixed: # possibly filter common bots before printing print('\nText:\t{text!r}\nFixed:\t{fixed!r}\n'.format( text=text, fixed=fixed )) self.num_fixed += 1 elif 'â€' in text or '\x80' in text: print('\nNot fixed:\t{text!r}'.format(text=text)) # Print status updates once in a while if self.count % 100 == 0:

LuminosoInsight / python-ftfy / tests / test_characters.py View on GitHub

def test_surrogates():
    """fix_surrogates must merge a surrogate pair into the one code point it encodes."""
    cases = (
        ('\udbff\udfff', '\U0010ffff'),  # last possible pair -> U+10FFFF
        ('\ud800\udc00', '\U00010000'),  # first possible pair -> U+10000
    )
    for paired, merged in cases:
        assert fix_surrogates(paired) == merged

LuminosoInsight / python-ftfy / tests / test_characters.py View on GitHub

def test_surrogates():
    """fix_surrogates must merge a surrogate pair into the one code point it encodes."""
    cases = (
        ('\udbff\udfff', '\U0010ffff'),  # last possible pair -> U+10FFFF
        ('\ud800\udc00', '\U00010000'),  # first possible pair -> U+10000
    )
    for paired, merged in cases:
        assert fix_surrogates(paired) == merged

LuminosoInsight / python-ftfy / ftfy / streamtester / __init__.py View on GitHub

def check_ftfy(self, text, encoding_only=True):
    """
    Run one text through ftfy and report it if ftfy would change it.

    The text is HTML-unescaped first and `self.count` is incremented on
    every call. Unless `possible_encoding(text, 'ascii')` is true, the text
    is passed to `fix_encoding` when `encoding_only` is true, or otherwise
    to `fix_text` with `uncurl_quotes` and `fix_character_width` switched
    off. A changed text is printed next to its fixed form and counted in
    `self.num_fixed`; an unchanged text that still contains 'â€' or U+0080
    is printed as "Not fixed".

    A progress dot is printed every 100 calls and a fixed/total summary
    every 10000 calls.
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm the indentation against the original file.
    self.count += 1
    text = unescape_html(text)

    if not possible_encoding(text, 'ascii'):
        repaired = (
            fix_encoding(text)
            if encoding_only
            else fix_text(text, uncurl_quotes=False, fix_character_width=False)
        )
        if text != repaired:
            # possibly filter common bots before printing
            report = '\nText:\t{text!r}\nFixed:\t{fixed!r}\n'.format(
                text=text, fixed=repaired
            )
            print(report)
            self.num_fixed += 1
        elif 'â€' in text or '\x80' in text:
            # ftfy left the text alone although it still contains these
            # substrings, so show it.
            print('\nNot fixed:\t{text!r}'.format(text=text))

    # Print status updates once in a while
    calls = self.count
    if calls % 100 == 0:
        print('.', end='', flush=True)
    if calls % 10000 == 0:
        print('\n%d/%d fixed' % (self.num_fixed, calls))

LuminosoInsight / python-ftfy / tests / test_futuristic_codepoints.py View on GitHub

def test_unknown_emoji():
    """fix_encoding should repair mojibake of U+1F960 but leave U+5F960 text alone."""
    # The range accepted as emoji has grown. Make sure the "futuristic" emoji
    # U+1F960 -- expected to become a fortune cookie in Unicode 10.0 -- can
    # still be recovered after a UTF-8 / windows-1252 mix-up.
    future_emoji = "\U0001f960 I see emoji in your future"
    utf8_bytes = future_emoji.encode('utf-8')
    garbled = utf8_bytes.decode('windows-1252')
    assert fix_encoding(garbled) == future_emoji

    # We trust this codepoint enough to recognize it even when its byte A0
    # (which decodes to a non-breaking space) was mangled into a plain space.
    space_mangled = garbled.replace('\xa0', ' ')
    assert fix_encoding(space_mangled) == future_emoji

    # Bump the first byte for a near-identical case whose codepoint will
    # definitely not exist anytime soon. Here the text as it stands,
    # "ñŸ¥\xa0", is considered more probable, so it must come back unchanged.
    far_future = "\U0005f960 I see mojibake in your present"
    far_future_garbled = far_future.encode('utf-8').decode('windows-1252')
    assert fix_encoding(far_future_garbled) == far_future_garbled

LuminosoInsight / python-ftfy / tests / test_futuristic_codepoints.py View on GitHub

def test_unknown_emoji():
    """fix_encoding should repair mojibake of U+1F960 but leave U+5F960 text alone."""
    # The range accepted as emoji has grown. Make sure the "futuristic" emoji
    # U+1F960 -- expected to become a fortune cookie in Unicode 10.0 -- can
    # still be recovered after a UTF-8 / windows-1252 mix-up.
    future_emoji = "\U0001f960 I see emoji in your future"
    utf8_bytes = future_emoji.encode('utf-8')
    garbled = utf8_bytes.decode('windows-1252')
    assert fix_encoding(garbled) == future_emoji

    # We trust this codepoint enough to recognize it even when its byte A0
    # (which decodes to a non-breaking space) was mangled into a plain space.
    space_mangled = garbled.replace('\xa0', ' ')
    assert fix_encoding(space_mangled) == future_emoji

    # Bump the first byte for a near-identical case whose codepoint will
    # definitely not exist anytime soon. Here the text as it stands,
    # "ñŸ¥\xa0", is considered more probable, so it must come back unchanged.
    far_future = "\U0005f960 I see mojibake in your present"
    far_future_garbled = far_future.encode('utf-8').decode('windows-1252')
    assert fix_encoding(far_future_garbled) == far_future_garbled

How to use ftfy - 10 common examples

To help you get started, we’ve selected a few ftfy examples, based on popular ways it is used in public projects.

ftfy

Package Health Score

Popular ftfy functions

Similar packages