From 8ff57c5803152c619f88e44ffded28540a289d44 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 7 Nov 2018 21:54:34 +0100 Subject: [PATCH] Do not display control characters in output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kudos to Sherry Taylor for reporting this issue ♥ --- mat2 | 10 ++++++++++ tests/test_climat2.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mat2 b/mat2 index a36f62d..351d97b 100755 --- a/mat2 +++ b/mat2 @@ -6,6 +6,7 @@ import sys import mimetypes import argparse import logging +import unicodedata try: from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS @@ -83,6 +84,15 @@ def __print_meta(filename: str, metadata: dict, depth: int=1): if isinstance(v, dict): __print_meta(k, v, depth+1) continue + + # Remove control characters + # We might use 'Cc' instead of 'C', but better safe than sorry + # https://www.unicode.org/reports/tr44/#GC_Values_Table + try: + v = ''.join(ch for ch in v if not unicodedata.category(ch).startswith('C')) + except TypeError: + pass # for things that aren't iterable + try: # FIXME this is ugly. print(padding + " %s: %s" % (k, v)) except UnicodeEncodeError: diff --git a/tests/test_climat2.py b/tests/test_climat2.py index dd7c9b9..53e4f5b 100644 --- a/tests/test_climat2.py +++ b/tests/test_climat2.py @@ -121,7 +121,7 @@ class TestGetMeta(unittest.TestCase): proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'], stdout=subprocess.PIPE) stdout, _ = proc.communicate() - self.assertIn(b'producer: pdfTeX-1.40.14', stdout) + self.assertIn(b'Producer: pdfTeX-1.40.14', stdout) def test_png(self): proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.png'],