summary | refs | log | tree | commit | diff
diff options
author: Robert C Jennings <robert.jennings@canonical.com> 2017-06-27 12:56:00 -0500
committer: Robert C Jennings <robert.jennings@canonical.com> 2017-06-27 12:56:00 -0500
commit: d93ddf9c4e050d0011e2788f9af3ec4dc6ce7244 (patch)
tree: 06b4a542d6cafdc3388ca731a3d6a56d443f22a5
parent: fe0f5924c2b8ec5b51ed1b44d46dee9314aef36b (diff)
Force output to UTF-8 at all times
We want to output UTF-8 at all times to preserve Unicode characters in the changelog blocks. For Python 2 we will wrap sys.stdout with an instance of StreamWriter with our preferred coding. Python 3 requires no changes.

When writing output to the terminal we get the encoding of the terminal (UTF-8 these days). When we redirect or pipe the output of the program, it is generally not possible to know what the input encoding of the receiving program is; the encoding when redirecting to a file will be None (Python 2.7) or UTF-8 (Python 3):

    $ python2.7 -c "import sys; print sys.stdout.encoding" | cat
    None

    $ python3.4 -c "import sys; print(sys.stdout.encoding)" | cat
    UTF-8

Source: https://wiki.python.org/moin/PrintFails#print.2C_write_and_Unicode_in_pre-3.0_Python

bzr-revno: 18.2.29
-rwxr-xr-x mfdiff 33
1 files changed, 33 insertions, 0 deletions
diff --git a/mfdiff b/mfdiff
index 1fd3ae8..f40d438 100755
--- a/mfdiff
+++ b/mfdiff
@@ -25,6 +25,8 @@ the two versions.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import codecs
+import locale
import logging
import os
import os.path
@@ -533,6 +535,36 @@ def setup_logging(loglevel=0):
stream=sys.stderr)
+def stdout_force_unicode():
+ """
+ Force output to UTF-8 at all times
+
+ We want to output UTF-8 at all times to preserve Unicode characters
+ in the changelog blocks. For Python 2 we will wrap sys.stdout with
+ an instance of StreamWriter with our preferred coding. Python3 requires
+ no changes.
+
+ When writing output to the terminal we get the encoding of the
+ terminal (utf-8 these days). When we redirect or pipe the output of
+ the program it is generally not possible to know what the input
+ encoding of the receiving program is, the encoding when redirecting
+ to a file will be None (Python 2.7) or UTF-8 (Python 3)
+
+ $ python2.7 -c "import sys; print sys.stdout.encoding" | cat
+ None
+
+ $ python3.4 -c "import sys; print(sys.stdout.encoding)" | cat
+ UTF-8
+
+ Source:
+ https://wiki.python.org/moin/PrintFails#print.2C_write_and_Unicode_in_pre-3.0_Python
+ """
+
+ if sys.version_info[0] < 3:
+ encoding = codecs.getwriter(locale.getpreferredencoding())
+ sys.stdout = encoding(sys.stdout)
+
+
def main():
"""
Given two manifest files for a particular release/arch, find the
@@ -541,6 +573,7 @@ def main():
the two versions.
"""
+ stdout_force_unicode()
options, (arch, release, manifest_from_filename,
manifest_to_filename) = parse_args()