
December 4th, 2012, 12:43 PM
|
 |
Contributing User
|
|
|
|
If you use Python 3, check whether your editor/IDE handles the following correctly:
Code:
# -*- coding: utf8 -*-
# the declaration above is optional in Python 3 (source files default to
# UTF-8); it mainly tells the editor how to save non-ASCII characters
#
# unicode characters in Python 3.2
# see:
# http://docs.python.org/3.2/library/unicodedata.html
# http://www.unicode.org/Public/5.1.0/ucd/UCD.html
# Spanish sample text spelled with \u escapes, so this line stays pure ASCII.
# Typing the characters directly ("¿Cómo es usted?") yields an equal string.
mystr = '\u00bfC\u00f3mo es usted?'

# Round trip through the UTF-8 codec:
#   str.encode()   -> <class 'bytes'>
#   bytes.decode() -> <class 'str'>
mybytes = mystr.encode("utf8")
mystr2 = mybytes.decode("utf8")

# Same text entered as literal characters; it encodes to the same bytes.
mystr3 = "¿Cómo es usted?"
mybytes3 = mystr3.encode("utf8")

# One value per output line:
#   ¿Cómo es usted?
#   b'\xc2\xbfC\xc3\xb3mo es usted?'
#   ¿Cómo es usted?
#   b'\xc2\xbfC\xc3\xb3mo es usted?'
print(mystr, mybytes, mystr2, mybytes3, sep="\n")
# Bonus: translate between a character and its official Unicode name.
import unicodedata

unicode_char = '\u00bf'
# name() maps character -> descriptive name; lookup() maps name -> character
unicode_name = unicodedata.name(unicode_char)
print(
    unicode_char,                                  # ¿
    unicode_name,                                  # INVERTED QUESTION MARK
    unicodedata.lookup('INVERTED QUESTION MARK'),  # ¿
    sep="\n",
)
# Three Greek letters taken through the same round trip:
#   *_u  the original str
#   *_b  its UTF-8 encoding (<class 'bytes'>)
#   *_c  the bytes decoded back to str
pi_u, eps_u, mu_u = "\u03C0", "\u03B5", "\u03BC"
pi_b, eps_b, mu_b = (letter.encode("utf-8") for letter in (pi_u, eps_u, mu_u))
pi_c, eps_c, mu_c = (raw.decode("utf-8") for raw in (pi_b, eps_b, mu_b))

# value and type at each stage, shown for pi only
print(pi_u, type(pi_u))  # π <class 'str'>
print(pi_b, type(pi_b))  # b'\xcf\x80' <class 'bytes'>
print(pi_c, type(pi_c))  # π <class 'str'>

# all three letters side by side at each stage
print(pi_u, eps_u, mu_u)  # π ε μ
print(pi_b, eps_b, mu_b)  # b'\xcf\x80' b'\xce\xb5' b'\xce\xbc'
print(pi_c, eps_c, mu_c)  # π ε μ

# official Unicode names, one per line:
#   GREEK SMALL LETTER PI
#   GREEK SMALL LETTER MU
print(unicodedata.name(pi_u), unicodedata.name(mu_u), sep="\n")
__________________
Real Programmers always confuse Christmas and Halloween because Oct31 == Dec25
|