I finally took some time, taught myself a bit of Python, and wrote a script for PN2 that I think is useful. It takes the selected character in the current document and prints some information about it: its ANSI (cp1252) value, its Unicode code point, its canonical decomposition (NFD) if it has one, its Unicode name, and its UTF-8 and UTF-16LE representations.
For instance the output for ù is something like this:
=== Character Info ===
ANSI: 249
Unicode: U+00F9
NFD: U+0075,U+0300
Name: LATIN SMALL LETTER U WITH GRAVE
UTF8: xc3,xb9
UTF16LE: xf9,x0
import unicodedata
import codecs
@script("CharInfo", "Text")
def CharInfo():
    """Print information about the selected character to the PN2 output pane.

    Reads the bytes between the selection start and end of the current
    document and writes, one item per line: the ANSI (cp1252) value, the
    Unicode code point, the NFD decomposition (only when it differs from the
    character itself), the Unicode name, and the UTF-8 and UTF-16LE byte
    sequences.

    A one-byte selection is interpreted as a cp1252 byte; a longer selection
    is interpreted as one UTF-8 encoded character (at most the first four
    bytes are used). If nothing is selected, or the bytes do not yield a
    usable code point, "No character selected" is written instead.

    Written for Python 2 (relies on ``unichr`` and ``str.decode``). ``pn``,
    ``scintilla`` and ``script`` are not defined in this file -- presumably
    they are supplied by the PN2 scripting host.
    """
    usage = "No character selected"
    editor = scintilla.Scintilla(pn.CurrentDoc())
    char = editor.GetText(editor.SelectionStart, editor.SelectionEnd)
    pn.AddOutput(chr(13) + '=== Character Info ===' + chr(13))

    if not char:
        # Without this guard the code point below is never assigned and the
        # script dies with a NameError instead of showing the usage message.
        pn.AddOutput(usage)
        return

    # Hand-decode up to four bytes as a UTF-8 sequence: strip the lead-byte
    # marker (192/224/240) and the continuation marker (128), then combine
    # the remaining 6-bit groups. The bytes are NOT validated here; nonsense
    # input gives a nonsense (often negative) value that unichr() rejects.
    byte_values = [ord(b) for b in char[:4]]
    c1 = byte_values[0]
    if len(byte_values) == 1:
        u = c1
    elif len(byte_values) == 2:
        c2 = byte_values[1]
        u = (c1 - 192) * 64 + c2 - 128
    elif len(byte_values) == 3:
        c2, c3 = byte_values[1:]
        u = (c1 - 224) * 4096 + (c2 - 128) * 64 + c3 - 128
    else:
        c2, c3, c4 = byte_values[1:]
        u = ((c1 - 240) * 262144 + (c2 - 128) * 4096
             + (c3 - 128) * 64 + c4 - 128)

    try:
        if len(char) == 1:
            # A lone byte is taken to be ANSI: its value is the ANSI code,
            # and cp1252 maps it to the real Unicode code point.
            ansistr = str(c1)
            unichar = chr(c1).decode('cp1252')
            u = ord(unichar)
        else:
            # Multi-byte selection: treat it as UTF-8 and look for an ANSI
            # equivalent of the decoded character.
            unichar = unichr(u)
            try:
                ansistr = str(ord(unichar.encode('cp1252')))
            except UnicodeError:
                ansistr = 'none'  # there is no ANSI equivalent
    except ValueError:
        # Raised by unichr() for an out-of-range value and (as UnicodeError,
        # a ValueError subclass) by the cp1252 decode of an unmapped byte.
        pn.AddOutput(usage)
        return

    pn.AddOutput('ANSI: ' + ansistr + chr(13))
    pn.AddOutput('Unicode: ' + 'U+%04X' % u + chr(13))

    # Use real NFD normalization; unicodedata.decomposition() returns only a
    # single-step mapping that may include compatibility tags like <compat>.
    nfd = unicodedata.normalize('NFD', unichar)
    if nfd != unichar:
        nfdstr = ','.join('U+%04X' % ord(c) for c in nfd)
        pn.AddOutput('NFD: ' + nfdstr + chr(13))

    pn.AddOutput('Name: ' + unicodedata.name(unichar, 'unknown') + chr(13))

    # Bytes are shown as lowercase hex with an "x" prefix, e.g. "xc3,xb9".
    utf8str = ",".join('x%x' % ord(c) for c in unichar.encode('utf_8'))
    utf16lestr = ",".join('x%x' % ord(c)
                          for c in unichar.encode('utf_16_le'))
    pn.AddOutput('UTF8: ' + utf8str + chr(13))
    pn.AddOutput('UTF16LE: ' + utf16lestr + chr(13))
BTW, I couldn't get the code tag to preserve the indents on this forum. I used pre and it worked OK.