<?xml version="1.0" encoding="UTF-8"?>
<!-- generator="bbPress/1.0.3" -->
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>pnotepad.org forums &#187; Topic: Script: Character Info</title>
		<link>http://pnotepad.org/forums/topic/352</link>
		<description>Programmer&#039;s Notepad Forums</description>
		<language>en-US</language>
		<pubDate>Thu, 09 Feb 2012 06:10:43 +0000</pubDate>
		<generator>http://bbpress.org/?v=1.0.3</generator>
		<textInput>
			<title><![CDATA[Search]]></title>
			<description><![CDATA[Search all topics from these forums.]]></description>
			<name>q</name>
			<link>http://pnotepad.org/forums/search.php</link>
		</textInput>
		<atom:link href="http://pnotepad.org/forums/rss/topic/352" rel="self" type="application/rss+xml" />

		<item>
			<title>Script: Character Info (simon)</title>
			<link>http://pnotepad.org/forums/topic/352#post-1241</link>
			<pubDate>Wed, 12 Mar 2008 09:28:13 +0000</pubDate>
			<dc:creator>simon</dc:creator>
			<guid isPermaLink="false">1241@http://pnotepad.org/forums/</guid>
			<description>&#60;p&#62;Nice, thanks!
&#60;/p&#62;</description>
		</item>
		<item>
			<title>Script: Character Info (wvdirks)</title>
			<link>http://pnotepad.org/forums/topic/352#post-1239</link>
			<pubDate>Wed, 12 Mar 2008 06:48:35 +0000</pubDate>
			<dc:creator>wvdirks</dc:creator>
			<guid isPermaLink="false">1239@http://pnotepad.org/forums/</guid>
			<description>&#60;p&#62;I finally took some time, taught myself a bit of Python, and wrote a script for PN2 that I think is useful. It takes the selected character in the current document and spits out some info about it like ANSI (cp1252) value, Unicode value, Unicode name, UTF8 and UTF16LE representation. &#60;/p&#62;
&#60;p&#62;For instance the output for ù is something like this:&#60;/p&#62;
&#60;p&#62;=== Character Info ===&#60;br /&#62;
ANSI: 249&#60;br /&#62;
Unicode: U+00F9&#60;br /&#62;
NFD: U+0075,U+0300&#60;br /&#62;
Name: LATIN SMALL LETTER U WITH GRAVE&#60;br /&#62;
UTF8: xc3,xb9&#60;br /&#62;
UTF16LE: xf9,x0&#60;/p&#62;
&#60;pre&#62;

import unicodedata
import codecs

@script(&#34;CharInfo&#34;, &#34;Text&#34;)
def CharInfo():

  usage = &#34;No character selected&#34;
  editor = scintilla.Scintilla(pn.CurrentDoc())
  char = editor.GetText(editor.SelectionStart, editor.SelectionEnd)

  pn.AddOutput(chr(13) + '=== Character Info ===' + chr(13))
  if len(char) &#38;gt; 0: #first byte
    c1 = ord(char[0])
    u = c1
  if len(char) &#38;gt; 1: #second byte
    c2 = ord(char[1])
    u = (c1-192)*64+c2-128
  if len(char) &#38;gt; 2: #third byte
    c3 = ord(char[2])
    u = (c1-224)*4096+(c2-128)*64+c3-128
  if len(char) &#38;gt; 3: #fourth byte
    c4 = ord(char[3])
    u = (c1-240)*262144+(c2-128)*4096+(c3-128)*64+c4-128
  try:
    unichar = unichr(u)
    if c1 == ord(unichar): #single byte, check if it's ANSI?
      ansistr = str(ord(unichar))
      unichar = chr(c1)
      test = unichar.decode('cp1252') # convert ANSI to Unicode
      u = ord(test)
    else: #single byte and it's UTF8
      try:
        ansistr = str(ord(unichar.encode('cp1252'))) #calculate ANSI value if possible
      except UnicodeError:
        ansistr = 'none' #there is no ANSI equivalent

    pn.AddOutput('ANSI: ' + ansistr + chr(13))
    pn.AddOutput('Unicode: ' + 'U+' + hex(u).lstrip('0x').zfill(4).upper() + chr(13))
    nfdlist = unicodedata.decomposition(unichr(u)).rsplit(' ')
    if len(nfdlist) &#38;gt; 1:
      pn.AddOutput('NFD: '+','.join('U+'+c for c in nfdlist) + chr(13))
    pn.AddOutput('Name: '+unicodedata.name(unichr(u),'unknown') + chr(13))
    utf8str = &#34;,&#34;.join(hex(ord(c)).lstrip('0') for c in unichr(u).encode('utf_8'))
    utf16lestr = &#34;,&#34;.join(hex(ord(c)).lstrip('0') for c in unichr(u).encode('utf_16_le'))
    pn.AddOutput('UTF8: '+utf8str + chr(13))
    pn.AddOutput('UTF16LE: '+utf16lestr + chr(13))
  except ValueError:
    pn.AddOutput(usage)&#60;/pre&#62;
&#60;p&#62;BTW, I couldn't get the code tag to preserve the indents on this forum. I used pre and it worked OK.
&#60;/p&#62;</description>
		</item>

	</channel>
</rss>

