#!/usr/bin/env python
#
# lookup a japanese word in wwwjdic and display it in a gtk2 window
#
# this code is released under the "I don't care" license, until i figure out how
# to properly license code
import gtk, re, string, sys, urllib
# Character encoding shared by the whole script: convert() re-encodes the
# input word to it and display() decodes the parsed results from it.
# TODO Make this work
encoding = 'utf8'
# convert the command-line word to the script-wide `encoding`
def convert(word):
    """Re-encode *word* to the module-level ``encoding``.

    word -- byte string as received on the command line.

    Two input forms are recognised by their prefix and converted; any
    other input is returned unchanged.  Decoding errors (bad hex digits,
    invalid byte sequences) propagate to the caller.
    """
    # gtk->xclip produces this. it seems to be shell-escaped unicode hex,
    # i.e. a run of \x{HHHH} groups whose payload is UTF-16BE in hex.
    if word.startswith('\\x{'):
        # Gather the payload of every group and decode once, so that a
        # surrogate pair split across two groups still decodes, and a
        # stray '}' without a matching opener is simply ignored.
        hex_digits = ''.join(re.findall(r'\\x\{([^}]*)\}', word))
        word = hex_digits.decode('hex').decode('utf_16be').encode(encoding)
    # jis_7 (iso-2022). mlterm produces this
    elif word.startswith('\x1b'):
        # The standard library registers this codec as iso2022_jp; keep
        # the old 'jis_7' name as a fallback for installations where only
        # an add-on codec package provides it.
        try:
            text = word.decode('iso2022_jp')
        except LookupError:
            text = word.decode('jis_7')
        word = text.encode(encoding)
    return word
# lookup entry
def lookup(word):
    """Fetch the raw WWWJDIC result page for *word*.

    word -- search key as a byte string (see convert()).

    Returns the response body as a string.  Errors raised by urllib
    while opening or reading the URL propagate to the caller.
    """
    # properly encode word for a url
    # NOTE(review): urlencode() renders the None value as text, so the
    # query string ends in "=None" -- confirm the server tolerates that.
    params = urllib.urlencode({'1MDJ' + word: None})
    url = 'http://etext.lib.virginia.edu/cgi-local/breen/wwwjdic?' + params
    print(url)
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # always release the connection, even if read() fails
        response.close()
# parse data
#
# Extract the search key and the result strings from a raw response page.
#
# data -- the response body as one string
#
# Returns a (search_key, results) tuple.  When the page contains an
# "ERROR...:" message, search_key is '' and results holds only that
# message.  Otherwise results is the list of captured strings, with '...'
# appended when the page mentions "Further matches to display".
#
# NOTE(review): when the page has no "Search Key: " text, re.search()
# returns None and the .group(1) call raises AttributeError.
def parse(data):
    # remove newlines
    data = re.sub(re.compile('\n', re.S), '', data)
    # watch out for this
    match = re.search('(ERROR[^:]*:\s*[^<]*?)\s*<', data)
    if match:
        return ('', [ match.group(1) ] )
    # grab search key
    search_key = re.search('Search Key: ([^<]*)', data).group(1)
    # cull top and bottom
    data = re.sub('^.*?Search Key', '', data)
    # NOTE(review): the pattern literal below is split across two physical
    # lines, which a single-quoted string cannot do; presumably some markup
    # that opened the pattern was lost -- restore it from the original file.
    data = re.sub('
.*$', '', data)
    # grab data from each result line
    results = []
    # NOTE(review): this pattern starts with ']*>', which reads like the
    # tail of a '<tag[^>]*>' expression; presumably its beginning was lost
    # as well -- verify against the original file.
    pattern = re.compile(']*>([^<]*)', re.I)
    for match in re.finditer(pattern, data):
        results.append(match.group(1))
    # indicate whether there are more matches
    if re.search('Further matches to display', data):
        results.append('...')
    # *toss*
    return (search_key, results)
# display output
def display(result):
    """Format a parse() result and show it in a window.

    result -- a (search_key, results) pair: search_key is a byte string
              (empty when the lookup failed) and results is a list of
              byte strings.

    A non-empty search_key means the results are valid: everything is
    decoded from the module-level ``encoding`` and shown under a
    "Search Key" heading.  Otherwise the results are shown as-is in a
    window titled "Error".
    """
    # Unpack in the body instead of in the signature; callers still pass
    # the single tuple positionally.
    search_key, results = result
    # search_key tells us if the results are valid or not
    if search_key:
        # convert encodings, join the list
        search_key = unicode(search_key, encoding)
        lines = [unicode(line, encoding) for line in results]
        # create the string
        title = search_key
        text = 'Search Key: %s\n%s' % (search_key, '\n'.join(lines))
    # oops. invalid results. just shove stuff through
    else:
        title = 'Error'
        text = '\n'.join(results)
    # actually show the window
    show(title, text)
# show some text in a gtk2 TextView
def show(title, text):
    """Open a window displaying *text* and block until it is closed.

    title -- appended to 'jlookup - ' to form the window title.
    text  -- the string placed in the read-only text view.

    Runs the gtk main loop; returns once the window has been destroyed.
    """
    # create some important stuff
    window = gtk.Window() # can't find any good window types
    # gtk.mainquit is the deprecated spelling of gtk.main_quit; the lambda
    # discards the widget argument that the signal passes along.
    window.connect('destroy', lambda *args: gtk.main_quit())
    window.set_title('jlookup - ' + title)

    view = gtk.TextView()
    view.set_editable(False)
    view.set_cursor_visible(False)
    view.set_left_margin(5)
    view.set_pixels_above_lines(1)
    view.set_pixels_below_lines(1)
    view.set_right_margin(5)

    # put the string in the buffer
    text_buffer = view.get_buffer()
    text_buffer.insert(text_buffer.get_end_iter(), text)

    # enlarge the text: the tag's size is font_size scaled by pango_scale
    pango_scale = 1024
    font_size = 12
    big_tag = text_buffer.create_tag('big')
    big_tag.set_property('size', pango_scale * font_size)
    text_buffer.apply_tag(big_tag,
                          text_buffer.get_start_iter(),
                          text_buffer.get_end_iter())

    # Word wrapping (gtk.WRAP_WORD) and an explicit 800x600 size request
    # are currently disabled.
    # TODO: closing the window with Escape is not implemented; an earlier
    # attempt with gtk.AccelGroup was abandoned, so the window has to be
    # closed through the window manager.

    # pack, show, and loop
    window.add(view)
    view.show()
    window.show()
    gtk.main()
# Script body: run the convert -> lookup -> parse -> display pipeline on the
# first command-line argument, reporting any failure in an error window.
try:
    args = sys.argv
    # nothing but the program name means no word was given
    if len(args) == 1:
        raise Exception('input empty')
    # the program, one stage at a time
    encoded = convert(args[1])
    page = lookup(encoded)
    parsed = parse(page)
    display(parsed)
except Exception:
    # fetch the active exception without version-specific except syntax
    failure = sys.exc_info()[1]
    show('Error', str(failure))