#!/usr/bin/env python # # lookup a japanese word in wwwjdic and display it in a gtk2 window # # this code is released under the "I don't care" license, until i figure out how # to properly license code import gtk, re, string, sys, urllib # TODO Make this work encoding = 'utf8' # convert character encodings to euc_jp def convert(word): # gtk->xclip produces this. it seems to be shell-escaped unicode hex if word.find('\\x{') == 0: start = '\\x{' end = '}' utf16_str = '' while word.find(end) != -1: hex_str = word[word.find(start) + len(start) : word.find(end)] utf16_str += hex_str.decode('hex').decode('utf_16be') word = word[(word.find(end) + len(end)):] word = utf16_str.encode(encoding) # jis_7 (iso-2022). mlterm produces this elif word.find('\x1b') == 0: word = word.decode('jis_7').encode(encoding) return word # lookup entry def lookup(word): # properly encode word for a url params = urllib.urlencode( { '1MDJ' + word : None } ) url = 'http://etext.lib.virginia.edu/cgi-local/breen/wwwjdic?' + params print url return urllib.urlopen(url).read() # parse data def parse(data): # remove newlines data = re.sub(re.compile('\n', re.S), '', data) # watch out for this match = re.search('(ERROR[^:]*:\s*[^<]*?)\s*<', data) if match: return ('', [ match.group(1) ] ) # grab search key search_key = re.search('Search Key: ([^<]*)', data).group(1) # cull top and bottom data = re.sub('^.*?Search Key', '', data) data = re.sub('
.*$', '', data) # grab data from each result line results = [] pattern = re.compile(']*>([^<]*)', re.I) for match in re.finditer(pattern, data): results.append(match.group(1)) # indicate whether there are more matches if re.search('Further matches to display', data): results.append('...') # *toss* return (search_key, results) # display output def display((search_key, results)): # search_key tells us if the results are valid or not if search_key: # convert encodings, join the list search_key = unicode(search_key, encoding) results = string.join(map(lambda a: unicode(a, encoding), results),'\n') # create the string title = search_key text = 'Search Key: %s\n%s' % (search_key, results) # oops. invalid results. just shove stuff through else: title = 'Error' text = string.join(results, '\n') # actually show the window show(title, text) # show some text in a gtk2 TextView def show(title, text): # create some important stuff window = gtk.Window() # can't find any good window types window.connect('destroy', gtk.mainquit) window.set_title('jlookup - ' + title) view = gtk.TextView() view.set_editable(False) view.set_cursor_visible(False) view.set_left_margin(5) view.set_pixels_above_lines(1) view.set_pixels_below_lines(1) view.set_right_margin(5) buffer = view.get_buffer() # put the string in the buffer buffer.insert(buffer.get_end_iter(), text) # enlarge the text pango_scale = 1024 font_size = 12 big_tag = buffer.create_tag('big') big_tag.set_property('size', pango_scale * font_size) for tag in [ big_tag ]: buffer.apply_tag(tag, buffer.get_start_iter(), buffer.get_end_iter()) #view.set_wrap_mode(gtk.WRAP_WORD) #view.set_size_request(800, 600) # setup escape to close the window # # LDFJ; ASLIUF EWAOI;U9OU 3WQORU ASOIDPFJ ASLIDJF LKAWJ 3RLKJ ASDF # this is frustrating as hell. i give up. i'll just use my damn wm # #accel_group = gtk.AccelGroup() #accel_group.connect('destroy', lambda: gtk.gdk.SHIFT_MASK, #gtk.ACCEL_VISIBLE, lambda : 0) #accel_group.connect(gtk.gdk.keyval_from_name('e'), window.destroy) #window.add_accel_group(accel_group) # pack, show, and loop window.add(view) view.show() window.show() gtk.main() # a little error-checking try: # did we get anything? if len(sys.argv) == 1: raise Exception('input empty') # the program display(parse(lookup(convert(sys.argv[1])))) except Exception, e: show('Error', str(e))