#! /usr/bin/env python
"""Rewrap the message strings of Gettext PO files in place.

Usage: pass one or more PO file paths on the command line.  Every message
(msgctxt, msgid, msgid_plural, msgstr, msgstr[N]) is re-joined and split again
by splitit(); comment lines and blank lines are copied through unchanged.  The
result is written to "<file>.new" and then renamed over the original.

Message lines must be UTF-8.  The script runs on Python 2.7 and Python 3.
"""

# codecs and string are kept from the original import list although the
# current code no longer needs them.
import codecs
import io
import os
import string
import sys

# TODO: currently the 78 chars are *without* the quotes, while for Gettext it
#       is *with* the quotes
# FIXME: it seems possible to get lines bigger than 80 characters.
max_length = 78

# Tags that force a line break in front of them inside a multi-line message.
wrap_before = ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<br>',
               '<br/>', '<ol>', '<ul>', '<li>', '<table>', '<th>', '<tr>',
               '<td>', '<center>', '<blockquote>', '<pre>', '<hr>', '<hr/>']

# A '>' is only chosen as the break point of an over-long line when it sits
# beyond this index; otherwise the last space or comma is used instead.
_MIN_BRACE_INDEX = 50

# Keywords that are followed directly by the quoted string.
_SIMPLE_KEYWORDS = ("msgid ", "msgstr ", "msgctxt ", "msgid_plural ")

### TODO: try to support any charset, not only UTF-8 (so that it can be used
###       outside KDE)


class _PoFormatError(Exception):
    """A line could not be processed; str() is the message to print."""


def splitit(start, message, outfile):
    """Write one PO entry line group to *outfile*, wrapped.

    start   -- keyword prefix including its trailing space (e.g. "msgid "),
               or an empty string to emit only continuation lines.
    message -- the unquoted message text, escape sequences still in their
               two-character PO form (a line feed is backslash + "n").
    outfile -- object with a write() method accepting UTF-8 encoded bytes.

    A message that fits on the keyword line and contains no escaped newline
    is written as a single line.  Otherwise the keyword is written with an
    empty string and the text follows in quoted continuation lines, broken
    after every escaped newline, before every tag listed in wrap_before, and
    once the running index exceeds max_length.  Nothing is written when both
    *start* and *message* are empty.
    """
    def emit(text):
        # Encode once at the I/O boundary so every write is bytes.
        outfile.write(text.encode('utf-8'))

    if start:
        if len(message) + len(start) < max_length and '\\n' not in message:
            emit(u'%s"%s"\n' % (start, message))
            return
        emit(u'%s""\n' % start)

    tags = tuple(wrap_before)
    index = 0
    mlen = len(message)
    last_brace = last_space = last_comma = 0
    while index < mlen:
        char = message[index]

        # An "n" preceded by exactly one backslash is an escaped newline:
        # end the output line right after it and rescan the remainder.
        if (char == 'n' and index > 0 and message[index - 1] == '\\'
                and (index < 2 or message[index - 2] != '\\')):
            emit(u'"%s"\n' % message[:index + 1])
            message = message[index + 1:]
            mlen = len(message)
            index = last_brace = last_space = last_comma = 0
            continue

        if char == '>':
            last_brace = index
        elif char == ' ':
            last_space = index
        elif char == ',':
            last_comma = index
        elif char == '<' and index > 0 and message.startswith(tags, index):
            # Break in front of the tag; the tag starts the next line.
            emit(u'"%s"\n' % message[:index])
            message = message[index:]
            mlen = len(message)
            index = last_brace = last_space = last_comma = 0

        if index > max_length:
            # Pick the break point: a late '>' (plus the spaces following
            # it), else the last space, else the last comma, else right here.
            if last_brace > _MIN_BRACE_INDEX:
                index = last_brace
                while index < mlen - 1 and message[index + 1] == ' ':
                    index += 1
            elif last_space != 0:
                index = last_space
            elif last_comma != 0:
                index = last_comma
            else:
                # Do not end the line on a backslash: that would separate
                # an escape sequence from its second character.
                while index > 0 and message[index] == '\\':
                    index -= 1
            emit(u'"%s"\n' % message[:index + 1])
            message = message[index + 1:]
            mlen = len(message)
            index = last_brace = last_space = last_comma = 0
            continue

        index += 1

    if message:
        emit(u'"%s"\n' % message)


def _unix_newline(raw):
    """Return the bytes line *raw* with a CRLF or CR terminator turned into LF."""
    if raw.endswith(b'\r\n'):
        return raw[:-2] + b'\n'
    if raw.endswith(b'\r'):
        return raw[:-1] + b'\n'
    return raw


def _parse_keyword_line(line, index):
    """Split a stripped keyword line into (start, text).

    *start* is the keyword with its trailing space; *text* is what follows
    it with the last character (taken to be the closing quote) and the first
    non-blank character (taken to be the opening quote) removed.  Raises
    _PoFormatError, mentioning line number *index*, for anything else.
    """
    for keyword in _SIMPLE_KEYWORDS:
        if line.startswith(keyword):
            return keyword, line[len(keyword):-1].lstrip()[1:]

    if not line.startswith("msgstr["):
        raise _PoFormatError("parsing error in line %d" % index)

    # Slices instead of single indexes: a truncated line yields '' and is
    # reported as a parse error rather than raising IndexError.
    if line[8:10] == "] ":
        # For most languages, there will be only one digit
        if not line[7].isdigit():
            raise _PoFormatError(
                "not-a-digit error for mgstr[] in line %d" % index)
        prefix_len = 10
    else:
        posdigit = 7  # The first digit is at position 7
        while line[posdigit:posdigit + 1].isdigit():
            posdigit += 1
        if posdigit == 7 or line[posdigit:posdigit + 2] != "] ":
            raise _PoFormatError(
                "parse error after msgstr[ in line %d" % index)
        prefix_len = posdigit + 2  # skip ] and the space
    return line[:prefix_len], line[prefix_len:-1].lstrip()[1:]


def _rewrap_lines(raw_lines, new_file):
    """Rewrap the PO content given as a list of bytes lines into *new_file*.

    Raises _PoFormatError for undecodable or unparsable message lines.
    """
    start = ''
    last = ''
    for index, raw in enumerate(raw_lines, 1):
        raw = _unix_newline(raw)
        if raw.startswith(b'#'):
            # Comments are copied byte for byte and never decoded.
            line = u''
        else:
            try:
                line = raw.decode('utf-8').strip()
            except UnicodeError:
                raise _PoFormatError("invalid UTF-8 in line %d" % index)

        if not line:
            # Comment, blank or whitespace-only line: it ends the pending
            # message and is written back unchanged.
            splitit(start, last, new_file)
            start = ''
            last = ''
            new_file.write(raw)
            continue

        if line[0] == '"' and line[-1:] == '"':
            # Continuation line: append its content to the pending message.
            last += line[1:-1]
            continue

        # new message
        splitit(start, last, new_file)
        start, last = _parse_keyword_line(line, index)

    splitit(start, last, new_file)


def _rewrap_file(path):
    """Rewrap the PO file at *path* in place via "<path>.new".

    The original file is only replaced after the whole file was processed;
    on any failure the temporary file is removed and the original is left
    untouched.
    """
    new_path = path + ".new"
    with io.open(path, 'rb') as orig_file:
        # splitlines() recognises LF, CRLF and CR, which stands in for the
        # universal-newline read mode the script used to rely on.
        raw_lines = orig_file.read().splitlines(True)

    finished = False
    try:
        with io.open(new_path, 'wb') as new_file:
            _rewrap_lines(raw_lines, new_file)
        finished = True
    finally:
        if not finished and os.path.exists(new_path):
            os.remove(new_path)
    os.rename(new_path, path)


### TODO: even in the case of a parse error, the script could try to process
###       the next file(s) instead of exiting.
def main(argv=None):
    """Rewrap every PO file named in *argv* (default: sys.argv[1:]).

    On the first file that cannot be processed, prints
    "<file> <problem> in line <n>" to standard output and exits with
    status 1; files listed after it are not processed.
    """
    paths = sys.argv[1:] if argv is None else argv
    for path in paths:
        try:
            _rewrap_file(path)
        except _PoFormatError as err:
            sys.stdout.write("%s %s\n" % (path, err))
            sys.exit(1)


if __name__ == "__main__":
    main()

# kate: space-indent on; indent-width 4; replace-tabs on;