diff --git a/genChRanges.py b/genChRanges.py index 26176bd5bb025822fd72b764b8364203e15d04cc_Z2VuQ2hSYW5nZXMucHk=..265a979b5b493db59933a6ec88522baec2e02fa8_Z2VuQ2hSYW5nZXMucHk= 100755 --- a/genChRanges.py +++ b/genChRanges.py @@ -16,7 +16,6 @@ # import sys -import string import time # @@ -28,17 +27,17 @@ ret = [] pos = 0 while pos < len(lst): - try: # index generates exception if not present - s = lst[pos:].index(1) # look for start of next range - except: - break # if no more, finished - pos += s # pointer to start of possible range - try: - e = lst[pos:].index(0) # look for end of range - e += pos - except: # if no end, set to end of list - e = len(lst) - ret.append((pos, e-1)) # append range tuple to list - pos = e + 1 # ready to check for next range + try: # index generates exception if not present + s = lst[pos:].index(1) # look for start of next range + except: + break # if no more, finished + pos += s # pointer to start of possible range + try: + e = lst[pos:].index(0) # look for end of range + e += pos + except: # if no end, set to end of list + e = len(lst) + ret.append((pos, e-1)) # append range tuple to list + pos = e + 1 # ready to check for next range return ret @@ -43,6 +42,6 @@ return ret -sources = "chvalid.def" # input filename +sources = "chvalid.def" # input filename # minTableSize gives the minimum number of ranges which must be present # before a 256-byte lookup table is produced. If there are less than this @@ -57,9 +56,9 @@ try: defines = open("chvalid.def", "r") except: - print "Missing chvalid.def, aborting ..." + print("Missing chvalid.def, aborting ...") sys.exit(1) # # The lines in the .def file have three types:- # name: Defines a new function block @@ -61,9 +60,9 @@ sys.exit(1) # # The lines in the .def file have three types:- # name: Defines a new function block -# ur: Defines individual or ranges of unicode values +# ur: Defines individual or ranges of unicode values # end: Indicates the end of the function block # # These lines are processed below. @@ -72,8 +71,8 @@ # ignore blank lines, or lines beginning with '#' if line[0] == '#': continue - line = string.strip(line) + line = line.strip() if line == '': continue # split line into space-separated fields, then split on type try: @@ -76,38 +75,38 @@ if line == '': continue # split line into space-separated fields, then split on type try: - fields = string.split(line, ' ') - # - # name line: - # validate any previous function block already ended - # validate this function not already defined - # initialize an entry in the function dicitonary - # including a mask table with no values yet defined - # - if fields[0] == 'name': - name = fields[1] - if state != 0: - print "'name' %s found before previous name" \ - "completed" % (fields[1]) - continue - state = 1 - if Functs.has_key(name): - print "name '%s' already present - may give" \ - " wrong results" % (name) - else: - # dict entry with two list elements (chdata, rangedata) - Functs[name] = [ [], [] ] - for v in range(256): - Functs[name][0].append(0) - # - # end line: - # validate there was a preceding function name line - # set state to show no current function active - # - elif fields[0] == 'end': - if state == 0: - print "'end' found outside of function block" - continue - state = 0 + fields = line.split(' ') + # + # name line: + # validate any previous function block already ended + # validate this function not already defined + # initialize an entry in the function dicitonary + # including a mask table with no values yet defined + # + if fields[0] == 'name': + name = fields[1] + if state != 0: + print("'name' %s found before previous name" \ + "completed" % (fields[1])) + continue + state = 1 + if name in Functs: + print("name '%s' already present - may give" \ + " wrong results" % (name)) + else: + # dict entry with two list elements (chdata, rangedata) + Functs[name] = [ [], [] ] + for v in range(256): + Functs[name][0].append(0) + # + # end line: + # validate there was a preceding function name line + # set state to show no current function active + # + elif fields[0] == 'end': + if state == 0: + print("'end' found outside of function block") + continue + state = 0 @@ -113,65 +112,65 @@ - # - # ur line: - # validate function has been defined - # process remaining fields on the line, which may be either - # individual unicode values or ranges of values - # - elif fields[0] == 'ur': - if state != 1: - raise ValidationError, "'ur' found outside of 'name' block" - for el in fields[1:]: - pos = string.find(el, '..') - # pos <=0 means not a range, so must be individual value - if pos <= 0: - # cheap handling of hex or decimal values - if el[0:2] == '0x': - value = int(el[2:],16) - elif el[0] == "'": - value = ord(el[1]) - else: - value = int(el) - if ((value < 0) | (value > 0x1fffff)): - raise ValidationError, 'Illegal value (%s) in ch for'\ - ' name %s' % (el,name) - # for ur we have only ranges (makes things simpler), - # so convert val to range - currange = (value, value) - # pos > 0 means this is a range, so isolate/validate - # the interval - else: - # split the range into it's first-val, last-val - (first, last) = string.split(el, "..") - # convert values from text into binary - if first[0:2] == '0x': - start = int(first[2:],16) - elif first[0] == "'": - start = ord(first[1]) - else: - start = int(first) - if last[0:2] == '0x': - end = int(last[2:],16) - elif last[0] == "'": - end = ord(last[1]) - else: - end = int(last) - if (start < 0) | (end > 0x1fffff) | (start > end): - raise ValidationError, "Invalid range '%s'" % el - currange = (start, end) - # common path - 'currange' has the range, now take care of it - # We split on single-byte values vs. multibyte - if currange[1] < 0x100: # single-byte - for ch in range(currange[0],currange[1]+1): - # validate that value not previously defined - if Functs[name][0][ch]: - msg = "Duplicate ch value '%s' for name '%s'" % (el, name) - raise ValidationError, msg - Functs[name][0][ch] = 1 - else: # multi-byte - if currange in Functs[name][1]: - raise ValidationError, "range already defined in" \ - " function" - else: - Functs[name][1].append(currange) + # + # ur line: + # validate function has been defined + # process remaining fields on the line, which may be either + # individual unicode values or ranges of values + # + elif fields[0] == 'ur': + if state != 1: + raise Exception("'ur' found outside of 'name' block") + for el in fields[1:]: + pos = el.find('..') + # pos <=0 means not a range, so must be individual value + if pos <= 0: + # cheap handling of hex or decimal values + if el[0:2] == '0x': + value = int(el[2:],16) + elif el[0] == "'": + value = ord(el[1]) + else: + value = int(el) + if ((value < 0) | (value > 0x1fffff)): + raise Exception('Illegal value (%s) in ch for'\ + ' name %s' % (el,name)) + # for ur we have only ranges (makes things simpler), + # so convert val to range + currange = (value, value) + # pos > 0 means this is a range, so isolate/validate + # the interval + else: + # split the range into it's first-val, last-val + (first, last) = el.split("..") + # convert values from text into binary + if first[0:2] == '0x': + start = int(first[2:],16) + elif first[0] == "'": + start = ord(first[1]) + else: + start = int(first) + if last[0:2] == '0x': + end = int(last[2:],16) + elif last[0] == "'": + end = ord(last[1]) + else: + end = int(last) + if (start < 0) | (end > 0x1fffff) | (start > end): + raise Exception("Invalid range '%s'" % el) + currange = (start, end) + # common path - 'currange' has the range, now take care of it + # We split on single-byte values vs. multibyte + if currange[1] < 0x100: # single-byte + for ch in range(currange[0],currange[1]+1): + # validate that value not previously defined + if Functs[name][0][ch]: + msg = "Duplicate ch value '%s' for name '%s'" % (el, name) + raise Exception(msg) + Functs[name][0][ch] = 1 + else: # multi-byte + if currange in Functs[name][1]: + raise Exception("range already defined in" \ + " function") + else: + Functs[name][1].append(currange) except: @@ -176,7 +175,7 @@ except: - print "Failed to process line: %s" % (line) - raise + print("Failed to process line: %s" % (line)) + raise # # At this point, the entire definition file has been processed. Now we # enter the output phase, where we generate the two files chvalid.c and' @@ -194,9 +193,9 @@ try: header = open("include/libxml/chvalid.h", "w") except: - print "Failed to open include/libxml/chvalid.h" + print("Failed to open include/libxml/chvalid.h") sys.exit(1) try: output = open("chvalid.c", "w") except: @@ -198,9 +197,9 @@ sys.exit(1) try: output = open("chvalid.c", "w") except: - print "Failed to open chvalid.c" + print("Failed to open chvalid.c") sys.exit(1) date = time.asctime(time.localtime(time.time())) @@ -236,10 +235,10 @@ typedef struct _xmlChSRange xmlChSRange; typedef xmlChSRange *xmlChSRangePtr; struct _xmlChSRange { - unsigned short low; - unsigned short high; + unsigned short\tlow; + unsigned short\thigh; }; typedef struct _xmlChLRange xmlChLRange; typedef xmlChLRange *xmlChLRangePtr; struct _xmlChLRange { @@ -241,12 +240,12 @@ }; typedef struct _xmlChLRange xmlChLRange; typedef xmlChLRange *xmlChLRangePtr; struct _xmlChLRange { - unsigned int low; - unsigned int high; + unsigned int\tlow; + unsigned int\thigh; }; typedef struct _xmlChRangeGroup xmlChRangeGroup; typedef xmlChRangeGroup *xmlChRangeGroupPtr; struct _xmlChRangeGroup { @@ -248,15 +247,15 @@ }; typedef struct _xmlChRangeGroup xmlChRangeGroup; typedef xmlChRangeGroup *xmlChRangeGroupPtr; struct _xmlChRangeGroup { - int nbShortRange; - int nbLongRange; - const xmlChSRange *shortRange; /* points to an array of ranges */ - const xmlChLRange *longRange; + int\t\t\tnbShortRange; + int\t\t\tnbLongRange; + const xmlChSRange\t*shortRange;\t/* points to an array of ranges */ + const xmlChLRange\t*longRange; }; /** * Range checking routine */ XMLPUBFUN int XMLCALL @@ -257,11 +256,11 @@ }; /** * Range checking routine */ XMLPUBFUN int XMLCALL - xmlCharInRange(unsigned int val, const xmlChRangeGroup *group); +\t\txmlCharInRange(unsigned int val, const xmlChRangeGroup *group); """ % (date, sources)); output.write( """/* @@ -264,9 +263,9 @@ """ % (date, sources)); output.write( """/* - * chvalid.c: this module implements the character range - * validation APIs + * chvalid.c:\tthis module implements the character range + *\t\tvalidation APIs * * This file is automatically generated from the cvs source * definition files using the genChRanges.py Python script @@ -299,8 +298,7 @@ # compares, otherwise we output a 256-byte table and a macro to use it. # -fkeys = Functs.keys() # Dictionary of all defined functions -fkeys.sort() # Put some order to our output +fkeys = sorted(Functs.keys()) for f in fkeys: @@ -308,5 +306,5 @@ # If the total number of such ranges is less than minTableSize, we generate # an inline macro for direct comparisons; if greater, we generate a lookup # table. - if max(Functs[f][0]) > 0: # only check if at least one entry + if max(Functs[f][0]) > 0: # only check if at least one entry rangeTable = makeRange(Functs[f][0]) @@ -312,8 +310,8 @@ rangeTable = makeRange(Functs[f][0]) - numRanges = len(rangeTable) - if numRanges >= minTableSize: # table is worthwhile - header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) - header.write(""" + numRanges = len(rangeTable) + if numRanges >= minTableSize: # table is worthwhile + header.write("XMLPUBVAR const unsigned char %s_tab[256];\n" % f) + header.write(""" /** * %s_ch: * @c: char to validate @@ -321,5 +319,5 @@ * Automatically generated by genChRanges.py */ """ % f) - header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f)) + header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f)) @@ -325,11 +323,11 @@ - # write the constant data to the code file - output.write("const unsigned char %s_tab[256] = {\n" % f) - pline = " " - for n in range(255): - pline += " 0x%02x," % Functs[f][0][n] - if len(pline) > 72: - output.write(pline + "\n") - pline = " " - output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255]) + # write the constant data to the code file + output.write("const unsigned char %s_tab[256] = {\n" % f) + pline = " " + for n in range(255): + pline += " 0x%02x," % Functs[f][0][n] + if len(pline) > 72: + output.write(pline + "\n") + pline = " " + output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255]) @@ -335,15 +333,15 @@ - else: # inline check is used - # first another little optimisation - if space is present, - # put it at the front of the list so it is checked first - try: - ix = rangeTable.remove((0x20, 0x20)) - rangeTable.insert(0, (0x20, 0x20)) - except: - pass - firstFlag = 1 - - header.write(""" + else: # inline check is used + # first another little optimisation - if space is present, + # put it at the front of the list so it is checked first + try: + ix = rangeTable.remove((0x20, 0x20)) + rangeTable.insert(0, (0x20, 0x20)) + except: + pass + firstFlag = 1 + + header.write(""" /** * %s_ch: * @c: char to validate @@ -351,32 +349,32 @@ * Automatically generated by genChRanges.py */ """ % f) - # okay, I'm tired of the messy lineup - let's automate it! - pline = "#define %s_ch(c)" % f - # 'ntab' is number of tabs needed to position to col. 33 from name end - ntab = 4 - (len(pline)) / 8 - if ntab < 0: - ntab = 0 - just = "" - for i in range(ntab): - just += "\t" - pline = pline + just + "(" - for rg in rangeTable: - if not firstFlag: - pline += " || \\\n\t\t\t\t " - else: - firstFlag = 0 - if rg[0] == rg[1]: # single value - check equal - pline += "((c) == 0x%x)" % rg[0] - else: # value range - # since we are doing char, also change range ending in 0xff - if rg[1] != 0xff: - pline += "((0x%x <= (c)) &&" % rg[0] - pline += " ((c) <= 0x%x))" % rg[1] - else: - pline += " (0x%x <= (c))" % rg[0] - pline += ")\n" - header.write(pline) + # okay, I'm tired of the messy lineup - let's automate it! + pline = "#define %s_ch(c)" % f + # 'ntab' is number of tabs needed to position to col. 33 from name end + ntab = 4 - (len(pline)) // 8 + if ntab < 0: + ntab = 0 + just = "" + for i in range(ntab): + just += "\t" + pline = pline + just + "(" + for rg in rangeTable: + if not firstFlag: + pline += " || \\\n\t\t\t\t " + else: + firstFlag = 0 + if rg[0] == rg[1]: # single value - check equal + pline += "((c) == 0x%x)" % rg[0] + else: # value range + # since we are doing char, also change range ending in 0xff + if rg[1] != 0xff: + pline += "((0x%x <= (c)) &&" % rg[0] + pline += " ((c) <= 0x%x))" % rg[1] + else: + pline += " (0x%x <= (c))" % rg[0] + pline += ")\n" + header.write(pline) header.write(""" /** @@ -387,5 +385,5 @@ */ """ % f) pline = "#define %sQ(c)" % f - ntab = 4 - (len(pline)) / 8 + ntab = 4 - (len(pline)) // 8 if ntab < 0: @@ -391,4 +389,4 @@ if ntab < 0: - ntab = 0 + ntab = 0 just = "" for i in range(ntab): @@ -393,5 +391,5 @@ just = "" for i in range(ntab): - just += "\t" + just += "\t" header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ") if max(Functs[f][0]) > 0: @@ -396,4 +394,4 @@ header.write(pline + just + "(((c) < 0x100) ? \\\n\t\t\t\t ") if max(Functs[f][0]) > 0: - header.write("%s_ch((c)) :" % f) + header.write("%s_ch((c)) :" % f) else: @@ -399,6 +397,6 @@ else: - header.write("0 :") + header.write("0 :") # if no ranges defined, value invalid if >= 0x100 numRanges = len(Functs[f][1]) if numRanges == 0: @@ -401,6 +399,6 @@ # if no ranges defined, value invalid if >= 0x100 numRanges = len(Functs[f][1]) if numRanges == 0: - header.write(" 0)\n\n") + header.write(" 0)\n\n") else: @@ -406,21 +404,21 @@ else: - if numRanges >= minTableSize: - header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f) - else: # if < minTableSize, generate inline code - firstFlag = 1 - for rg in Functs[f][1]: - if not firstFlag: - pline += " || \\\n\t\t\t\t " - else: - firstFlag = 0 - pline = "\\\n\t\t\t\t(" - if rg[0] == rg[1]: # single value - check equal - pline += "((c) == 0x%x)" % rg[0] - else: # value range - pline += "((0x%x <= (c)) &&" % rg[0] - pline += " ((c) <= 0x%x))" % rg[1] - pline += "))\n\n" - header.write(pline) + if numRanges >= minTableSize: + header.write(" \\\n\t\t\t\t xmlCharInRange((c), &%sGroup))\n\n" % f) + else: # if < minTableSize, generate inline code + firstFlag = 1 + for rg in Functs[f][1]: + if not firstFlag: + pline += " || \\\n\t\t\t\t " + else: + firstFlag = 0 + pline = "\\\n\t\t\t\t(" + if rg[0] == rg[1]: # single value - check equal + pline += "((c) == 0x%x)" % rg[0] + else: # value range + pline += "((0x%x <= (c)) &&" % rg[0] + pline += " ((c) <= 0x%x))" % rg[1] + pline += "))\n\n" + header.write(pline) if len(Functs[f][1]) > 0: @@ -424,7 +422,7 @@ if len(Functs[f][1]) > 0: - header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) + header.write("XMLPUBVAR const xmlChRangeGroup %sGroup;\n" % f) # @@ -432,20 +430,20 @@ # for f in fkeys: - if len(Functs[f][1]) > 0: # only generate if unicode ranges present - rangeTable = Functs[f][1] - rangeTable.sort() # ascending tuple sequence - numShort = 0 - numLong = 0 - for rg in rangeTable: - if rg[1] < 0x10000: # if short value - if numShort == 0: # first occurrence - pline = "static const xmlChSRange %s_srng[] = {" % f - else: - pline += "," - numShort += 1 - if len(pline) > 60: - output.write(pline + "\n") - pline = " " + if len(Functs[f][1]) > 0: # only generate if unicode ranges present + rangeTable = Functs[f][1] + rangeTable.sort() # ascending tuple sequence + numShort = 0 + numLong = 0 + for rg in rangeTable: + if rg[1] < 0x10000: # if short value + if numShort == 0: # first occurrence + pline = "static const xmlChSRange %s_srng[] = {" % f + else: + pline += "," + numShort += 1 + if len(pline) > 60: + output.write(pline + "\n") + pline = " " else: pline += " " @@ -450,17 +448,17 @@ else: pline += " " - pline += "{0x%x, 0x%x}" % (rg[0], rg[1]) - else: # if long value - if numLong == 0: # first occurrence - if numShort > 0: # if there were shorts, finish them off - output.write(pline + "};\n") - pline = "static const xmlChLRange %s_lrng[] = { " % f - else: - pline += ", " - numLong += 1 - if len(pline) > 60: - output.write(pline + "\n") - pline = " " - pline += "{0x%x, 0x%x}" % (rg[0], rg[1]) - output.write(pline + "};\n") # finish off last group + pline += "{0x%x, 0x%x}" % (rg[0], rg[1]) + else: # if long value + if numLong == 0: # first occurrence + if numShort > 0: # if there were shorts, finish them off + output.write(pline + "};\n") + pline = "static const xmlChLRange %s_lrng[] = { " % f + else: + pline += ", " + numLong += 1 + if len(pline) > 60: + output.write(pline + "\n") + pline = " " + pline += "{0x%x, 0x%x}" % (rg[0], rg[1]) + output.write(pline + "};\n") # finish off last group @@ -466,15 +464,15 @@ - pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong) - if numShort > 0: - pline += "%s_srng" % f - else: - pline += "(xmlChSRangePtr)0" - if numLong > 0: - pline += ", %s_lrng" % f - else: - pline += ", (xmlChLRangePtr)0" - - output.write(pline + "};\n\n") + pline = "const xmlChRangeGroup %sGroup =\n\t{%d, %d, " % (f, numShort, numLong) + if numShort > 0: + pline += "%s_srng" % f + else: + pline += "(xmlChSRangePtr)0" + if numLong > 0: + pline += ", %s_lrng" % f + else: + pline += ", (xmlChLRangePtr)0" + + output.write(pline + "};\n\n") output.write( """ @@ -495,22 +493,22 @@ const xmlChLRange *lptr; if (rptr == NULL) return(0); - if (val < 0x10000) { /* is val in 'short' or 'long' array? */ - if (rptr->nbShortRange == 0) - return 0; - low = 0; - high = rptr->nbShortRange - 1; - sptr = rptr->shortRange; - while (low <= high) { - mid = (low + high) / 2; - if ((unsigned short) val < sptr[mid].low) { - high = mid - 1; - } else { - if ((unsigned short) val > sptr[mid].high) { - low = mid + 1; - } else { - return 1; - } - } - } + if (val < 0x10000) {\t/* is val in 'short' or 'long' array? */ +\tif (rptr->nbShortRange == 0) +\t return 0; +\tlow = 0; +\thigh = rptr->nbShortRange - 1; +\tsptr = rptr->shortRange; +\twhile (low <= high) { +\t mid = (low + high) / 2; +\t if ((unsigned short) val < sptr[mid].low) { +\t\thigh = mid - 1; +\t } else { +\t\tif ((unsigned short) val > sptr[mid].high) { +\t\t low = mid + 1; +\t\t} else { +\t\t return 1; +\t\t} +\t } +\t} } else { @@ -516,22 +514,22 @@ } else { - if (rptr->nbLongRange == 0) { - return 0; - } - low = 0; - high = rptr->nbLongRange - 1; - lptr = rptr->longRange; - while (low <= high) { - mid = (low + high) / 2; - if (val < lptr[mid].low) { - high = mid - 1; - } else { - if (val > lptr[mid].high) { - low = mid + 1; - } else { - return 1; - } - } - } +\tif (rptr->nbLongRange == 0) { +\t return 0; +\t} +\tlow = 0; +\thigh = rptr->nbLongRange - 1; +\tlptr = rptr->longRange; +\twhile (low <= high) { +\t mid = (low + high) / 2; +\t if (val < lptr[mid].low) { +\t\thigh = mid - 1; +\t } else { +\t\tif (val > lptr[mid].high) { +\t\t low = mid + 1; +\t\t} else { +\t\t return 1; +\t\t} +\t } +\t} } return 0; }