patternpythonMinor
Brace pairing ({}[]()<>) cleanup/speedup
Viewed 0 times
pairingbracecleanupspeedup
Problem
This is working as expected, except in the speed area + I need to make it more readable and shorter if possible, I probably have lot's of things I don't need :).
Edit it is working now
```
charset = dict(opening='{[(',\
string = ('"', "'"),\
comment=((''), ('"""', '"""'), ('#', '\n')))
allowed = ''.join([x[0][0] + x[1][0] for x in charset['comment']])
allowed += ''.join(charset['string'])
allowed += charset['opening']
allowed += charset['closing']
def brace_check(text):
o = []
c = []
notr = []
found = []
busy = False
last_pos = None
for i in xrange(len(text)):
ch = text[i]
if not busy:
cont = True
for comment in charset['comment']:
if ch == comment[0][0]:
como = text[i:len(comment[0])]
if como == comment[0]:
busy = comment[1]
if ch in charset['opening']:
last_pos = i
cont = False
break
if cont:
if ch in charset['string']:
busy = ch
elif ch in charset['opening']:
o.append((ch, i))
elif ch in charset['closing']:
c.append((ch, i))
else:
if ch == busy[0]:
if len(busy) == 1:
comc = ch
else:
comc = text[i:i + len(busy)]
if comc == busy:
if last_pos is not None:
if busy[-1] in charset['closing']:
found.append((last_pos, i))
last_pos = None
text = text[:i] + '\n' * len(comc) +\
text[i + len(comc):]
busy = not busy
elif busy in charset['string']:
if ch == '\n':
busy = not busy
Edit it is working now
```
charset = dict(opening='{[(',\
string = ('"', "'"),\
comment=((''), ('"""', '"""'), ('#', '\n')))
allowed = ''.join([x[0][0] + x[1][0] for x in charset['comment']])
allowed += ''.join(charset['string'])
allowed += charset['opening']
allowed += charset['closing']
def brace_check(text):
o = []
c = []
notr = []
found = []
busy = False
last_pos = None
for i in xrange(len(text)):
ch = text[i]
if not busy:
cont = True
for comment in charset['comment']:
if ch == comment[0][0]:
como = text[i:len(comment[0])]
if como == comment[0]:
busy = comment[1]
if ch in charset['opening']:
last_pos = i
cont = False
break
if cont:
if ch in charset['string']:
busy = ch
elif ch in charset['opening']:
o.append((ch, i))
elif ch in charset['closing']:
c.append((ch, i))
else:
if ch == busy[0]:
if len(busy) == 1:
comc = ch
else:
comc = text[i:i + len(busy)]
if comc == busy:
if last_pos is not None:
if busy[-1] in charset['closing']:
found.append((last_pos, i))
last_pos = None
text = text[:i] + '\n' * len(comc) +\
text[i + len(comc):]
busy = not busy
elif busy in charset['string']:
if ch == '\n':
busy = not busy
Solution
def brace_check(string):string is the name of a python module, it might be best to avoid it
# Imports
import reimports should be done at the module level not inside a function
# Configuration
charset = dict(opening='{[(',\
string = ['"', "'"],\
single=['#', '//'],\
multi=[['/*', '*/'], '"""', ['']])Constant values should be at the module level not inside the function
found = []# found objectsfound isn't used until way later. Declare it way later.
notr = []# busy locationsnotr and its comment don't suggest anything the same.
temp = {}# temporary holding for opening braces
end = {}# temporary holding for closing bracesAll variables are temporary. Don't give names or comments that tell me such obvious things
# Find all ocurrances and its position
newline = [m.start() for m in re.finditer('\n', string)] # every newline
for mn in charset['multi']:# find every multiline comment
if type(mn) == type(str()):# if there only is one element
op = [m.start() for m in re.finditer(re.escape(mn), string)]
cl = op[1::2]
op = op[0::2]
mn = (mn, mn)
else:
op = [m.start() for m in re.finditer(re.escape(mn[0]), string)]
cl = [m.start() for m in re.finditer(re.escape(mn[1]), string)]These short cryptic variables name are asking for confusion.
for i in xrange(len(op)):# find and close every comment
om = op[i]Use the enumerate function. It lets you use a foreach loop and get the indexes at the same time
try:
cm = next(v for v in cl if v > om) + len(mn[1])
add a line like: start_comment, end_comment = mn, that way you can avoid the indexes are your code will be clearer.
if mn[0][0] in charset['opening'] and\
mn[1][-1] in charset['closing']:
for i in xrange(om + 1, cm - 1):
if i not in notr:
notr.append(i)Rather the constantly checking whether a value is in notr, use a set. Then the last three lines can simply be notr.add( xrange(om + 1, cm - 1) )
else:
for i in xrange(om, cm):
if i not in notr:
notr.append(i)Did we really need that special case?
except:Don't catch all exceptions, you'll hide bugs that way. Instead, catch only the specific type of exception you are interested in. Also, you want as little code in the try block as possible to avoid catching other stray exceptions
for i in xrange(om, len(string)):
if i not in notr:
notr.append(i)You at least need a comment explaining why this make sense.
del op, clDon't del variables, its rarely useful.
for sn in charset['single']:
rep = [m.start() for m in re.finditer(re.escape(sn), string)]
for s in rep:
try:
new = next(v for v in newline if v > s)
except:
new = len(string)
newline.append(len(string))Why not use the string search functions rather then having built a newline list? Also, if you need to treat the end of input as a newline, put that in when you create the list not in an exception handler.
if not s in notr:
for i in xrange(s, new):
if i not in notr:
notr.append(i)
newline.remove(new)
del rep
for sn in charset['string']:
rep = [m.start() for m in re.finditer(re.escape(sn), string)]
last = False
for i in xrange(len(rep)):
if not last:
s = rep[i]
if s not in notr:
try:
new = next(v for v in newline if v > s)
except:
new = len(string)
newline.append(len(string))If code repeats, you need a function.
try:
n = next(v for v in rep if v > s)
except:
n = new + 1
if n < new:
last = True
for i in xrange(s, n + 1):
if i not in notr:
notr.append(i)
else:
for i in xrange(s, new):
if i not in notr:
notr.append(i)
newline.remove(new)
else:
last = False
del rep
for cn in charset['closing']:
on = charset['opening'][charset['closing'].find(cn)]
end[on] = [m.start() for m in re.finditer(re.escape(cn), string)\
if m.start() not in notr]Given the cn is a single character, use of a regular expressio
Code Snippets
def brace_check(string):# Imports
import re# Configuration
charset = dict(opening='{[(<',\
closing='}])>',\
string = ['"', "'"],\
single=['#', '//'],\
multi=[['/*', '*/'], '"""', ['<!--', '-->']])found = []# found objectsnotr = []# busy locationsContext
StackExchange Code Review Q#3543, answer score: 6
Revisions (0)
No revisions yet.