Commit 257df10f by Wenzel Jakob

improve mkdoc.py determinism

When processing many files that contain top-level items with the same
name (e.g. "operator<<"), the output was non-deterministic and depended
on the order in which the per-file Clang extraction threads finished.
This commit adds sorting that also accounts for the filename to prevent
random changes from run to run.
parent 46469d4e
...@@ -56,26 +56,19 @@ CPP_OPERATORS = OrderedDict( ...@@ -56,26 +56,19 @@ CPP_OPERATORS = OrderedDict(
job_count = cpu_count() job_count = cpu_count()
job_semaphore = Semaphore(job_count) job_semaphore = Semaphore(job_count)
registered_names = dict() output = []
def d(s): def d(s):
return s.decode('utf8') return s.decode('utf8')
def sanitize_name(name): def sanitize_name(name):
global registered_names
name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name) name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
for k, v in CPP_OPERATORS.items(): for k, v in CPP_OPERATORS.items():
name = name.replace('operator%s' % k, 'operator_%s' % v) name = name.replace('operator%s' % k, 'operator_%s' % v)
name = re.sub('<.*>', '', name) name = re.sub('<.*>', '', name)
name = ''.join([ch if ch.isalnum() else '_' for ch in name]) name = ''.join([ch if ch.isalnum() else '_' for ch in name])
name = re.sub('_$', '', re.sub('_+', '_', name)) name = re.sub('_$', '', re.sub('_+', '_', name))
if name in registered_names:
registered_names[name] += 1
name += '_' + str(registered_names[name])
else:
registered_names[name] = 1
return '__doc_' + name return '__doc_' + name
...@@ -189,8 +182,7 @@ def process_comment(comment): ...@@ -189,8 +182,7 @@ def process_comment(comment):
return result.rstrip().lstrip('\n') return result.rstrip().lstrip('\n')
def extract(filename, node, prefix, output): def extract(filename, node, prefix):
num_extracted = 0
if not (node.location.file is None or if not (node.location.file is None or
os.path.samefile(d(node.location.file.name), filename)): os.path.samefile(d(node.location.file.name), filename)):
return 0 return 0
...@@ -201,9 +193,7 @@ def extract(filename, node, prefix, output): ...@@ -201,9 +193,7 @@ def extract(filename, node, prefix, output):
sub_prefix += '_' sub_prefix += '_'
sub_prefix += d(node.spelling) sub_prefix += d(node.spelling)
for i in node.get_children(): for i in node.get_children():
num_extracted += extract(filename, i, sub_prefix, output) extract(filename, i, sub_prefix)
if num_extracted == 0:
return 0
if node.kind in PRINT_LIST: if node.kind in PRINT_LIST:
comment = d(node.raw_comment) if node.raw_comment is not None else '' comment = d(node.raw_comment) if node.raw_comment is not None else ''
comment = process_comment(comment) comment = process_comment(comment)
...@@ -212,18 +202,15 @@ def extract(filename, node, prefix, output): ...@@ -212,18 +202,15 @@ def extract(filename, node, prefix, output):
sub_prefix += '_' sub_prefix += '_'
if len(node.spelling) > 0: if len(node.spelling) > 0:
name = sanitize_name(sub_prefix + d(node.spelling)) name = sanitize_name(sub_prefix + d(node.spelling))
output.append('\nstatic const char *%s =%sR"doc(%s)doc";' % global output
(name, '\n' if '\n' in comment else ' ', comment)) output.append((name, filename, comment))
num_extracted += 1
return num_extracted
class ExtractionThread(Thread): class ExtractionThread(Thread):
def __init__(self, filename, parameters, output): def __init__(self, filename, parameters):
Thread.__init__(self) Thread.__init__(self)
self.filename = filename self.filename = filename
self.parameters = parameters self.parameters = parameters
self.output = output
job_semaphore.acquire() job_semaphore.acquire()
def run(self): def run(self):
...@@ -232,7 +219,7 @@ class ExtractionThread(Thread): ...@@ -232,7 +219,7 @@ class ExtractionThread(Thread):
index = cindex.Index( index = cindex.Index(
cindex.conf.lib.clang_createIndex(False, True)) cindex.conf.lib.clang_createIndex(False, True))
tu = index.parse(self.filename, self.parameters) tu = index.parse(self.filename, self.parameters)
extract(self.filename, tu.cursor, '', self.output) extract(self.filename, tu.cursor, '')
finally: finally:
job_semaphore.release() job_semaphore.release()
...@@ -289,18 +276,26 @@ if __name__ == '__main__': ...@@ -289,18 +276,26 @@ if __name__ == '__main__':
#endif #endif
''') ''')
output = [] output.clear()
for filename in filenames: for filename in filenames:
thr = ExtractionThread(filename, parameters, output) thr = ExtractionThread(filename, parameters)
thr.start() thr.start()
print('Waiting for jobs to finish ..', file=sys.stderr) print('Waiting for jobs to finish ..', file=sys.stderr)
for i in range(job_count): for i in range(job_count):
job_semaphore.acquire() job_semaphore.acquire()
output.sort() name_ctr = 1
for l in output: name_prev = None
print(l) for name, _, comment in list(sorted(output, key=lambda x: (x[0], x[1]))):
if name == name_prev:
name_ctr += 1
name = name + "_%i" % name_ctr
else:
name_prev = name
name_ctr = 1
print('\nstatic const char *%s =%sR"doc(%s)doc";' %
(name, '\n' if '\n' in comment else ' ', comment))
print(''' print('''
#if defined(__GNUG__) #if defined(__GNUG__)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment