improve mkdoc.py determinism

When processing many files that contain top-level items with the same name (e.g. "operator<<"), the output was non-deterministic and depended on the order in which the different Clang processes finished. This commit adds sorting that also accounts for the filename to prevent random changes from run to run.

improve mkdoc.py determinism
When processing many files that contain top-level items with the same name (e.g. "operator<<"), the output was non-deterministic and depended on the order in which the different Clang processes finished. This commit adds sorting that also accounts for the filename to prevent random changes from run to run.
257df10f · Wenzel Jakob · 46469d4e · 257df10f
Commit 257df10f authored Apr 12, 2017 by Wenzel Jakob
Hide whitespace changes
Inline Side-by-side

Showing with 20 additions and 25 deletions

tools/mkdoc.py
+20 -25

No files found.
--- a/tools/mkdoc.py
+++ b/tools/mkdoc.py
@@ -56,26 +56,19 @@ CPP_OPERATORS = OrderedDict(
 job_count = cpu_count()
 job_semaphore = Semaphore(job_count)
-registered_names = dict()
+output = []
 def d(s):
    return s.decode('utf8')
 def sanitize_name(name):
-    global registered_names
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for k, v in CPP_OPERATORS.items():
        name = name.replace('operator%s' % k, 'operator_%s' % v)
    name = re.sub('<.*>', '', name)
    name = ''.join([ch if ch.isalnum() else '_' for ch in name])
    name = re.sub('_$', '', re.sub('_+', '_', name))
-    if name in registered_names:
-        registered_names[name] += 1
-        name += '_' + str(registered_names[name])
-    else:
-        registered_names[name] = 1
    return '__doc_' + name
@@ -189,8 +182,7 @@ def process_comment(comment):
    return result.rstrip().lstrip('\n')
-def extract(filename, node, prefix, output):
+def extract(filename, node, prefix):
-    num_extracted = 0
    if not (node.location.file is None or
            os.path.samefile(d(node.location.file.name), filename)):
        return 0
@@ -201,9 +193,7 @@ def extract(filename, node, prefix, output):
                sub_prefix += '_'
            sub_prefix += d(node.spelling)
        for i in node.get_children():
-            num_extracted += extract(filename, i, sub_prefix, output)
+            extract(filename, i, sub_prefix)
-        if num_extracted == 0:
-            return 0
    if node.kind in PRINT_LIST:
        comment = d(node.raw_comment) if node.raw_comment is not None else ''
        comment = process_comment(comment)
@@ -212,18 +202,15 @@ def extract(filename, node, prefix, output):
            sub_prefix += '_'
        if len(node.spelling) > 0:
            name = sanitize_name(sub_prefix + d(node.spelling))
-            output.append('\nstatic const char *%s =%sR"doc(%s)doc";' %
+            global output
-                (name, '\n' if '\n' in comment else ' ', comment))
+            output.append((name, filename, comment))
-            num_extracted += 1
-    return num_extracted
 class ExtractionThread(Thread):
-    def __init__(self, filename, parameters, output):
+    def __init__(self, filename, parameters):
        Thread.__init__(self)
        self.filename = filename
        self.parameters = parameters
-        self.output = output
        job_semaphore.acquire()
    def run(self):
@@ -232,7 +219,7 @@ class ExtractionThread(Thread):
            index = cindex.Index(
                cindex.conf.lib.clang_createIndex(False, True))
            tu = index.parse(self.filename, self.parameters)
-            extract(self.filename, tu.cursor, '', self.output)
+            extract(self.filename, tu.cursor, '')
        finally:
            job_semaphore.release()
@@ -289,18 +276,26 @@ if __name__ == '__main__':
 #endif
 ''')
-    output = []
+    output.clear()
    for filename in filenames:
-        thr = ExtractionThread(filename, parameters, output)
+        thr = ExtractionThread(filename, parameters)
        thr.start()
    print('Waiting for jobs to finish ..', file=sys.stderr)
    for i in range(job_count):
        job_semaphore.acquire()
-    output.sort()
+    name_ctr = 1
-    for l in output:
+    name_prev = None
-        print(l)
+    for name, _, comment in list(sorted(output, key=lambda x: (x[0], x[1]))):
+        if name == name_prev:
+            name_ctr += 1
+            name = name + "_%i" % name_ctr
+        else:
+            name_prev = name
+            name_ctr = 1
+        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
+              (name, '\n' if '\n' in comment else ' ', comment))
    print('''
 #if defined(__GNUG__)