Source code for parser

# -*- coding: utf-8 -*-

import os
import re
from click import echo, secho
from clang.cindex import CursorKind, TokenKind, TranslationUnit, Index
import clang.cindex
import tempfile
import hashlib
from itertools import chain
from functools import wraps
from collections.abc import Iterable
from collections import OrderedDict, defaultdict
from ccrawl import conf
from ccrawl import graphs
from ccrawl.core import (
    cFunc,
    cMacro,
    cTypedef,
    cStruct,
    cUnion,
    cClass,
    cTemplate,
    cEnum,
    cNamespace,
)

g_indent = 0

CHandlers = {}

# ccrawl classes for clang parser:
# ------------------------------------------------------------------------------


def declareHandler(kind):
    """
    Decorator factory registering a handler for a clang cursor kind.

    The decorated function is stored in the module-level CHandlers dict
    under the key *kind* and is returned unchanged, so that it can later be
    looked up and called for each cursor of this kind.
    """

    def _register(handler):
        # last registration for a given kind wins:
        CHandlers[kind] = handler
        return handler

    return _register
spec_chars = (",", "*", "::", "&", "(", ")", "[", "]")

# cursor types that will be parsed to instanciate ccrawl
# objects:
TYPEDEF_DECL = CursorKind.TYPEDEF_DECL
STRUCT_DECL = CursorKind.STRUCT_DECL
UNION_DECL = CursorKind.UNION_DECL
ENUM_DECL = CursorKind.ENUM_DECL
FUNCTION_DECL = CursorKind.FUNCTION_DECL
MACRO_DEF = CursorKind.MACRO_DEFINITION
CLASS_DECL = CursorKind.CLASS_DECL
FUNC_TEMPLATE = CursorKind.FUNCTION_TEMPLATE
CLASS_TEMPLATE = CursorKind.CLASS_TEMPLATE
CLASS_TPSPEC = CursorKind.CLASS_TEMPLATE_PARTIAL_SPECIALIZATION
NAMESPACE = CursorKind.NAMESPACE

# pattern of the placeholder that clang uses for anonymous/unnamed types:
_ANON_TYPE_RE = re.compile(r"\((anonymous|unnamed) .*\)")
# a standalone 'int' word (not part of a longer identifier):
_INT_WORD_RE = re.compile(r"(?<!\w)int(?!\w)")

# handlers:


@declareHandler(FUNCTION_DECL)
def FuncDecl(cur, cxx, errors=None):
    """Handler for a function declaration cursor.

    Returns (identifier, cFunc) where the cFunc holds the (fixed) prototype,
    the parameter names and, if a body is present, the local variables and
    called names collected by CodeDef.
    """
    identifier = cur.spelling
    t = cur.type.spelling
    proto = fix_type_conversion(cur, t, cxx, errors)
    params = []
    locs = []
    calls = []
    f = cFunc(prototype=proto)
    for e in cur.get_children():
        if conf.DEBUG:
            echo("%s: %s" % (e.kind, e.spelling))
        if e.kind == CursorKind.PARM_DECL:
            params.append(e.spelling)
        elif e.kind == CursorKind.COMPOUND_STMT:
            locs, calls = CodeDef(e, cxx, errors)
    f["params"] = params
    f["locs"] = locs
    f["calls"] = calls
    if conf.VERBOSE:
        secho(" cFunc: %s" % identifier)
    return identifier, f


@declareHandler(MACRO_DEF)
def MacroDef(cur, cxx, errors=None):
    """Handler for a macro definition cursor.

    Returns (identifier, cMacro) built from the macro tokens (name excluded),
    or None (implicitly) when the cursor has no source file.
    """
    if cur.extent.start.file:
        identifier = cur.spelling
        toks = []
        # first token is the macro name itself, skip it:
        for t in list(cur.get_tokens())[1:]:
            pre = "" if t.spelling in spec_chars else " "
            toks.append(pre + t.spelling)
        s = "".join(toks)
        if conf.VERBOSE:
            secho(" cMacro: %s" % identifier)
        return identifier, cMacro(s.replace("( ", "("))


@declareHandler(TYPEDEF_DECL)
def TypeDef(cur, cxx, errors=None):
    """Handler for a typedef cursor. Returns (identifier, cTypedef)."""
    identifier = cur.type.spelling
    dt = cur.underlying_typedef_type
    if "(anonymous" in dt.spelling:
        dt = dt.get_canonical()
    if "(unnamed" in dt.spelling:
        dt = dt.get_canonical()
    t = fix_type_conversion(cur, dt.spelling, cxx, errors)
    t = get_uniq_typename(t)
    if t.startswith("struct volatile "):
        t = t.replace("struct volatile ", "")
    elif t.startswith("struct const "):
        t = t.replace("struct const ", "")
    if conf.DEBUG:
        echo("\t" * g_indent + "make unique: %s" % t)
    if conf.VERBOSE:
        secho(" cTypedef: %s" % identifier)
    return identifier, cTypedef(t)


@declareHandler(CursorKind.TYPE_REF)
def TypeRef(cur, cxx, errors=None):
    """Handler for a type reference cursor: echoes it, returns (spelling, None)."""
    echo("\t" * g_indent + cur.spelling)
    return cur.spelling, None


@declareHandler(STRUCT_DECL)
def StructDecl(cur, cxx, errors=None):
    """Handler for a struct declaration. Returns (typename, cStruct or cClass)."""
    typename = cur.type.spelling
    if cxx:
        typename = cur.type.get_canonical().spelling
    if not typename.startswith("struct "):
        typename = "struct " + typename
    typename = get_uniq_typename(typename)
    if conf.DEBUG:
        echo("\t" * g_indent + "make unique: %s" % typename)
    S = cClass() if cxx else cStruct()
    SetStructured(cur, S, errors)
    if conf.VERBOSE:
        secho(" %s: %s" % (S.__class__.__name__, typename))
    return typename, S


@declareHandler(UNION_DECL)
def UnionDecl(cur, cxx, errors=None):
    """Handler for a union declaration. Returns (typename, cUnion or cClass)."""
    typename = cur.type.spelling
    if cxx:
        typename = cur.type.get_canonical().spelling
    if not typename.startswith("union "):
        typename = "union " + typename
    typename = get_uniq_typename(typename)
    if conf.DEBUG:
        echo("\t" * g_indent + "make unique: %s" % typename)
    S = cClass() if cxx else cUnion()
    SetStructured(cur, S, errors)
    if conf.VERBOSE:
        secho(" %s: %s" % (S.__class__.__name__, typename))
    return typename, S


@declareHandler(CLASS_DECL)
def ClassDecl(cur, cxx, errors=None):
    """Handler for a C++ class declaration. Returns (typename, cClass)."""
    typename = "class %s" % (cur.type.get_canonical().spelling)
    if conf.DEBUG:
        echo("\t" * g_indent + "%s" % typename)
    S = cClass()
    SetStructured(cur, S, errors)
    if conf.VERBOSE:
        secho(" %s: %s" % (S.__class__.__name__, typename))
    return typename, S


@declareHandler(ENUM_DECL)
def EnumDecl(cur, cxx, errors=None):
    """Handler for an enum declaration.

    Returns (typename, cEnum) where the cEnum maps each enumerator name
    to its value.
    """
    global g_indent
    typename = cur.type.spelling
    if cxx:
        typename = cur.type.get_canonical().spelling
    if not typename.startswith("enum "):
        typename = "enum " + typename
    typename = get_uniq_typename(typename)
    if conf.DEBUG:
        echo("\t" * g_indent + "make unique: %s" % typename)
    S = cEnum()
    S._in = str(cur.extent.start.file)
    g_indent += 1
    for f in cur.get_children():
        if conf.DEBUG:
            echo("\t" * g_indent + "%s: " % (f.kind), nl=False)
        if f.kind is CursorKind.ENUM_CONSTANT_DECL:
            S[f.spelling] = f.enum_value
            if conf.DEBUG:
                echo(str(f.enum_value), nl=False)
        elif conf.DEBUG:
            echo("%s:%s" % (f.kind, f.spelling))
    g_indent -= 1
    if conf.VERBOSE:
        secho(" %s: %s" % (S.__class__.__name__, typename))
    return typename, S


@declareHandler(CLASS_TEMPLATE)
def ClassTemplate(cur, cxx, errors=None):
    """Handler for a class template. Returns (identifier, cTemplate).

    The identifier is prefixed by the keyword found in the tokens right
    before the template name ('struct' when it can't be found).
    """
    identifier = cur.displayname
    p = []
    for x in cur.get_children():
        if x.kind == CursorKind.TEMPLATE_TYPE_PARAMETER:
            p.append("typename %s" % x.spelling)
        elif x.kind == CursorKind.TEMPLATE_NON_TYPE_PARAMETER:
            p.append("%s %s" % (x.type.spelling, x.spelling))
    # now we need this to distinguish struct/union/class template:
    # damn libclang!! children here should really allow for
    # this by having a STRUCT_DECL, UNION_DECL or CLASS_DECL !
    # Or, if implicit CLASS_DECL due to being a CLASS_TEMPLATE,
    # then there should be a STRUCT_TEMPLATE as well.
    toks = [x.spelling for x in cur.get_tokens()]
    try:
        i = toks.index(cur.spelling)
    except ValueError:
        i = 0
    # no token before the name: fall back to 'struct' rather than letting
    # a negative index wrap around to the last token.
    k = toks[i - 1] if i > 0 else "struct"
    identifier = "%s %s" % (k, identifier)
    if conf.DEBUG:
        echo("\t" * g_indent + str(identifier))
    # ok so now proceed with the "class" parsing:
    S = cClass()
    SetStructured(cur, S, errors)
    if conf.VERBOSE:
        secho(" cTemplate/%s: %s" % (S.__class__.__name__, identifier))
    return identifier, cTemplate(params=p, cClass=S)


@declareHandler(FUNC_TEMPLATE)
def FuncTemplate(cur, cxx, errors=None):
    """Handler for a function template. Returns (identifier, cTemplate)."""
    identifier = cur.spelling
    if conf.DEBUG:
        echo("\t" * g_indent + identifier)
    proto = cur.type.spelling
    if conf.DEBUG:
        echo("\t" * g_indent + proto)
    p = []
    for x in cur.get_children():
        if x.kind == CursorKind.TEMPLATE_TYPE_PARAMETER:
            p.append("typename %s" % x.spelling)
        elif x.kind == CursorKind.TEMPLATE_NON_TYPE_PARAMETER:
            p.append("%s %s" % (x.type.spelling, x.spelling))
    # drop any trailing __attribute__ from the prototype:
    f = re.sub(r"__attribute__.*", "", proto)
    if conf.VERBOSE:
        secho(" cTemplate/cFunc: %s" % identifier)
    return identifier, cTemplate(params=p, cFunc=cFunc(prototype=f))


@declareHandler(CLASS_TPSPEC)
def ClassTemplatePartialSpec(cur, cxx, errors=None):
    """Handler for a partial specialization: a ClassTemplate flagged as such."""
    identifier, obj = ClassTemplate(cur, cxx, errors)
    obj["partial_specialization"] = True
    return identifier, obj


@declareHandler(NAMESPACE)
def NameSpace(cur, cxx, errors=None):
    """Handler for a namespace cursor. Returns (namespace, cNamespace).

    Children that have a registered handler are processed; their identifiers
    are appended to the cNamespace and their objects stored in its 'local'
    dict.
    """
    namespace = cur.spelling
    S = cNamespace()
    S.local = {}
    # check if namespace is inlined:
    toks = [t.spelling for t in cur.get_tokens()]
    try:
        idx = toks.index(namespace)
    except ValueError:
        S.inline = False
        S.parent = ""
    else:
        # 'inline namespace <name>': the keyword sits two tokens before the
        # name; guard against negative index wrap-around.
        S.inline = idx >= 2 and toks[idx - 2] == "inline"
        S.parent = cur.lexical_parent.spelling if S.inline else ""
    for f in cur.get_children():
        if f.kind in CHandlers:
            i, obj = CHandlers[f.kind](f, cxx, errors)
            if S.inline:
                i = i.replace("%s::" % namespace, "")
            S.append(i)
            S.local[i] = obj
    if conf.VERBOSE:
        secho(" %s: %s" % (S.__class__.__name__, namespace))
    return namespace, S


def CodeDef(cur, cxx, errors=None):
    """Walk a compound statement cursor (pre-order).

    Returns (locs, calls): the list of (type, name) of local variable
    declarations and the list of spellings of call expressions.
    """
    global g_indent
    g_indent += 1
    locs = []
    calls = []
    for f in cur.walk_preorder():
        if conf.DEBUG:
            echo("\t" * g_indent + "%s: %s" % (f.kind, f.spelling))
        if f.kind == CursorKind.VAR_DECL:
            locs.append((f.type.spelling, f.spelling))
            if conf.DEBUG:
                echo("\t" * g_indent + "var: (%s,%s)" % locs[-1])
        elif f.kind == CursorKind.CALL_EXPR:
            calls.append(f.spelling)
    g_indent -= 1
    return locs, calls


def SetStructured(cur, S, errors=None):
    """Fill the structured object S (struct/union/class) from cursor cur.

    Each child cursor is turned into a member appended to S; nested type
    definitions are stored in S.local. The diagnostics in *errors* located
    within a child's extent are passed down to fix its type spelling.
    """
    global g_indent
    S._in = str(cur.extent.start.file)
    local = {}
    alltoks = [
        (t.kind, t.spelling, t.location)
        for t in cur._tu.get_tokens(extent=cur.extent)
    ]
    attr_x = False
    if errors is None:
        errors = []
    g_indent += 1
    bitfield_error = False
    if errors:
        for (k, s, l) in alltoks:
            if k == TokenKind.PUNCTUATION and s == ":":
                if conf.DEBUG:
                    secho("bitfield structure with errors...", fg="yellow")
                bitfield_error = True
                break
    for f in cur.get_children():
        if conf.DEBUG:
            echo("\t" * g_indent + str(f.kind) + "=" + str(f.spelling))
        # select the diagnostics located inside this child's extent:
        errs = []
        for r in errors:
            if f.extent.start.line <= r.location.line <= f.extent.end.line:
                if (f.extent.start.line != f.extent.end.line) or (
                    f.extent.start.column <= r.location.column <= f.extent.end.column
                ):
                    errs.append(r)
        if errs and bitfield_error:
            # fixing the extent. Clang is buggy and has forgotten the bitfield tokens...
            fix = None
            T = [(t.kind, t.spelling, t.location) for t in f.get_tokens()]
            try:
                off = alltoks.index(T[0])
                for k, s, l in alltoks[off:]:
                    if k == TokenKind.PUNCTUATION and s == ";":
                        if l.offset > T[-1][2].offset:
                            fix = l.offset
                            break
            except (IndexError, ValueError):
                # no token for this child, or not found in parent's tokens
                pass
            if fix is not None:
                x = f._extent
                e = clang.cindex.SourceLocation.from_offset(f._tu, x.end.file, fix)
                x = clang.cindex.SourceRange.from_locations(x.start, e)
                f._extent = x
        # nested type definition of another structured type:
        if f.kind in (
            STRUCT_DECL,
            UNION_DECL,
            ENUM_DECL,
            CLASS_DECL,
            FUNC_TEMPLATE,
            CLASS_TEMPLATE,
        ):
            identifier, slocal = CHandlers[f.kind](f, S._is_class, errs)
            if f.kind == FUNC_TEMPLATE:
                S.append(
                    (
                        ("template%s" % slocal.get_template(), slocal["cFunc"]["prototype"]),
                        ("", identifier),
                        (f.access_specifier.name, ""),
                    )
                )
            else:
                local[identifier] = slocal
                attr_x = True
                if not S._is_class:
                    S.append([identifier, "", ""])
        # c++ parent class:
        elif f.kind is CursorKind.CXX_BASE_SPECIFIER:
            is_virtual = clang.cindex.conf.lib.clang_isVirtualBase(f)
            virtual = "virtual" if is_virtual else ""
            # the spelling seems to always includes the 'class'/'struct' keyword...
            S.append(
                (("parent", virtual), ("", f.spelling), (f.access_specifier.name, ""))
            )
        # c++ 'using' declaration:
        elif f.kind is CursorKind.USING_DECLARATION:
            uses = []
            name = ""
            for x in f.get_children():
                if x.kind == CursorKind.TYPE_REF:
                    uses.append(x.spelling)
                if x.kind == CursorKind.OVERLOADED_DECL_REF:
                    name = x.spelling
            if conf.DEBUG:
                echo("\t" * g_indent + "%s : %s" % (name, uses))
            S.append((("using", uses), ("", name), ("", "")))
        # structured type member:
        else:
            try:
                comment = f.brief_comment or f.raw_comment
            except UnicodeDecodeError:
                comment = ""
            # type spelling is our member type only if this type is defined already,
            # otherwise clang takes the default 'int' type here and we can't access
            # the wanted type unless we access f's tokens.
            # field/member declaration:
            if f.kind in (
                CursorKind.FIELD_DECL,
                CursorKind.VAR_DECL,
                CursorKind.CONSTRUCTOR,
                CursorKind.DESTRUCTOR,
                CursorKind.CXX_METHOD,
            ):
                t = f.type.spelling
                if "(anonymous" in t:
                    if not S._is_class:
                        t = f.type.get_canonical().spelling
                elif "(unnamed" in t:
                    if not S._is_class:
                        t = f.type.get_canonical().spelling
                else:
                    if S._is_class:
                        kind = get_kind_type(t)
                        t = f.type.get_canonical().spelling
                        if "type-parameter" in t:
                            t = f.type.spelling
                        if kind:
                            t = "%s %s" % (kind, t)
                t = fix_type_conversion(f, t, S._is_class, errs)
                t = get_uniq_typename(t)
                attr = ""
                if f.kind == CursorKind.VAR_DECL:
                    attr = "static"
                if f.is_virtual_method():
                    attr = "virtual"
                if f.is_bitfield():
                    bw = f.get_bitfield_width()
                    if conf.DEBUG:
                        echo("\t" * g_indent + "bitfield size:%d" % bw)
                    t += "# %d" % bw
                for w in f.get_children():
                    if conf.DEBUG:
                        g_indent += 1
                        subk = w.kind
                        subs = w.spelling
                        echo("\t" * g_indent + "%s: %s" % (subk, subs))
                        g_indent -= 1
                    if w.kind == CursorKind.CXX_FINAL_ATTR:
                        attr += ", final"
                    if w.kind == CursorKind.CXX_OVERRIDE_ATTR:
                        attr += ", override"
                if S._is_class:
                    # a C++ class member is stored as:
                    # [ (static/virtual?, type definition),
                    #   (mangled name, source name),
                    #   (access specifier, comment) ]
                    member = (
                        (attr, t),
                        (f.mangled_name, f.spelling),
                        (f.access_specifier.name, comment),
                    )
                else:
                    # the nested type definition just before this field was
                    # appended as a placeholder; drop it if the field is of
                    # that type:
                    if attr_x and t == S[-1][0]:
                        S.pop()
                    attr_x = False
                    member = (t, f.spelling, comment)
                S.append(member)
            elif f.kind == CursorKind.FRIEND_DECL:
                for frd in f.get_children():
                    member = (
                        ("friend", frd.type.spelling),
                        (frd.mangled_name, frd.spelling),
                        ("", comment),
                    )
                    S.append(member)
    S.local = local
    g_indent -= 1


def get_kind_type(t):
    """Return 'struct', 'union' or 'enum' if that keyword (followed by a
    space) appears in t (checked in this order), '' otherwise."""
    for kind in ("struct", "union", "enum"):
        if kind + " " in t:
            return kind
    return ""


def get_uniq_typename(t):
    """Replace clang's '(anonymous ...)'/'(unnamed ...)' placeholder in t by
    '?_' followed by the first 8 hex digits of its sha256.

    Typenames without such placeholder are returned unchanged.
    """
    if not (("(anonymous" in t) or ("(unnamed" in t)):
        return t
    kind = get_kind_type(t)
    # anon types inside *named* struct/union are prefixed by
    # the struct/union namespace, we don't keep this since
    # we are creating a unique typename anyway
    if "::" in t:
        t = "%s %s" % (kind, t.split("::")[-1])
    m = _ANON_TYPE_RE.search(t)
    if m is None:
        # marker present but not in the expected "(anonymous ...)" form:
        # nothing we can hash, leave the name as it is.
        return t
    # the placeholder embeds the source file path, which might not be ascii;
    # utf-8 gives the same digest as ascii for pure-ascii strings.
    h = hashlib.sha256(m.group(0).encode("utf-8")).hexdigest()[:8]
    if not t.startswith(kind):
        t = "%s %s" % (kind, t)
    return _ANON_TYPE_RE.sub("?_%s" % h, t, count=1)


def fix_type_conversion(f, t, cxx, errs):
    """Restore in type string t the unknown types that clang replaced by 'int'.

    f is the cursor the type belongs to, errs the diagnostics attached to it.
    Returns t unchanged if there are no errs or no candidate type name can be
    extracted from them.
    """
    if not errs:
        return t
    # type t might be a prototype, a structured type, or a
    # "complex" type (as opposed to simple) in which an unknown
    # type (denoted ut hereafter) as been replaced by 'int'.
    # Typename ut is fully provided in errs but
    # unfortunately, type t might contain several 'int' keywords,
    # some of which being really 'ints' and not the result of the
    # ut->int replacement.
    # In a previous version of ccrawl, we used a trick: since ut
    # is known by catching error messages, we'd add a fake typedef
    # string in a private include and then recompile our file.
    # The drawback was that we'd need several recompilations.
    # In this version we will detect which ints have been replaced
    # and switch them back to ut...
    if conf.DEBUG:
        secho("fix_type_conversion:", fg="yellow")
    if _INT_WORD_RE.search(t):
        # there is at least one int occurence in t...
        candidates = []
        for r in errs:
            if "unknown type" in r.spelling:
                found = re.findall(r"'(.*)'", r.spelling)
                if found:
                    candidates.append(found[0])
            elif "no type named" in r.spelling:
                l = re.findall(r"'(\w+)'", r.spelling)
                candidates.append("::".join(reversed(l)))
            elif "undeclared identifier" in r.spelling:
                l = re.findall(r"'(\w+)'", r.spelling)
                # trailing '~' marks a name to be completed from tokens
                candidates.append("::".join(reversed(l)) + "~")
        if not candidates:
            return t
        marks = [""]
        if conf.DEBUG:
            secho("candidates: %s" % candidates, fg="magenta")
        # for every occurence of int type in t:
        T = list(f.get_tokens())
        fixbitfield = ""
        for _ in _INT_WORD_RE.finditer(t):
            # lets see if this was diag-ed has an 'unknown type' error:
            # we walk f's tokens and record, in order, either a real 'int'
            # keyword or the candidate type name found in its place.
            while len(T) > 0:
                x = T.pop(0)
                if conf.DEBUG:
                    secho("%s: %s" % (x.kind, x.spelling), fg="red")
                if x.kind == TokenKind.KEYWORD:
                    if x.spelling == "int":
                        marks.append("int")
                        break
                elif x.kind == TokenKind.IDENTIFIER:
                    for c in candidates:
                        if x.spelling in c:
                            if c.endswith("~"):
                                c = c[:-1]
                                while len(T) > 0 and T[0].spelling == "::":
                                    x = T.pop(0)
                                    if not T:
                                        # dangling '::' at end of tokens
                                        break
                                    x = T.pop(0)
                                    c += "::%s" % (x.spelling)
                            marks.append(c)
                            break
                elif x.kind == TokenKind.PUNCTUATION and x.spelling == ":":
                    # bitfield width follows the ':' (if any token is left)
                    if T:
                        fixbitfield = "#{}".format(T[0].spelling)
        st = _INT_WORD_RE.split(t)
        d = len(st) - len(marks)
        if d > 0:
            # occurences we could not map to tokens stay as 'int':
            marks = marks + (["int"] * d)
        t = "".join(m + s for m, s in zip(marks, st)) + fixbitfield
    if conf.DEBUG:
        echo("\t" * g_indent + "type: %s" % t)
    return t


# ccrawl 'parse' function(s), wrapper of clang index.parse;
# ------------------------------------------------------------------------------
def parse(filename, args=None, unsaved_files=None, options=None, kind=None, tag=None, config=None):
    """
    Function that parses the input filename and returns the dictionary of
    name:object extracted from this C or C++ file.

    Arguments:
        filename: path of the file passed to clang's index.parse.
        args: list of clang command-line arguments (copied, never modified);
            a default set is used if None.
        unsaved_files: list of (name, content) passed to index.parse.
        options: TranslationUnit parse options; a default set is used if None.
        kind: collection of cursor kinds to handle (all registered handlers
            if None). Every kind must have a registered handler.
        tag: value forwarded to the to_db method of collected objects.
        config: Collect configuration (taken from conf if None).

    Returns:
        the values of the collected definitions, or an empty list if the
        parser failed or the file was skipped as C++ input.
    """
    # clang parser cindex options:
    if options is None:
        # (detailed processing allows to get macros in iterated cursors)
        options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
        options |= TranslationUnit.PARSE_INCOMPLETE
        options |= TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION
        # (preprocessor options not exported in the python bindings):
        RetainExcludedConditionalBlocks = 0x8000
        KeepGoing = 0x200
        # options |= RetainExcludedConditionalBlocks
        options |= KeepGoing
    if args is None:
        # mandatory if only libclang python binding is installed, since then the llvm-headers
        # are probably missing we need to use the builtin modulemap:
        _args = [
            "-ferror-limit=0",
            "-fmodules",
            "-fbuiltin-module-map",
        ]
    else:
        _args = args[:]
    if config is None:
        try:
            config = conf.config.Collect
        except AttributeError:
            config = conf.Collect(c=None)
    cxx_args = ["-x", "c++", "-std=c++11", "-fno-delayed-template-parsing"]
    if config.cxx:
        if filename.endswith((".hpp", ".cpp")):
            _args.extend(cxx_args)
    cxx = "c++" in _args
    depf = None
    if not config.strict:
        # in non strict mode, we allow missing includes
        fd, depf = tempfile.mkstemp(prefix="ccrawl-")
        os.close(fd)
        _args += ["-M", "-MG", "-MF%s" % depf]
    if conf.DEBUG:
        echo("\nfilename: %s, args: %s" % (filename, _args))
    if unsaved_files is None:
        # unsaved files are also used to replace existing files by these if the
        # filename matches,
        # TODO: allowing to "preload" headers like stddef.h for example...
        unsaved_files = []
    if kind is None:
        kind = CHandlers
    else:
        for k in kind:
            assert k in CHandlers
    if config.allc is False:
        options |= TranslationUnit.PARSE_SKIP_FUNCTION_BODIES
    defs = OrderedDict()
    index = Index.create()
    # call clang parser:
    try:
        tu = index.parse(filename, _args, unsaved_files, options)
        for err in tu.diagnostics:
            if conf.DEBUG:
                secho(err.format(), fg="yellow")
            if err.severity == 3:
                # common errors when parsing c++ as c:
                if ("expected ';'" in err.spelling) or ("'namespace'" in err.spelling):
                    if "asm block" in err.spelling:
                        continue
                    if config.cxx:
                        if conf.DEBUG:
                            secho("reparse as c++ input...", fg="cyan")
                        cxx = True
                        tu = index.parse(filename, _args + cxx_args, unsaved_files, options)
                        break
                    elif config.skipcxx:
                        secho("[c++]".rjust(12), fg="yellow")
                        if conf.DEBUG:
                            echo("includes:")
                            for t in tu.get_includes():
                                secho((" " * t.depth) + t.include.name, fg="yellow")
                        return []
            elif err.severity == 4:
                # this should not happen anymore thanks to -M -MG opts...
                # we keep it here just in case.
                if conf.VERBOSE:
                    secho(err.format(), bg="red", err=True)
                raise ValueError
    except Exception:
        # best-effort: any parser failure results in an empty output
        if not conf.QUIET:
            secho("[err]", fg="red")
        if conf.VERBOSE:
            secho("clang index.parse error", fg="red", err=True)
        return []
    else:
        if conf.VERBOSE:
            echo(":")
    finally:
        # the dependency file is only needed by clang during parsing; remove
        # it on every exit path (success, early return and error) so that
        # no temporary file is left behind.
        if depf is not None and os.path.exists(depf):
            os.remove(depf)
    # walk down all AST to get all top-level cursors:
    pool = [(c, []) for c in tu.cursor.get_children()]
    # diag maps filename -> line -> list of selected diagnostics:
    diag = {}
    for r in tu.diagnostics:
        # diagnostics without a file location can't be mapped to a cursor
        if selected_errs(r) and r.location.file is not None:
            fname = r.location.file.name
            if fname not in diag:
                diag[fname] = defaultdict(list)
            diag[fname][r.location.line].append(r)
    # map diagnostics to cursors:
    for cur, errs in pool:
        if cur.location.file is None or (cur.location.file.name not in diag):
            continue
        span = range(cur.extent.start.line, cur.extent.end.line + 1)
        if cur.location.line not in span:
            span = range(cur.location.line, cur.location.line + 1)
        lines = diag[cur.location.file.name]
        for l in span:
            errs.extend(lines[l])
    # now finally call the handlers:
    for cur, errs in pool:
        if conf.DEBUG and cur.location.file:
            echo("-" * 80)
            echo("%s: %s [%d errors]" % (cur.kind, cur.spelling, len(errs)))
        if cur.kind in kind:
            kv = CHandlers[cur.kind](cur, cxx, errs)
            # fill defs with collected cursors:
            if kv:
                ident, cobj = kv
                if cobj:
                    for x in cobj.to_db(ident, tag, cur.location.file.name):
                        defs[x["id"]] = x
    if not conf.QUIET:
        secho(("[%3d]" % len(defs)).rjust(12), fg="green" if not cxx else "cyan")
        for i in diag_get_missing(filename, tu):
            secho(" %s" % i, fg="red")
        for i in diag_get_incs(filename, tu):
            secho(" %s" % i[0], fg="magenta")
    return defs.values()
def parse_string(s, args=None, options=0, tag=None, config=None):
    """Crawl wrapper to parse an input string rather than file."""
    # reserve a unique temporary header name; the file itself is deleted
    # right away since the content is provided as an "unsaved file":
    handle, path = tempfile.mkstemp(prefix="ccrawl-", suffix=".h")
    os.close(handle)
    os.remove(path)
    unsaved = [(path, s)]
    return parse(path, args, unsaved, options, tag=tag, config=config)
# diagnostic messages (substrings) that are kept and mapped to cursors:
_SELECTED_ERRS = (
    "unknown type name",
    "use of undeclared identifier",
    "type specifier missing",
    "has incomplete",
    "no type named",
    "function cannot return function type",
    "no template named",
)


def selected_errs(r):
    """Return True if the spelling of diagnostic r contains one of the
    messages of interest, False otherwise."""
    return any(msg in r.spelling for msg in _SELECTED_ERRS)


def parse_debug(filename, cxx=False):
    """Parse filename with conf.DEBUG forced to True and a fixed set of
    clang arguments.

    Returns (pool, defs): the list of (top-level cursor, diagnostics) and the
    OrderedDict of collected definitions. The previous conf.DEBUG value is
    restored on exit, even if parsing raises.
    """
    old = conf.DEBUG
    conf.DEBUG = True
    try:
        options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
        options |= TranslationUnit.PARSE_INCOMPLETE
        options |= TranslationUnit.PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION
        # (preprocessor options not exported in the python bindings):
        RetainExcludedConditionalBlocks = 0x8000
        KeepGoing = 0x200
        # options |= RetainExcludedConditionalBlocks
        options |= KeepGoing
        _args = [
            "-ferror-limit=0",
            "-fmodules",
            "-fbuiltin-module-map",
        ]
        _args += ["-M", "-MG", "-MF%s" % ".depf"]
        if cxx:
            _args += ["-x", "c++", "-std=c++11", "-fno-delayed-template-parsing"]
        _args += [
            "-I.",
            "-I./xxx",
            "-I./other",
        ]
        unsaved_files = []
        index = Index.create()
        if conf.DEBUG:
            echo(_args)
        tu = index.parse(filename, _args, unsaved_files, options)
        for err in tu.diagnostics:
            secho(err.format(), fg="yellow")
        pool = [(c, []) for c in tu.cursor.get_children()]
        # diag maps filename -> line -> list of selected diagnostics:
        diag = {}
        for r in tu.diagnostics:
            if selected_errs(r):
                if not r.location.file.name in diag:
                    diag[r.location.file.name] = defaultdict(list)
                diag[r.location.file.name][r.location.line].append(r)
        for cur, errs in pool:
            if cur.location.file is None or (cur.location.file.name not in diag):
                continue
            span = range(cur.extent.start.line, cur.extent.end.line + 1)
            if cur.location.line not in span:
                span = range(cur.location.line, cur.location.line + 1)
            for l in span:
                errs.extend(diag[cur.location.file.name][l])
        defs = OrderedDict()
        for cur, errs in pool:
            if conf.DEBUG and cur.location.file:
                echo("-" * 80)
                echo("%s: %s [%d errors]" % (cur.kind, cur.spelling, len(errs)))
            if cur.kind in CHandlers:
                kv = CHandlers[cur.kind](cur, cxx, errs)
                if kv:
                    ident, cobj = kv
                    if cobj:
                        for x in cobj.to_db(ident, "debug", cur.location.file.name):
                            defs[x["id"]] = x
    finally:
        conf.DEBUG = old
    return pool, defs


def deepflatten(cur, ltypes=Iterable):
    """Generator over all descendants of cur, depth-first in pre-order
    (cur itself is not yielded). The ltypes argument is not used."""
    r = cur.get_children()
    while True:
        try:
            c = next(r)
        except StopIteration:
            break
        # children of c are visited before c's remaining siblings:
        r = chain(c.get_children(), r)
        yield c


def preprocess(files, args=None):
    """Build the inclusion graph of the given files.

    Every file is parsed by parseincludes; an edge is added from a file to
    each of the files it includes, holding the "-I" option deduced for it.
    Returns (FILES, G) where FILES maps the data of each root vertex to the
    list of edge data of its graph component, and G is the graph.
    """
    if conf.DEBUG:
        import pprint

        echo("")
        pprint.pp(files)
    G = graphs.Graph()
    V = {}
    F = []
    M = [""]
    # create all vertices for files:
    # f is a relative path to the file from current dir
    for f in files:
        basename = os.path.basename(f)
        # a vertex will hold the relative path, so a vertex
        # maps exactly to file,
        v = graphs.Vertex(f)
        # the dict V holds the vertices by their basenames,
        # so each value can be a list (for example a/x.h and b/x.h
        # are remembered as V["x.h"] = [Vertex("a/x.h"), Vertex("b/x.h")]
        if basename not in V:
            V[basename] = [v]
        else:
            V[basename].append(v)
        if conf.DEBUG:
            echo("vertex: %s (basename: %s)" % (v.data, basename))
        G.add_vertex(v)
        # F is just the list of files out of the 'files' set,
        # to make sure we parseincludes them in the order that was provided...
        F.append((f, v))
    # now try to "link" them based on inclusion:
    for (filename, v) in F:
        missing, incs = parseincludes(filename, args)
        for i in missing:
            bni = os.path.basename(i)
            dni = os.path.dirname(i)
            E = []
            if bni in V:
                for x in V[bni]:
                    I = os.path.dirname(x.data)
                    if I.endswith(dni):
                        j = I.rfind(dni)
                        e = graphs.Edge(v, x, data="-I%s" % I[:j])
                        e.data = e.data.rstrip('/')
                        E.append(e)
                    else:
                        if conf.DEBUG:
                            secho("vertex '%s' doesn't end with %s ??" % (I, dni), fg='red')
            if len(E) > 1:
                M.append("multiple include found for '%s'..." % i)
            if len(E) > 0:
                # first matching candidate is used
                e = E[0]
                if conf.DEBUG:
                    secho("%s -> %s : '%s'" % (e.v[0].data, e.v[1].data, e.data), fg='magenta')
                G.add_edge(e)
            else:
                M.append("missing include file for '%s'" % i)
        for (n, i) in incs:
            bni = os.path.basename(n)
            dni = os.path.dirname(n)
            try:
                e = graphs.Edge(v, V[bni][0], data="-I%s" % dni)
                if conf.DEBUG:
                    secho("%s -> %s : '%s'" % (e.v[0].data, e.v[1].data, e.data))
                G.add_edge(e)
            except Exception:
                # typically a KeyError: the included file is not in 'files'
                if i.startswith("<"):
                    M.append("system file '%s' is used" % n)
                else:
                    M.append("file '%s' was included but is filtered out" % bni)
    # echo preprocessing messages:
    if (not conf.QUIET) and len(M) > 1:
        secho('\n '.join(M), fg='yellow')
    # finally, walk the graph to order the files and define their include path:
    FILES = {}
    for g in G.C:
        for r in g.roots():
            FILES[r.data] = list(set((e.data for e in g.sE)))
    return (FILES, G)


def diag_get_missing(filename, tu):
    """Return the list of names reported as "file not found" by the
    diagnostics of tu (category 1) whose formatted message starts with
    filename."""
    missing = []
    for err in tu.diagnostics:
        if err.category_number == 1:
            l = str(err.format())
            sta = l.find("error: ")
            sto = l.find(" file not found")
            if sta == -1 or sto == -1:
                continue
            if l.startswith(filename):
                # skip "error: " and the opening quote, drop the closing quote
                missing.append(l[sta + 8:sto - 1])
    return missing


def diag_get_incs(filename, tu):
    """Return the list of (included file name, include token spelling) for
    every depth-1 include of tu. For '<...>' includes the tokens are
    concatenated up to the closing '>'."""
    incs = []
    for t in tu.get_includes():
        if t.depth == 1:
            x = tu.get_extent(filename, [(t.location.line, 1), (t.location.line + 1, 1)])
            toks = tu.get_tokens(extent=x)
            # skip the first two tokens of the line
            # (presumably '#' and 'include' -- TODO confirm)
            next(toks)
            next(toks)
            inc = next(toks).spelling
            if inc == '<':
                while inc[-1] != '>':
                    inc += next(toks).spelling
            incs.append((t.include.name, inc))
    return incs


def parseincludes(filename, args=None):
    """Parse filename and return (missing, incs): the missing includes found
    by diag_get_missing and the found includes from diag_get_incs.

    The args list is copied, so the caller's list is never modified.
    """
    KeepGoing = 0x200
    options = TranslationUnit.PARSE_INCOMPLETE | KeepGoing
    if args is None:
        _args = ["-ferror-limit=0", "-fmodules", "-fbuiltin-module-map"]
    else:
        # work on a copy: preprocess passes the same list for every file and
        # extending it in place would pile up the c++ flags call after call.
        _args = list(args)
    if conf.config.Collect.cxx:
        cxx_args = ["-x", "c++", "-std=c++11"]
        if filename.endswith((".hpp", ".cpp")):
            _args.extend(cxx_args)
    unsaved_files = []
    index = Index.create()
    if conf.DEBUG:
        echo("parseincludes(%s)..." % filename, nl=False)
    tu = index.parse(filename, _args, unsaved_files, options)
    # get all missing includes from diagnostics:
    missing = diag_get_missing(filename, tu)
    # get all found includes:
    incs = diag_get_incs(filename, tu)
    if conf.DEBUG:
        echo("done.")
        secho("missing:%s" % missing, fg='cyan')
        secho("incs :%s" % incs, fg='green')
    return (missing, incs)