import pyparsing as pp
# ccrawl low-level utilities:
# ------------------------------------------------------------------------------
# Maps a C base type name to a one-letter type code (None when no code applies).
# The letters look like Python `struct` format characters -- TODO confirm with
# the code that consumes this table.
struct_letters = {
    # variadic marker: no letter
    "...": None,
    "void": "?",
    # integral types
    "char": "s",
    "short": "h",
    "int": "i",
    "long": "l",
    "long long": "q",
    # floating point types
    "float": "f",
    "double": "d",
    # size-like and wide-character types
    "ssize_t": "n",
    "size_t": "N",
    "wchar_t": "L",
}
# C and C++ type declaration parsers:
# ------------------------------------------------------------------------------
# notes:
# this part of ccrawl was a coding nightmare... I was aware that parsing C is
# difficult, and this was precisely why I'd use clang. Still, libclang's AST
# only provides the C type string. I first thought it was going to be easy to
# correctly parse this "simple" subpart of C... well, it's not. And for C++ it's
# even worse! Try playing with cdecl.org and see how funny this can be ;)
#
# The ccrawl C type parser is implemented with the 'nested_c' pyparsing object
# below. It captures nested parenthesized expressions, which makes it possible
# to describe complex C types that represent pointer-to array-of ... function
# prototypes returning a C type.
#
# definitions for objecttype --------------------------------------------------
# the elementary type related to the parsed string.
# define 'raw' types:
# sign keywords: 'unsigned' and 'signed' are both matched by this element.
unsigned = pp.Keyword("unsigned") | pp.Keyword("signed")
# individual qualifier keywords:
const = pp.Keyword("const")
volatile = pp.Keyword("volatile")
noexcept = pp.Keyword("noexcept")
# any sequence (possibly empty) of const/volatile/sign keywords placed
# before the type name:
prefix = pp.ZeroOrMore(pp.Or((const, volatile, unsigned)))
# qualifier that can trail a declaration: 'const', 'volatile',
# 'const volatile' or 'noexcept'.
cvqual = pp.Or((const, volatile, const + volatile, noexcept))
# one Keyword per name listed in struct_letters (including "..." and
# the two-word "long long"):
T = [pp.Keyword(t) for t in struct_letters]
# a raw type is an optional prefix followed by one of the names above:
rawtypes = pp.Optional(prefix) + pp.Or(T)
# define pointer indicators:
# pstars yields a group [stars, const] where const is "" when absent.
pstars = pp.Group(pp.Regex(r"\*+") + pp.Optional(const, default=""))
# one or more '&' characters:
ampers = pp.Regex("&+")
# define structured types (struct,union,enum):
# a symbol may start with '?' and may contain ':', '<', '>' and '$' in
# addition to the usual identifier characters.
symbol = pp.Regex(r"[?]?[A-Za-z_:<>][A-Za-z0-9_:<>$]*")
structured = pp.oneOf("struct union enum class")
# optional prefix, optional struct/union/enum/class keyword, then a symbol:
strucdecl = pp.Optional(prefix) + pp.Optional(structured) + symbol
# define objecttype:
# either a raw type or a (possibly structured) symbol.
objecttype = pp.Or([rawtypes, strucdecl])
# define arrays:
# intp matches a decimal integer with no leading zero (so "0" does not
# match) and converts the token to a Python int.
intp = pp.Regex(r"[1-9][0-9]*")
intp.setParseAction(lambda r: int(r[0]))
# bitfield form is "<prefix> <symbol>#<width>"; the '#' is dropped from
# the results so the width is the last token.
bitfield = pp.Optional(prefix) + symbol + pp.Suppress("#") + intp
# "[N]" with the brackets dropped from the results:
arraydecl = pp.Suppress("[") + intp + pp.Suppress("]")
# same as arraydecl but the dimension may also be a symbol:
arrazdecl = pp.Suppress("[") + pp.Or((intp, symbol)) + pp.Suppress("]")
# pointer/pointerxx always produce two tokens thanks to the defaults:
# ("" or the stars group / ampersands) and (0 or the array dimension).
pointer = pp.Optional(pstars, default="") + pp.Optional(arraydecl, default=0)
pointerxx = pp.Optional(ampers, default="") + pp.Optional(arrazdecl, default=0)
# either a trailing cv-qualifier or a reference marker:
cvref = pp.Or((cvqual, ampers))
#
# definitions for nested_c ----------------------------------------------------
# nested_c captures the "pointer to function/array" part of the declaration.
# This is the tricky part because parentheses are used both for grouping
# pointers and for delimiting function prototypes!
# nested_par matches one balanced parenthesized expression; any run of
# characters other than '(' and ')' is kept as a single content token and
# ignoreExpr=None overrides nestedExpr's default ignore expression.
nested_par = pp.nestedExpr(content=pp.Regex(r"[^()]+"), ignoreExpr=None)
# one or more such parenthesized expressions in sequence:
nested_c = pp.OneOrMore(nested_par)
class c_type(object):
    """
    Parse a C type string and decompose it into several parts.

    The base type (with its qualifiers) is extracted first; whatever follows
    it is wrapped in parentheses and handed to the 'nested_c' pyparsing
    object, which captures the nested parenthesized expressions used to
    describe pointer-to array-of ... function prototypes returning a C type.

    A bitfield is written "<type>#<width>" in the input string.

    Attributes:
        lbase (str): base typename
        lbfw (int): bitfield width (0 means type is not a bitfield)
        lconst (bool): type has a 'const' keyword
        lunsigned (bool): type has an 'unsigned' keyword
        lvolatile (bool): type has a 'volatile' keyword
        pstack (list): "pointers stack" as returned by the pstack function
        is_ptr (bool): True if the pstack contains a ptr object.
        dim (int): dimension if the type is an array (or 0.)
    """

    def __init__(self, decl):
        """Parse the type string *decl*.

        Raises whatever exception pyparsing raises when the base type or the
        pointer/array/prototype part cannot be parsed.
        """
        # get final element type:
        bf = decl.rfind("#")
        if bf > 0:
            try:
                x = bitfield.parseString(decl)
            except Exception:
                # not a well-formed bitfield: parse what precedes the '#'
                # as an ordinary type and report no bitfield width.
                x, r = (pp.Group(objecttype) + pp.restOfLine).parseString(decl[:bf])
                self.lbfw = 0
            else:
                # the width is the last token of a bitfield parse.
                r = ""
                self.lbfw = x.pop()
        else:
            x, r = (pp.Group(objecttype) + pp.restOfLine).parseString(decl)
            self.lbfw = 0
        # split qualifiers from the words that form the base typename:
        lbase = []
        self.lconst = self.lunsigned = self.lvolatile = False
        for w in x:
            if w == "const":
                self.lconst = True
            elif w == "unsigned":
                self.lunsigned = True
            elif w == "signed":
                # 'signed' is recognized but deliberately not recorded.
                pass
            elif w == "volatile":
                self.lvolatile = True
            else:
                lbase.append(w)
        self.lbase = " ".join(lbase)
        # an unsized array "[]" is handled as a pointer, and the remainder
        # is wrapped in parentheses so that nested_c sees a single group:
        r = r.replace("[]", "*")
        r = "(%s)" % r
        try:
            nest = nested_c.parseString(r).asList()[0]
        except Exception:
            print("c_type: error while parsing '%s'" % r)
            # bare raise keeps the original traceback intact.
            raise
        self.pstack = pstack(nest, self.__class__)

    @property
    def is_ptr(self):
        """True if at least one element of the pstack is exactly a ptr."""
        return any(type(p) is ptr for p in self.pstack)

    @property
    def dim(self):
        """Array dimension if the last pstack element is an arr, else 0."""
        if self.pstack:
            p = self.pstack[-1]
            if isinstance(p, arr):
                return p.a
        return 0

    def __repr__(self):
        # tokens carry no trailing blank: the join below already inserts
        # exactly one space between them.
        s = ["<%s" % self.__class__.__name__]
        s.extend(reversed([str(p) for p in self.pstack]))
        if self.lconst:
            s.append("const")
        if self.lunsigned:
            s.append("unsigned")
        s.append("{0.lbase}>".format(self))
        return " ".join(s)

    def show_base(self, kw=False, ns=False):
        """
        returns the string that represents the base type, preceded by
        'const' and/or 'unsigned' when the type has these keywords.

        The kw and ns parameters are not used by this implementation; they
        keep the signature identical to the one of the cxx_type subclass.
        """
        s = [self.lbase]
        if self.lunsigned:
            s.insert(0, "unsigned")
        if self.lconst:
            s.insert(0, "const")
        return " ".join(s)

    def show_ptr(self, name):
        """
        returns the string that represents the pointers stack,
        with optional name parameter used as the name of the
        function (in case of a prototype).
        """
        s = name
        stripok = False
        for p in reversed(self.pstack):
            if p.is_ptr:
                # pointers bind to what has been built so far and are
                # parenthesized to keep precedence explicit.
                s = "({}{})".format(p, s)
                stripok = True
            else:
                # arrays and argument lists are appended as suffixes.
                s = "{}{}".format(s, str(p))
                stripok = False
        if stripok:
            # the outermost pair of parentheses is redundant.
            s = s[1:-1]
        return s

    def show(self, name=""):
        """
        returns the string that represents full type with optional
        name parameter for a function's prototype. A bitfield width,
        if any, is appended as " : <width>".
        """
        extra = " : %d" % self.lbfw if self.lbfw else ""
        s = ("%s %s" % (self.show_base(), self.show_ptr(name))).strip()
        return s + extra
# C++ type declaration parser:
# ------------------------------------------------------------------------------
class cxx_type(c_type):
    """
    cxx_type extends c_type with extracting the namespace parts of the fully
    qualified name of the C++ type.

    Attributes:
        kw (str): leading 'struct', 'union', 'enum' or 'class' keyword of
            the base typename, or "" if there is none.
        ns (str): namespace part of the base typename, up to and including
            its last "::", or "" if there is none.
    """

    # keywords that may precede the symbol in a base typename.
    _KEYWORDS = ("struct", "union", "enum", "class")

    def __init__(self, decl):
        super().__init__(decl)
        # get namespaces:
        self.kw = ""
        self.ns = ""
        # Only a real struct/union/enum/class keyword counts as 'kw':
        # a multi-word raw type such as "long long" also contains a blank
        # but its first word is part of the type name itself.
        k = -1
        head, blank, _ = self.lbase.partition(" ")
        if blank and head in self._KEYWORDS:
            self.kw = head
            k = len(head)
        # NOTE(review): rfind also matches a "::" located inside template
        # arguments -- confirm this is the intended namespace split.
        x = self.lbase.rfind("::")
        if x > 0:
            # starts right after the keyword (or at 0 when k is -1).
            self.ns = self.lbase[k + 1 : x + 2]

    @property
    def is_method(self):
        """True if at least one element of the pstack is exactly a fargs."""
        return any(type(p) is fargs for p in self.pstack)

    def show_base(self, kw=False, ns=False):
        """
        returns the string that represents the base type, preceded by
        'const' and/or 'unsigned' when the type has these keywords.
        The struct/union/enum/class keyword is kept only if kw is True
        and the namespace part is kept only if ns is True.
        """
        lbase = self.lbase
        if not kw:
            lbase = lbase.replace(self.kw, "", 1)
        if not ns:
            lbase = lbase.replace(self.ns, "", 1)
        s = [lbase]
        if self.lunsigned:
            s.insert(0, "unsigned")
        if self.lconst:
            s.insert(0, "const")
        return " ".join(s).strip()

    def show_ptr(self, name):
        """
        returns the string that represents the pointers stack,
        with optional name parameter used as the name of the
        function (in case of a prototype).
        """
        # the rendering is the same as for C types.
        return super().show_ptr(name)

    def show(self, name="", kw=True, ns=True):
        """
        returns the string that represents full type with optional
        name parameter for a function's prototype; kw and ns are
        forwarded to show_base. A bitfield width, if any, is appended
        as " : <width>".
        """
        extra = " : %d" % self.lbfw if self.lbfw else ""
        s = ("%s %s" % (self.show_base(kw, ns), self.show_ptr(name))).strip()
        return s + extra
# ------------------------------------------------------------------------------
class ptr(object):
    """
    A run of pointer indicators (aka stars), possibly followed by a
    'const' keyword.

    Attributes:
        p (str): the '*' characters that represent the C pointers
        const (str): the 'const' keyword, or a false value if absent.
        is_ptr (bool): always True for this class.
    """

    def __init__(self, p, c):
        self.p = p
        self.const = c
        self.is_ptr = True

    def __str__(self):
        # a qualifier, when present, is rendered with one trailing blank.
        if self.const:
            return "{}{} ".format(self.p, self.const)
        return "{}".format(self.p)
class arr(object):
    """
    An array indicator.

    Attributes:
        a (int): dimension of the array
        is_ptr (bool): always False for this class.
    """

    def __init__(self, a):
        self.a = a
        self.is_ptr = False

    def __str__(self):
        # rendered the C way: dimension between square brackets.
        return "[{}]".format(self.a)
class fargs(object):
    """
    The arguments list of a function prototype.

    An optional 'cvr' attribute may be attached to an instance after
    construction; when present it is rendered after the arguments.

    Attributes:
        f (str): the arguments part of a function prototype
        args (list): the list of arguments
        is_ptr (bool): always False for this class.
    """

    def __init__(self, f):
        self.f = f
        self.is_ptr = False

    @property
    def args(self):
        """List of non-empty argument strings obtained from self.f."""
        items = nested_par.parseString(self.f).asList()[0]
        found = []
        for item in items:
            if isinstance(item, list):
                # a nested group belongs to the argument seen just before.
                found[-1] = found[-1] + flatten(item)
            else:
                found += item.split(",")
        # drop the empty strings left by the split.
        return [a for a in found if a]

    def __str__(self):
        try:
            qualifier = self.cvr
        except AttributeError:
            return self.f
        return "%s %s" % (self.f, qualifier)
[docs]def pstack(plist, cls=c_type):
"""returns the 'stack' of pointers-to array-N-of pointer-to
function() returning pointer to function() returning ...

Arguments:
plist (list): nested list of strings/lists; c_type passes the first
element of the nested_c parse result. The list is consumed
(modified in place) while it is processed.
cls (type): class of the type being parsed; C++ specific elements
(references, trailing cv/ref qualifiers) are only handled
when cls is cxx_type.

Returns:
list: the ptr, arr and fargs objects found in plist.
"""
# C++ handling is enabled only for an exact match with cxx_type:
cxx = cls == cxx_type
S = []
# trailing C++ cv/ref qualifier, attached to the last stacked object:
cvr = ""
if plist:
if not isinstance(plist[0], list):
# we are declaring either a pointer or array,
# or an array of pointers to previously stacked objs
p0 = plist[0]
# p is "" or a [stars, const] group, a is 0 or the array dimension:
p, a = pointer.parseString(p0)
if p:
S.append(ptr(*p))
if a:
S.append(arr(a))
if not (p or a):
if cxx:
# try a C++ reference and/or a symbolic array dimension:
r, a = pointerxx.parseString(p0)
if r:
# NOTE(review): only the first character of r is kept, so
# "&&" would be stored as "&" -- confirm this is intended.
S.append(ptr(r[0], ""))
if a:
S.append(arr(a))
plist.pop(0)
else:
# neither pointer nor array: the whole list is taken as the
# arguments of a function prototype.
S.append(fargs(flatten(plist)))
plist = []
else:
plist.pop(0)
# a single remaining empty group is an empty arguments list:
if len(plist) == 1 and len(plist[0]) == 0:
S.append(fargs("()"))
return S
if len(plist) > 1:
# the last element is an array suffix, a C++ qualifier, or a
# group holding the arguments of a prototype:
r = plist.pop()
if not isinstance(r, list):
try:
r = arraydecl.parseString(r)[0]
S.append(arr(r))
except pp.ParseException:
if cxx:
cvr = cvref.parseString(r)[0]
else:
S.append(fargs(flatten(r)))
if plist:
if len(plist) == 1 and not cvr:
plist = plist[0]
# NOTE(review): cls is not forwarded here, so the recursive call runs
# with the default c_type even when parsing a cxx_type -- confirm.
S.extend(pstack(plist))
if cvr:
if len(S) > 0:
S[-1].cvr = cvr
else:
print("cvr %s but S is empty!" % cvr)
return S
def flatten(args):
    """Return the nested list *args* as one parenthesized string.

    Items are separated by single blanks and every nested list is rendered
    recursively within its own pair of parentheses.
    """
    pieces = (flatten(item) if isinstance(item, list) else item for item in args)
    return "(%s)" % (" ".join(pieces))
def indent(txt, l=4):
    """Return *txt* with every non-empty line shifted to the right.

    Arguments:
        txt (str): the text to indent; lines are separated by newlines.
        l (int or str): number of blanks to insert in front of each line,
            or the literal prefix string to insert.

    Empty lines are left untouched.
    """
    # an int is a number of blanks (the default used to raise a TypeError
    # by adding an int to a str); a str is used verbatim as the prefix.
    prefix = " " * l if isinstance(l, int) else l
    return "\n".join(prefix + line if line else line for line in txt.split("\n"))