root/branches/mk/cheesecake/model.py

Revision 83, 22.1 kB (checked in by mk, 7 years ago)

Imported newest version of model.py from pydoctor.
Adapted UnitTests index to new interface (closes tickets #40 and #41).

Line 
1 """
2 Code borrowed from Michael Hudson's docextractor package with the author's
3 permission.
4
5 The original code is available at http://codespeak.net/svn/user/mwh/docextractor/.
6
7 Changes:
8   * do not print warnings to stdout (in System.warning)
9   * collect all function calls
10 """
11
12
13 from compiler import ast
14 import sys
15 import os
16 import cPickle as pickle
17 import __builtin__
18 import sets
19
20 from compiler.transformer import parse, parseFile
21 from compiler.visitor import walk
22
23 import ast_pp
24
25
def get_call_name(node):
    """Return the dotted name of the callable invoked by a call node.

    `node` must be an `ast.CallFunc`; its `node.node` attribute is the
    expression being called.  Plain names yield the name itself and
    attribute accesses yield "<owner>.<attribute>".

    Raises TypeError when some part of the callee expression is neither
    a Name, a Getattr, a string nor a tuple.
    """
    assert isinstance(node, ast.CallFunc)

    def dotted(target):
        # Attribute access: recurse on the list form of the node
        # (presumably an (expr, attrname) tuple -- confirm against the
        # compiler.ast documentation).
        if isinstance(target, ast.Getattr):
            return dotted(target.asList())
        if isinstance(target, ast.Name):
            return target.name
        if isinstance(target, str):
            return target
        if not isinstance(target, tuple):
            raise TypeError("Bad function name type: %s." % target)
        if len(target) == 1:
            return target[0]
        # First element is the owner expression, last one the attribute.
        return "%s.%s" % (dotted(target[0]), target[-1])

    return dotted(node.node)
45
def get_function_calls(node, fc):
    """Record the names of all calls found below `node` into `fc`.

    `fc` is a dict that is updated in place: every call whose callee can
    be named gets the entry ``fc[name] = 1``.  Anything that is not an
    `ast.Node` is ignored.  The traversal is recursive, so calls nested
    inside other expressions (including call arguments) are collected too.

    Calls whose callee has no dotted name -- for example ``f()()``,
    ``a.b().c()`` or ``handlers[key]()`` -- are skipped instead of
    aborting the whole traversal with the TypeError raised by
    get_call_name.
    """
    if not isinstance(node, ast.Node):
        return

    for child in node.getChildren():
        if isinstance(child, ast.CallFunc):
            try:
                func_called = get_call_name(child)
            except TypeError:
                # The callee is an arbitrary expression; there is no
                # name to record, but its sub-expressions are still
                # visited by the recursive call below.
                func_called = None
            if func_called is not None:
                fc[func_called] = 1

        get_function_calls(child, fc)
56
57
class Documentable(object):
    """Base class for the objects held by a System.

    Each instance stores the owning system, the dotted prefix and short
    name that make up its full name, its docstring, its parent object
    and the objects nested inside it.
    """

    def __init__(self, system, prefix, name, docstring, parent=None):
        self.system = system
        self.prefix = prefix
        self.name = name
        self.docstring = docstring
        self.parent = parent
        self.setup()

    def setup(self):
        """Create the empty containers describing nested objects."""
        # short name -> nested Documentable
        self.contents = {}
        # nested Documentables in the order they were added
        self.orderedcontents = []
        # locally visible name -> full dotted name
        self._name2fullname = {}

    def fullName(self):
        """Return the prefix followed by the short name."""
        return self.prefix + self.name

    def shortdocstring(self):
        """Return the docstring, abbreviated when longer than 20 characters.

        Trailing whitespace is stripped first.  An empty or missing
        docstring is returned unchanged.
        """
        text = self.docstring
        if not text:
            return text
        text = text.rstrip()
        if len(text) <= 20:
            return text
        return text[:8] + '...' + text[-8:]

    def __repr__(self):
        clsname = self.__class__.__name__
        return "%s %r" % (clsname, self.fullName())

    def name2fullname(self, name):
        """Map `name` to a full name, delegating to the parent if unknown here."""
        local = self._name2fullname
        if name not in local:
            return self.parent.name2fullname(name)
        return local[name]

    def resolveDottedName(self, dottedname, verbose=False):
        """Return the object that `dottedname` refers to from this scope.

        The first component is looked up in this object's name table and
        then in each ancestor's.  If no ancestor knows it, it is tried as
        a key of `system.allobjects` and then of each system in
        `system.moresystems`.  The remaining components are followed
        through `contents`.  Returns None when any step fails; with
        `verbose` set, a diagnostic line is printed.
        """
        parts = dottedname.split('.')
        head = parts[0]
        system = self.system

        # Innermost enclosing scope that knows the first component.
        scope = self
        while scope is not None and head not in scope._name2fullname:
            scope = scope.parent

        if scope is not None:
            fn = scope._name2fullname[head]
            if fn not in system.allobjects:
                if verbose:
                    print("1.5 didn't find %r from %r" % (dottedname,
                                                          self.fullName()))
                return None
            obj = system.allobjects[fn]
        else:
            # No scope binds the name: treat it as a top-level object of
            # this system first, then of the additional systems.
            for candidate in [system] + list(system.moresystems):
                if head in candidate.allobjects:
                    obj = candidate.allobjects[head]
                    break
            else:
                if verbose:
                    print("1 didn't find %r from %r" % (dottedname,
                                                        self.fullName()))
                return None

        for part in parts[1:]:
            if part not in obj.contents:
                if verbose:
                    print("2 didn't find %r from %r" % (dottedname,
                                                        self.fullName()))
                return None
            obj = obj.contents[part]

        if verbose:
            print("%s -> %s in %s" % (dottedname, obj.fullName(),
                                      self.fullName()))
        return obj

    def dottedNameToFullName(self, dottedname):
        """Expand the first component of `dottedname` to its full name.

        The first component is looked up in this object's name table and
        then in each ancestor's.  When nobody knows it, `dottedname` is
        returned unchanged.
        """
        dot = dottedname.find('.')
        if dot < 0:
            head, tail = dottedname, ''
        else:
            # tail keeps its leading dot
            head, tail = dottedname[:dot], dottedname[dot:]
        scope = self
        while head not in scope._name2fullname:
            scope = scope.parent
            if scope is None:
                return dottedname
        return scope._name2fullname[head] + tail

    def __getstate__(self):
        """Return a picklable state with object references replaced by names.

        this is so very, very evil.
        see doc/extreme-pickling-pain.txt for more.

        Attributes referring to other Documentables are stored as full
        names under a prefixed key: '$' for a single object, '@' for a
        non-empty list of objects (None entries allowed) and '!' for a
        non-empty dict whose values are all objects.  Every other
        attribute is stored unchanged.
        """
        def only_documentables(values, allow_none):
            for item in values:
                if item is None:
                    if not allow_none:
                        return False
                elif not isinstance(item, Documentable):
                    return False
            return True

        state = {}
        for attr, value in self.__dict__.items():
            if isinstance(value, Documentable):
                state['$' + attr] = value.fullName()
            elif isinstance(value, list) and value:
                if only_documentables(value, True):
                    names = []
                    for item in value:
                        if item is None:
                            names.append(item)
                        else:
                            names.append(item.fullName())
                    state['@' + attr] = names
                else:
                    state[attr] = value
            elif isinstance(value, dict) and value:
                if only_documentables(value.values(), False):
                    names = {}
                    for key, item in value.items():
                        names[key] = item.fullName()
                    state['!' + attr] = names
                else:
                    state[attr] = value
            else:
                state[attr] = value
        return state
173
class Package(Documentable):
    """A Documentable of kind "Package"; it does not resolve names."""

    kind = "Package"

    def name2fullname(self, name):
        # Name lookups that reach a package always fail.
        raise NameError()
178
179
class Module(Documentable):
    """A Documentable of kind "Module"; the last stop for name lookups."""

    kind = "Module"

    def name2fullname(self, name):
        """Return the full name bound to `name` in this module.

        Names not bound here are returned unchanged.  Unless the name is
        a builtin, an "optimistic name resolution" warning is recorded
        on the system first.
        """
        known = self._name2fullname
        if name in known:
            return known[name]
        if name not in __builtin__.__dict__:
            self.system.warning("optimistic name resolution", name)
        return name
190
191
class Class(Documentable):
    """A Documentable of kind "Class" with base and subclass bookkeeping."""

    kind = "Class"

    def setup(self):
        """Add empty base/subclass lists to the standard containers."""
        super(Class, self).setup()
        # bases:       base names after dottedNameToFullName expansion
        # rawbases:    base expressions as pretty-printed source text
        # baseobjects: resolved base objects (None when unresolved)
        # subclasses:  classes that list this class as a base
        for attr in ('bases', 'rawbases', 'baseobjects', 'subclasses'):
            setattr(self, attr, [])
200
201
class Function(Documentable):
    """A Documentable of kind "Function"."""

    kind = 'Function'
204
205
class ModuleVistor(object):
    """AST visitor that fills a System with the contents of one module.

    The class name keeps its historical spelling because System refers
    to it under that name.  The `visit` attribute used below is not
    defined here; it is expected to be supplied by the walker driving
    this visitor (see processModuleAst).
    """

    def __init__(self, system, modname):
        self.system = system
        self.modname = modname
        # (documentable, node) pairs whose traversal has been deferred
        self.morenodes = []

    def default(self, node):
        """Visit every child node of `node`."""
        for child in node.getChildNodes():
            self.visit(child)

    def postpone(self, docable, node):
        """Queue `node` to be visited later with `docable` as current object."""
        self.morenodes.append((docable, node))

    def visitModule(self, node):
        """Enter the module object for this file, creating it if needed."""
        system = self.system
        current = system.current

        # The module was pre-registered inside the current package.
        if current and self.modname in current.contents:
            m = current.contents[self.modname]
            assert m.docstring is None
            m.docstring = node.doc
            system.push(m, node)
            self.default(node)
            system.pop(m)
            return

        # The module was pre-registered as a root object.
        if not current:
            roots = [x for x in system.rootobjects if x.name == self.modname]
            if roots:
                mod, = roots
                # NOTE(review): unlike the branch above, node.doc is not
                # stored on the module here -- confirm this is intended.
                system.push(mod, node)
                self.default(node)
                system.pop(mod)
                return

        # Unknown module: create it on the fly.
        system.pushModule(self.modname, node.doc)
        self.default(node)
        system.popModule()

    def visitClass(self, node):
        """Create a Class object and record its base classes."""
        cls = self.system.pushClass(node.name, node.doc)
        if node.lineno is not None:
            cls.linenumber = node.lineno
        for n in node.bases:
            str_base = ast_pp.pp(n)
            cls.rawbases.append(str_base)
            cls.bases.append(cls.dottedNameToFullName(str_base))
        self.default(node)
        self.system.popClass()

    def visitFrom(self, node):
        """Record the names bound by a ``from ... import ...`` statement."""
        modname = expandModname(self.system, node.modname)
        name2fullname = self.system.current._name2fullname
        for fromname, asname in node.names:
            if fromname == '*':
                self.system.warning("import *", modname)
                if modname not in self.system.allobjects:
                    return
                mod = self.system.allobjects[modname]
                # this might fail if you have an import-* cycle, or if
                # you're just not running the import star finder to
                # save time (not that this is possibly without
                # commenting stuff out yet, but...)
                if isinstance(mod, Package):
                    self.system.warning("import * from a package", modname)
                    return
                # `processed` is only set by preprocessDirectory and
                # extractDocstrings; modules created on the fly by
                # visitModule do not have it, so treat a missing flag
                # as "not processed" instead of raising AttributeError.
                if getattr(mod, 'processed', False):
                    for n in mod.contents:
                        name2fullname[n] = modname + '.' + n
                else:
                    self.system.warning("unresolvable import *", modname)
                return
            if asname is None:
                asname = fromname
            name2fullname[asname] = modname + '.' + fromname

    def visitImport(self, node):
        """Record the names bound by an ``import ...`` statement."""
        name2fullname = self.system.current._name2fullname
        for fromname, asname in node.names:
            fullname = expandModname(self.system, fromname)
            if asname is not None:
                name2fullname[asname] = fullname
                continue
            # "import a.b.c" binds only "a".  Find the last component of
            # the expanded name equal to "a" and bind the prefix ending
            # there; fall back to the whole expanded name.
            asname = fromname.split('.', 1)[0]
            parts = fullname.split('.')
            target = fullname
            for end in range(len(parts), 0, -1):
                if parts[end - 1] == asname:
                    target = '.'.join(parts[:end])
                    break
            name2fullname[asname] = target

    def visitFunction(self, node):
        """Create a Function object, record its argspec and defer its body."""
        fc = {}
        get_function_calls(node, fc)
        func = self.system.pushFunction(node.name, node.doc, fc)
        if node.lineno is not None:
            func.linenumber = node.lineno
        # ast.Function has a pretty lame representation of
        # arguments. Let's convert it to a nice concise format
        # somewhat like what inspect.getargspec returns
        argnames = node.argnames[:]
        kwname = starargname = None
        # The **kwargs name is popped first, then the *args name.
        if node.kwargs:
            kwname = argnames.pop(-1)
        if node.varargs:
            starargname = argnames.pop(-1)
        defaults = []
        for default in node.defaults:
            try:
                defaults.append(ast_pp.pp(default))
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                # sys.exc_info() keeps this valid on every Python version.
                e = sys.exc_info()[1]
                self.system.warning(
                    "unparseable default",
                    "%s: %s %r" % (e.__class__.__name__, e, default))
                defaults.append('???')
        # argh, convert unpacked-arguments from tuples to lists,
        # because that's what getargspec uses and the unit test
        # compares it
        argnames2 = []
        for argname in argnames:
            if isinstance(argname, tuple):
                argname = list(argname)
            argnames2.append(argname)
        func.argspec = (argnames2, starargname, kwname, tuple(defaults))
        self.postpone(func, node.code)
        self.system.popFunction()
332
# Values taken by System.state, in the order the processing steps
# below move a system through them.
states = ['blank', 'preparse', 'importstarred', 'parsed', 'finalized']
340
341
class System(object):
    """Registry of all documented objects plus the state used to build it.

    The class attributes below name the classes instantiated for each
    kind of object, so a subclass can substitute its own.
    """
    Class = Class
    Module = Module
    Package = Package
    Function = Function
    ModuleVistor = ModuleVistor

    def __init__(self):
        self.current = None
        self._stack = []
        # full name -> object
        self.allobjects = {}
        # every object in creation order
        self.orderedallobjects = []
        # objects created while there was no current object
        self.rootobjects = []
        # warning type -> list of (full name of current object, detail)
        self.warnings = {}
        # importstargraph contains edges {importer:[imported]} but only
        # for import * statements
        self.importstargraph = {}
        # names of all calls seen in function bodies
        self.func_called = {}
        self.state = 'blank'
        self.packages = []
        self.moresystems = []
        self.urlprefix = ''

    def _push(self, cls, name, docstring):
        """Create a `cls` object inside the current one and make it current."""
        parent = self.current or None
        if parent:
            prefix = parent.fullName() + '.'
        else:
            prefix = ''
        obj = cls(self, prefix, name, docstring, parent)
        if parent:
            parent.orderedcontents.append(obj)
            parent.contents[name] = obj
            parent._name2fullname[name] = obj.fullName()
        else:
            self.rootobjects.append(obj)
        # Must be set before handleDuplicate so that its warning is
        # attributed to the new object.
        self.current = obj
        self.orderedallobjects.append(obj)
        if obj.fullName() in self.allobjects:
            obj = self.handleDuplicate(obj)
        else:
            self.allobjects[obj.fullName()] = obj
        return obj

    def handleDuplicate(self, obj):
        '''This is called when we see two objects with the same
        .fullName(), for example:

        class C:
            if something:
                def meth(self):
                    implementation 1
            else:
                def meth(self):
                    implementation 2

        The default is that the second definition "wins".  The earlier
        object is renamed to "<name> <i>" (first unused i) and stays
        registered under that name.
        '''
        fn = obj.fullName()
        i = 0
        while (fn + ' ' + str(i)) in self.allobjects:
            i += 1
        prev = self.allobjects[fn]
        prev.name = obj.name + ' ' + str(i)
        self.allobjects[prev.fullName()] = prev
        self.warning("duplicate", self.allobjects[fn])
        self.allobjects[fn] = obj
        return obj

    def _pop(self, cls):
        """Leave the current object, which must be an instance of `cls`."""
        assert isinstance(self.current, cls)
        self.current = self.current.parent

    def push(self, obj, node=None):
        """Make `obj` current, remembering the previous current object."""
        self._stack.append(self.current)
        self.current = obj

    def pop(self, obj):
        """Undo the matching push(); `obj` must be the current object."""
        assert self.current is obj, "%r is not %r" % (self.current, obj)
        self.current = self._stack.pop()

    def pushClass(self, name, docstring):
        return self._push(self.Class, name, docstring)

    def popClass(self):
        self._pop(self.Class)

    def pushModule(self, name, docstring):
        return self._push(self.Module, name, docstring)

    def popModule(self):
        self._pop(self.Module)

    def pushFunction(self, name, docstring, func_called):
        """Like the other push methods; also merges `func_called` into
        the system-wide record of called names."""
        self.func_called.update(func_called)
        return self._push(self.Function, name, docstring)

    def popFunction(self):
        self._pop(self.Function)

    def pushPackage(self, name, docstring):
        return self._push(self.Package, name, docstring)

    def popPackage(self):
        self._pop(self.Package)

    def report(self):
        """Print the tree of all objects, one per line, indented by depth."""
        for o in self.rootobjects:
            self._report(o, '')

    def _report(self, o, indent):
        print("%s %s" % (indent, o))
        for o2 in o.orderedcontents:
            self._report(o2, indent + '  ')

    def resolveAlias(self, n):
        """Follow import aliases of the dotted name `n`.

        `n` is split at its last dot into a module name and an attribute
        name.  While the module is a known Module that binds the
        attribute to another name, that name is followed.  The first
        name that is a known object is returned.  A name that cannot be
        followed any further (no dot, unknown module, not a Module,
        attribute not bound) is returned as it is, so the result is
        always a string.  A cycle of aliases stops at the first repeated
        name.
        """
        seen = {}
        name = n
        while name not in seen:
            seen[name] = 1
            dot = name.rfind('.')
            if dot < 0:
                return name
            # Split at the LAST dot so that names with several dots
            # ("pkg.mod.Name") are handled.
            mod, clsname = name[:dot], name[dot + 1:]
            if not mod or mod not in self.allobjects:
                return name
            m = self.allobjects[mod]
            if not isinstance(m, Module):
                return name
            if clsname not in m._name2fullname:
                # Nothing to follow; keep the name rather than
                # returning None.
                return name
            newname = m.name2fullname(clsname)
            if newname in self.allobjects:
                return newname
            name = newname
        return name

    def resolveAliases(self):
        """Replace unknown base names of every class by their alias targets."""
        for ob in self.orderedallobjects:
            if not isinstance(ob, Class):
                continue
            for i, b in enumerate(ob.bases):
                if b not in self.allobjects:
                    ob.bases[i] = self.resolveAlias(b)

    def warning(self, type, detail):
        """Record a warning of the given type; nothing is printed."""
        if self.current is not None:
            fn = self.current.fullName()
        else:
            fn = '<None>'
        self.warnings.setdefault(type, []).append((fn, detail))

    def objectsOfType(self, cls):
        """Yield, in creation order, every object that is an instance of `cls`."""
        for o in self.orderedallobjects:
            if isinstance(o, cls):
                yield o

    def finalStateComputations(self):
        self.recordBasesAndSubclasses()

    def recordBasesAndSubclasses(self):
        """Resolve each class's base names and link bases to subclasses."""
        for cls in self.objectsOfType(Class):
            for n in cls.bases:
                o = cls.parent.resolveDottedName(n)
                # None is appended for bases that cannot be resolved.
                cls.baseobjects.append(o)
                if o:
                    o.subclasses.append(cls)

    def __getstate__(self):
        """Return the instance dict without the `moresystems` entry."""
        state = self.__dict__.copy()
        del state['moresystems']
        return state

    def __setstate__(self, state):
        """Restore the state and turn stored names back into objects.

        this is so very, very evil.
        see doc/extreme-pickling-pain.txt for more.

        Attribute keys starting with '$', '@' or '!' on the restored
        objects hold full names (single, list, dict values); they are
        replaced by the unprefixed attribute holding the objects looked
        up in `allobjects`.
        """
        self.moresystems = []
        self.__dict__.update(state)
        lookup = self.allobjects
        for obj in self.orderedallobjects:
            # Iterate over a snapshot: the dict is modified in the loop.
            for key, value in list(obj.__dict__.items()):
                marker = key[:1]
                if marker == '$':
                    restored = lookup[value]
                elif marker == '@':
                    restored = []
                    for fullname in value:
                        if fullname is None:
                            restored.append(None)
                        else:
                            restored.append(lookup[fullname])
                elif marker == '!':
                    restored = {}
                    for kk, fullname in value.items():
                        restored[kk] = lookup[fullname]
                else:
                    continue
                del obj.__dict__[key]
                obj.__dict__[key[1:]] = restored
538
539
def expandModname(system, modname, givewarning=True):
    """Expand a module name that refers to a sibling in an enclosing package.

    Starting from the nearest Package at or above `system.current`, each
    enclosing object is searched for a member named like the first
    component of `modname`.  If one is found, its full name replaces
    that component and, when `givewarning` is true, a "local import"
    warning is recorded.  Otherwise `modname` is returned as given.
    """
    dot = modname.find('.')
    if dot < 0:
        prefix, suffix = modname, ''
    else:
        # suffix keeps its leading dot
        prefix, suffix = modname[:dot], modname[dot:]

    scope = system.current
    # Climb to the nearest enclosing package ...
    while scope is not None and not isinstance(scope, Package):
        scope = scope.parent
    # ... then further up until some scope contains the first component.
    while scope is not None and prefix not in scope.contents:
        scope = scope.parent

    if scope is None:
        return prefix + suffix
    if givewarning:
        system.warning("local import", modname)
    return scope.contents[prefix].fullName() + suffix
559
class ImportStarFinder(object):
    """AST visitor that records the ``from X import *`` edges of one module."""

    def __init__(self, system, modfullname):
        self.system = system
        self.modfullname = modfullname

    def visitFrom(self, node):
        """Add an importer -> imported edge for a star import."""
        first_name = node.names[0][0]
        if first_name != '*':
            return
        # Expanded without recording a "local import" warning.
        imported = expandModname(self.system, node.modname, False)
        graph = self.system.importstargraph
        graph.setdefault(self.modfullname, []).append(imported)
570
def processModuleAst(ast, name, system):
    """Walk a module AST and then every node the visitor postponed.

    A visitor of type `system.ModuleVistor` is created for the module
    called `name`.  Postponed (object, node) pairs are handled first-in
    first-out, each with its object pushed as the current one; nodes
    postponed during that phase are handled as well.
    """
    visitor = system.ModuleVistor(system, name)
    walk(ast, visitor)
    while visitor.morenodes:
        docable, body = visitor.morenodes.pop(0)
        system.push(docable, body)
        visitor.visit(body)
        system.pop(docable)
579
580
581 def fromText(src, modname='<test>', system=None):
582     if system is None:
583         _system = System()
584     else:
585         _system = system
586     processModuleAst(parse(src), modname, _system)
587     if system is None:
588         _system.finalStateComputations()
589     return _system.rootobjects[0]
590
591
def preprocessDirectory(system, dirpath):
    """Register the packages and modules found below `dirpath`.

    A Package is pushed for the directory unless its base name is empty.
    Sub-directories containing an ``__init__.py`` are processed
    recursively, except ones named 'test'.  Every other entry ending in
    '.py' becomes a Module with `filepath` set and `processed` False.
    Leaves the system in state 'preparse'.
    """
    assert system.state in ['blank', 'preparse']
    pkgname = os.path.basename(dirpath)
    package = None
    if pkgname:
        package = system.pushPackage(pkgname, None)

    for entry in os.listdir(dirpath):
        path = os.path.join(dirpath, entry)
        is_subpackage = (os.path.isdir(path)
                         and os.path.exists(os.path.join(path, '__init__.py'))
                         and entry != 'test')
        if is_subpackage:
            preprocessDirectory(system, path)
            continue
        if not entry.endswith('.py'):
            continue
        mod = system.pushModule(os.path.splitext(entry)[0], None)
        mod.filepath = path
        mod.processed = False
        system.popModule()

    if package:
        system.popPackage()
    system.state = 'preparse'
611
def findImportStars(system):
    """Fill `system.importstargraph` from the source of every known module.

    Requires state 'preparse' and leaves the system in state
    'importstarred'.  A module whose file cannot be parsed gets a
    "cannot parse" warning and is skipped.
    """
    assert system.state in ['preparse']
    modlist = list(system.objectsOfType(Module))
    for mod in modlist:
        system.push(mod.parent)
        isf = ImportStarFinder(system, mod.fullName())
        try:
            tree = parseFile(mod.filepath)
        except (SyntaxError, ValueError):
            system.warning("cannot parse", mod.filepath)
        else:
            # Only walk a tree that was actually produced for THIS
            # module; after a parse failure there is none.
            walk(tree, isf)
        system.pop(mod.parent)
    system.state = 'importstarred'
625
def extractDocstrings(system):
    """Parse every known module and populate the system from its AST.

    Requires state 'preparse' or 'importstarred' and leaves the system
    in state 'parsed'.  Modules are ordered with toposort over
    `system.importstargraph`, so a module that is star-imported comes
    before its importer.  A module whose file cannot be parsed gets a
    "cannot parse" warning and keeps `processed` False.
    """
    assert system.state in ['preparse', 'importstarred']
    # and so much more...
    modlist = list(system.objectsOfType(Module))
    newlist = toposort([m.fullName() for m in modlist], system.importstargraph)

    for fullname in newlist:
        mod = system.allobjects[fullname]
        system.push(mod.parent)
        try:
            tree = parseFile(mod.filepath)
        except (SyntaxError, ValueError):
            system.warning("cannot parse", mod.filepath)
        else:
            # Only process a tree that belongs to THIS module; after a
            # parse failure there is none.
            processModuleAst(tree, mod.name, system)
            mod.processed = True
        system.pop(mod.parent)
    system.state = 'parsed'
643
def finalStateComputations(system):
    """Run the system's final computations: state 'parsed' -> 'finalized'."""
    assert system.state == 'parsed'
    system.finalStateComputations()
    system.state = 'finalized'
648
def processDirectory(system, dirpath):
    """Run all processing steps for the directory tree at `dirpath`."""
    preprocessDirectory(system, dirpath)
    # The remaining steps only need the system itself.
    for step in (findImportStars, extractDocstrings, finalStateComputations):
        step(system)
654
def toposort(input, edges):
    """Return the items of `input` ordered so that successors come first.

    `edges` maps an item to the list of items that must precede it in
    the result.  Successors that are not in `input` are ignored.
    """
    # this doesn't detect cycles in any clever way.  An item is removed
    # from `pending` as soon as it is reached, so a cycle is simply
    # cut at the item that was reached first.
    output = []
    pending = dict.fromkeys(input)
    while pending:
        start = pending.popitem()[0]
        # Explicit depth-first stack of (item, iterator over successors).
        stack = [(start, iter(edges.get(start, [])))]
        while stack:
            current, successors = stack[-1]
            for succ in successors:
                if succ in pending:
                    del pending[succ]
                    stack.append((succ, iter(edges.get(succ, []))))
                    break
            else:
                # All successors handled: emit the item itself.
                stack.pop()
                output.append(current)
    return output
668
669
def main(systemcls, argv):
    """Command-line entry point.

    With '-r' in `argv` (which is removed from the list), the single
    remaining argument is processed as a directory tree, the resulting
    system is pickled to 'da.out' and a summary of warning counts is
    printed.  Otherwise every argument is parsed as a single module file
    and the resulting object tree is printed.
    """
    if '-r' in argv:
        argv.remove('-r')
        assert len(argv) == 1
        system = systemcls()
        processDirectory(system, argv[0])
        # Close the output file explicitly, also when pickling fails,
        # instead of relying on garbage collection.
        out = open('da.out', 'wb')
        try:
            pickle.dump(system, out, pickle.HIGHEST_PROTOCOL)
        finally:
            out.close()
        print('')
        print('warning summary:')
        for k, v in system.warnings.items():
            print('%s %s' % (k, len(v)))
    else:
        system = systemcls()
        for fname in argv:
            modname = os.path.splitext(os.path.basename(fname))[0] # XXX!
            processModuleAst(parseFile(fname), modname, system)
        system.report()



if __name__ == '__main__':
    main(System, sys.argv[1:])
Note: See TracBrowser for help on using the browser.