root/branches/mk/cheesecake/codeparser.py

Revision 37, 6.4 kB (checked in by mk, 7 years ago)

Check docstrings for use of javadoc (closes ticket #12).

  • Property svn:executable set to *
Line 
1 import os
2 import re
3
4 from model import System, Module, Class, Function, parseFile, processModuleAst
5
6
def compile_regex(pattern, user_map=None):
    """Compile a verbose regex after expanding placeholder names.

    Every occurrence of a placeholder name (by default ``ALPHA``, ``WORD``,
    ``START`` and ``END``) in `pattern` is textually replaced by its regex
    fragment, and the result is compiled with ``re.VERBOSE``.

    :Parameters:
      `pattern` : str
          Regex source containing placeholder names.  Because the pattern
          is compiled in verbose mode, unescaped whitespace in it is ignored.
      `user_map` : dict
          Optional placeholder -> regex fragment mapping that overrides or
          extends the default placeholders.

    Returns the compiled pattern object.
    """

    # Word in reST can also contain hyphens and punctuation characters.
    mapping = {'ALPHA': r'[-.,?!\w]', 'WORD': r'[-.,?!\s\w]',
               'START': r'(^|\s)', 'END': r'([.,?!\s]|$)'}

    if user_map:
        mapping.update(user_map)

    # Substitute longer placeholder names first (ties broken alphabetically)
    # so the result does not depend on dictionary ordering and a short name
    # can never clobber a longer name that contains it.
    ordered = sorted(mapping.items(),
                     key=lambda item: (-len(item[0]), item[0]))
    for placeholder, fragment in ordered:
        pattern = pattern.replace(placeholder, fragment)

    flags = re.VERBOSE
    # Python 3 raises ValueError when re.LOCALE is combined with a text (str)
    # pattern, so the flag is only applied to byte-string patterns.  On
    # Python 2 ``str`` is ``bytes``, so str patterns keep the LOCALE flag.
    if isinstance(pattern, bytes):
        flags |= re.LOCALE

    return re.compile(pattern, flags)
27
def inline_markup(start, end=None, mapping=None):
    """Build a regex for text enclosed between `start` and `end` markers.

    The pattern matches either a single ALPHA character between the markers
    or an ALPHA, any number of WORD characters and a final ALPHA.  When `end`
    is omitted the `start` marker is used on both sides.  `mapping` is handed
    to compile_regex as its placeholder overrides.
    """
    closing = start if end is None else end
    template = r'''(START  %(start)s  ALPHA  %(end)s  END) |
           (START  %(start)s  ALPHA  WORD*  ALPHA  %(end)s  END)'''
    markers = {'start': start, 'end': closing}
    return compile_regex(template % markers, mapping)
34
def line_markup(start, end=None):
    """Build a regex for markup anchored at the beginning of a line.

    Delegates to inline_markup with placeholder overrides: START matches a
    newline or the string start followed by optional spaces/tabs, ALPHA also
    accepts whitespace, and END is empty.
    """
    line_placeholders = {
        'ALPHA': r'[-.,?!\s\w]',
        'START': r'(\n|^)[\ \t]*',
        'END': r'',
    }
    return inline_markup(start, end, mapping=line_placeholders)
39
# Maps a documentation format name to the list of compiled regexes whose
# presence in a docstring indicates that the format is being used.
supported_formats = {
    # reST reference: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html
    'reST': [
        inline_markup(r'\*'), # emphasis
        inline_markup(r'\*\*'), # strong
        inline_markup(r'``'), # inline
        inline_markup(r'\(', r'_\)', # hyperlink
                      {'ALPHA': r'\w', 'WORD': r'[-.\w]'}),
        inline_markup(r'\(`', r'`_\)'), # long hyperlink
        line_markup(r':'), # field
        line_markup(r'[*+-]', r''), # unordered list
        # The enumerator (digits or letters) must be followed by '.' or ')';
        # the alternation is grouped so the suffix applies to both forms.
        line_markup(r'((\d+) | ([a-zA-Z]+)) [.\)]', r''), # ordered list
        line_markup(r'\(  ((\d+)  |  ([a-zA-Z]+))  \)', r''), # ordered list
    ],

    # epytext reference: http://epydoc.sourceforge.net/epytext.html
    'epytext': [
        re.compile(r'[BCEGILMSUX]\{.*\}'), # inline elements
        line_markup(r'@[a-z]+([\ \t][a-zA-Z]+)?:', r''), # fields
        line_markup(r'-', r''), # unordered list
        line_markup(r'\d+(\.\d+)*', r''), # ordered list
    ],

    # javadoc reference: http://java.sun.com/j2se/1.4.2/docs/tooldocs/solaris/javadoc.html
    'javadoc': [
        # Tag names are letters only ('A-Z', not 'A-z', which would also
        # accept the punctuation characters between 'Z' and 'a').
        re.compile(r'<[a-zA-Z]+[^>]*>'), # HTML elements
        line_markup(r'@[a-z][a-zA-Z]*\s', r''), # normal tags
        re.compile(r'{@  ((docRoot) | (inheritDoc) | (link) | (linkplain) |'\
                    ' (value))  [^}]*  }', re.VERBOSE), # special tags
    ],
}
71
72
def use_format(text, format):
    """Tell whether `text` contains markup of the given documentation format.

    Returns True as soon as any pattern registered for `format` in
    supported_formats is found in `text`, and False when none matches.
    """
    format_patterns = supported_formats[format]
    return any(re.search(regex, text) for regex in format_patterns)
84
85
class CodeParser(object):
    """Information about the structure of a Python module.

    * Collects modules, classes, methods, functions and associated docstrings
    * Based on mwh's docextractor.model module
    """
    def __init__(self, pyfile, log=None):
        """Parse `pyfile` and collect the names of the objects it defines.

        If the file cannot be parsed the failure is logged and all the
        collected lists are left empty.

        :Parameters:
          `pyfile` : str
              Path to a Python module to parse.
          `log` : logger.Producer instance
              Logger to use during code parsing.
        """
        if log:
            self.log = log.codeparser
        else:
            import logger
            self.log = logger.default.codeparser
        self.modules = []
        self.classes = []
        self.methods = []
        self.method_func = []
        self.functions = []
        self.docstrings = [] # objects that have docstrings
        self.docstrings_by_format = {}

        # Initialize lists of format docstrings.
        for format in supported_formats:
            self.docstrings_by_format[format] = []

        (path, filename) = os.path.split(pyfile)
        (module, ext) = os.path.splitext(filename)
        self.log("Inspecting file: " + pyfile)

        self.system = System()
        try:
            processModuleAst(parseFile(pyfile), module, self.system)
        except Exception:
            # Parsing is best-effort: report the problem instead of hiding
            # it, and keep the (empty) results.  KeyboardInterrupt and
            # SystemExit are deliberately not intercepted.
            import sys
            self.log("Failed to parse file %s: %s"
                     % (pyfile, sys.exc_info()[1]))
            return

        for obj in self.system.orderedallobjects:
            fullname = obj.fullName()
            if isinstance(obj, Module):
                self.modules.append(fullname)
            if isinstance(obj, Class):
                self.classes.append(fullname)
            if isinstance(obj, Function):
                self.method_func.append(fullname)
            if isinstance(obj.docstring, str) and obj.docstring.strip():
                self.docstrings.append(fullname)
                # Check docstring for known documentation formats.
                for format in supported_formats:
                    if use_format(obj.docstring, format):
                        self.docstrings_by_format[format].append(fullname)

        self._split_methods_and_functions()

        self.log("modules: " + ",".join(self.modules))
        self.log("classes: " + ",".join(self.classes))
        self.log("methods: " + ",".join(self.methods))
        self.log("functions: " + ",".join(self.functions))
        self.log("docstrings: %s" % self.docstrings_by_format)

    def _split_methods_and_functions(self):
        """Sort each name in `method_func` into `methods` or `functions`.

        A name is a method when it starts with the full name of a collected
        class followed by a dot.  Requiring the dot keeps a function such as
        ``mod.Barbaz`` from being mistaken for a method of class ``mod.Bar``.
        """
        class_prefixes = tuple([cls + "." for cls in self.classes])
        for name in self.method_func:
            if name.startswith(class_prefixes):
                self.methods.append(name)
            else:
                self.functions.append(name)

    def object_count(self):
        """Return number of objects found in this module.

        Objects include:
        * module
        * classes
        * methods
        * functions
        """
        collections = (self.modules, self.classes,
                       self.methods, self.functions)
        return sum([len(names) for names in collections])

    def docstring_count(self):
        """Return number of docstrings found in this module
        """
        return len(self.docstrings)

    def docstring_count_by_type(self, type):
        """Return number of docstrings of the given format found in this
        module.

        `type` must be one of the keys of docstrings_by_format; an unknown
        name raises KeyError.
        """
        return len(self.docstrings_by_format[type])

    def functions_called(self):
        """
        Return list of functions called by functions/methods
        defined in this module
        """
        # list() keeps the documented return type where keys() is a view.
        return list(self.system.func_called.keys())
Note: See TracBrowser for help on using the browser.