root/trunk/cheesecake/pep8.py

Revision 159, 25.2 kB (checked in by grig, 5 years ago)

Added pep8.py from Johann Rocholl and corresponding IndexPEP8 as part of the code kwalitee index.

Corrected minor misspellings.

  • Property svn:executable set to *
Line 
1 #!/usr/bin/python
2 # pep8.py - Check Python source code formatting, according to PEP 8
3 # Copyright (C) 2006 Johann C. Rocholl <johann@browsershots.org>
4 #
5 # Permission is hereby granted, free of charge, to any person
6 # obtaining a copy of this software and associated documentation files
7 # (the "Software"), to deal in the Software without restriction,
8 # including without limitation the rights to use, copy, modify, merge,
9 # publish, distribute, sublicense, and/or sell copies of the Software,
10 # and to permit persons to whom the Software is furnished to do so,
11 # subject to the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 # SOFTWARE.
24
25 """
26 Check Python source code formatting, according to PEP 8:
27 http://www.python.org/dev/peps/pep-0008/
28
29 For usage and a list of options, try this:
30 $ python pep8.py -h
31
32 This program and its regression test suite live here:
33 http://svn.browsershots.org/trunk/devtools/pep8/
34 http://trac.browsershots.org/browser/trunk/devtools/pep8/
35
36 Groups of errors and warnings:
37 E errors
38 W warnings
39 100 indentation
40 200 whitespace
41 300 blank lines
42 400 imports
43 500 line length
44 600 deprecation
45
46 You can add checks to this program by writing plugins. Each plugin is
47 a simple function that is called for each line of source code, either
48 physical or logical.
49
50 Physical line:
51 - Raw line of text from the input file.
52
53 Logical line:
54 - Multi-line statements converted to a single line.
55 - Stripped left and right.
56 - Contents of strings replaced with 'xxx' of same length.
57 - Comments removed.
58
59 The check function requests physical or logical lines by the name of
60 the first argument:
61
62 def maximum_line_length(physical_line)
63 def extraneous_whitespace(logical_line)
64 def indentation(logical_line, indent_level, state)
65
66 The last example above demonstrates how check plugins can request
67 additional information with extra arguments. All attributes of the
68 Checker object are available. Some examples:
69
70 lines: a list of the raw lines from the input file
71 tokens: the tokens that contribute to this logical line
72 state: dictionary for passing information across lines
73 indent_level: indentation (with tabs expanded to multiples of 8)
74
75 The docstring of each check function shall be the relevant part of
76 text from PEP 8. It is printed if the user enables --show-pep8.
77
78 """
79
80 import os
81 import sys
82 import re
83 import time
84 import inspect
85 import tokenize
86 from optparse import OptionParser
87 from keyword import iskeyword
88 from fnmatch import fnmatch
89
# Release and source-control identifiers for this module.
__version__ = '0.2.0'
__revision__ = '$Rev: 930 $'

# Comma-separated patterns used as the default value of --exclude.
default_exclude = '.svn,CVS,*.pyc,*.pyo'

# Matches the leading run of spaces and tabs of a line (group 1).
indent_match = re.compile(r'([ \t]*)').match
# Matches "raise Name ," at the start of a line; group 1 is the comma.
raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match

# Operator spellings searched for by whitespace_around_operator.
operators = """
+  -  *  /  %  ^  &  |  =  <  >  >>  <<
+= -= *= /= %= ^= &= |= == <= >= >>= <<=
!= <> :
in is or not and
""".split()

# Parsed command line state; both are assigned by process_options().
options = None
args = None
107
108
109 ##############################################################################
110 # Plugins (check functions) for physical lines
111 ##############################################################################
112
113
def tabs_or_spaces(physical_line, state):
    """
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!
    """
    # NOTE: the docstring above is shown verbatim by --show-pep8.
    leading = indent_match(physical_line).group(1)
    if not leading:
        # Unindented lines cannot mix anything.
        return None
    # The first indented line seen fixes the indent character that all
    # later lines are compared against (remembered in the shared state).
    expected = state.setdefault('indent_char', leading[0])
    mismatches = [position for position, char in enumerate(leading)
                  if char != expected]
    if mismatches:
        return (mismatches[0],
                "E101 indentation contains mixed spaces and tabs")
    return None
136
137
def tabs_obsolete(physical_line):
    """
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.
    """
    # Report the position of the first tab inside the leading whitespace.
    leading = indent_match(physical_line).group(1)
    tab_position = leading.find('\t')
    if tab_position >= 0:
        return tab_position, "W191 indentation contains tabs"
    return None
146
147
def trailing_whitespace(physical_line):
    """
    JCR: Trailing whitespace is superfluous.
    """
    # Peel off the line terminator first -- newline, then carriage
    # return, then form feed (^L) -- so that only whitespace in front of
    # it is reported.  The order of the three steps matters.
    content = physical_line
    for terminator in ('\n', '\r', '\x0c'):
        content = content.rstrip(terminator)
    trimmed_length = len(content.rstrip())
    if trimmed_length != len(content):
        return trimmed_length, "W291 trailing whitespace"
    return None
158
159
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    # Trailing whitespace (including the newline) does not count.
    visible = physical_line.rstrip()
    if len(visible) <= 79:
        return None
    return 79, "E501 line too long (%d characters)" % len(visible)
174
175
176 ##############################################################################
177 # Plugins (check functions) for logical lines
178 ##############################################################################
179
180
def blank_lines(logical_line, state, indent_level):
    """
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.
    """
    # Number of blank lines seen since the previous logical line.
    preceding = state.get('blank_lines', 0)
    if logical_line.startswith('def '):
        if indent_level > 0:
            # Nested definition: exactly one blank line.
            if preceding != 1:
                return 0, "E301 expected 1 blank line, found %d" % preceding
        elif indent_level == 0 and preceding != 2:
            # Top-level definition: exactly two blank lines.
            return 0, "E302 expected 2 blank lines, found %d" % preceding
    if preceding > 2:
        return 0, "E303 too many blank lines (%d)" % preceding
    return None
202
203
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.
    """
    # NOTE: the docstring above is shown verbatim by --show-pep8.
    # Returns (offset, message) for the first problem found, else None.
    line = logical_line
    for char in '([{':
        found = line.find(char + ' ')
        if found > -1:
            # Point at the space, not at the bracket.
            return found + 1, "E201 whitespace after '%s'" % char
    for char in '}])':
        # Examine every occurrence: a space that is tolerated because it
        # follows a comma must not hide a later offending one.
        found = line.find(' ' + char)
        while found > -1:
            # found == 0 has no preceding character to excuse the space.
            if found == 0 or line[found - 1] != ',':
                return found, "E202 whitespace before '%s'" % char
            found = line.find(' ' + char, found + 1)
    for char in ',;:':
        found = line.find(' ' + char)
        if found > -1:
            return found, "E203 whitespace before '%s'" % char
    return None
225
226
def indentation(logical_line, indent_level, state):
    """
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.
    """
    # Snapshot what the previous logical line left behind before
    # overwriting it with this line's values.
    previous_level = state.get('indent_level', 0)
    block_expected = state.get('indent_expect', False)
    # A line ending in a colon (ignoring trailing '#' and spaces) means
    # the next logical line has to be indented further.
    opens_block = logical_line.rstrip('#').rstrip().endswith(':')
    state.update({'indent_expect': opens_block,
                  'indent_level': indent_level})
    # The multiple-of-four rule only applies to space indentation.
    uses_spaces = state.get('indent_char', ' ') == ' '
    if uses_spaces and indent_level % 4:
        return 0, "E111 indentation is not a multiple of four"
    deeper = indent_level > previous_level
    if block_expected:
        if not deeper:
            return 0, "E112 expected an indented block"
    elif deeper:
        return 0, "E113 unexpected indentation"
    return None
246
247
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.
    """
    # Walk the tokens pairwise: `previous` is the token right before
    # `current`, and `position` is the index of `previous` in tokens.
    previous = tokens[0]
    for position, current in enumerate(tokens[1:]):
        kind, text, start = current[0], current[1], current[2]
        opens_bracket = kind == tokenize.OP and text in '(['
        detached_from_name = (start != previous[3]
                              and previous[0] == tokenize.NAME)
        if opens_bracket and detached_from_name:
            # "class Name (Base)" is exempt, as is a bracket that merely
            # follows a keyword.
            after_class = (position >= 1
                           and tokens[position - 1][1] == 'class')
            if not after_class and not iskeyword(previous[1]):
                # The offset is the (row, column) where the name ends.
                return previous[3], "E211 whitespace before '%s'" % text
        previous = current
    return None
273
274
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.
    """
    # For each operator, look for a double space first and a tab second;
    # the first hit (in operator order) is reported.
    complaints = (('  ', "E221 multiple spaces before operator"),
                  ('\t', "E222 tab before operator"))
    for operator in operators:
        for padding, complaint in complaints:
            position = logical_line.find(padding + operator)
            if position != -1:
                return position, complaint
    return None
290
291
def imports_on_separate_lines(logical_line):
    """
    Imports should usually be on separate lines.
    """
    # Only plain "import a, b" is examined; "from x import a, b" does
    # not start with 'import ' and is left alone.
    if not logical_line.startswith('import '):
        return None
    comma = logical_line.find(',')
    if comma != -1:
        return comma, "E401 multiple imports on one line"
    return None
301
302
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    # Report the offset of the first ".has_key(" call on the line.
    position = logical_line.find('.has_key(')
    if position < 0:
        return None
    return position, "W601 .has_key() is deprecated, use 'in'"
314
315
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.
    """
    # Group 1 of the pattern is the comma after the exception name.
    found = raise_comma_match(logical_line)
    if found is None:
        return None
    return found.start(1), "W602 deprecated form of raising exception"
329
330
331 ##############################################################################
332 # Helper functions
333 ##############################################################################
334
335
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    Only the leading run of spaces and tabs is measured; counting stops
    at the first other character.  The result is always an integer.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    result = 0
    for char in line:
        if char == '\t':
            # Floor division keeps the width an integer on every
            # interpreter; true division would yield fractional tab stops.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
361
362
363 ##############################################################################
364 # Framework to run all checks
365 ##############################################################################
366
367
def message(text):
    """Print a message.

    Writes text followed by a newline to standard output.
    """
    # Parenthesised single-argument form: it behaves the same whether
    # print is a statement or a function.
    print(text)
373
374
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    Returns a list of (name, function, argument_names) tuples, sorted by
    function name.
    """
    # inspect.getargspec is missing from recent Python 3 releases; use
    # its replacement when the interpreter provides one.
    getargspec = getattr(inspect, 'getfullargspec', None)
    if getargspec is None:
        getargspec = inspect.getargspec
    function_type = type(find_checks)
    checks = []
    # items() exists on every dict implementation; copying to a list
    # keeps the loop independent of later changes to the namespace.
    for name, candidate in list(globals().items()):
        if type(candidate) is not function_type:
            continue
        argument_names = getargspec(candidate)[0]
        if argument_names and argument_names[0].startswith(argument_name):
            checks.append((name, candidate, argument_names))
    # Names are unique, so ordering by name alone is a total order and
    # function objects never have to be compared.
    checks.sort(key=lambda check: check[0])
    return checks
389
390
def mute_string(text):
    """
    Mask the body of a string literal with 'x' characters.

    Prefix letters (e.g. u or r) and the surrounding quotes, single or
    triple, are kept; every character between them becomes 'x'.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # Length of any string modifier in front of the opening quote.
    prefix_length = 0
    for quote in ('"', "'"):
        if text.endswith(quote):
            prefix_length = text.index(quote)
            break
    # Width of the quote marks on each side: three for triple quotes.
    quote_width = 1
    if text.endswith('"""') or text.endswith("'''"):
        quote_width = 3
    body_start = prefix_length + quote_width
    body_end = len(text) - quote_width
    masked = 'x' * (body_end - body_start)
    return text[:body_start] + masked + text[body_end:]
414
415
class Checker:
    """
    Load a Python source file, tokenize it, check coding style.

    Check plugins are discovered with find_checks().  Each plugin is
    called with attributes of this object (physical_line, logical_line,
    tokens, state, indent_level, lines, ...) selected by the names of
    its parameters.
    """

    def __init__(self, filename):
        """
        Read filename into memory and collect the check plugins.
        """
        self.filename = filename
        # Close the handle explicitly instead of leaving an anonymous
        # file object to be collected whenever the interpreter gets to it.
        source = open(filename)
        try:
            self.lines = source.readlines()
        finally:
            source.close()
        self.physical_checks = find_checks('physical_line')
        self.logical_checks = find_checks('logical_line')
        # Per-run bookkeeping.  check_all() resets these for every run;
        # creating them here keeps the other methods usable on their own.
        self.file_errors = 0
        self.line_number = 0
        self.state = {'blank_lines': 0}
        self.tokens = []
        options.counters['physical lines'] = \
            options.counters.get('physical lines', 0) + len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Returns '' once all lines have been consumed.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Every name in argument_names is looked up as an attribute of
        this object and passed positionally to check.
        """
        arguments = [getattr(self, name) for name in argument_names]
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        for name, check, argument_names in self.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line and self.mapping, a list of
        (offset in logical line, token) pairs used to translate offsets
        back to positions in the source file.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            # Comments and layout tokens are not part of the logical line.
            if token_type in (tokenize.COMMENT, tokenize.NL,
                              tokenize.INDENT, tokenize.DEDENT,
                              tokenize.NEWLINE):
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line:
                    # Continuation on a new row: join with one space,
                    # unless the previous row ended in an open bracket.
                    if self.lines[end_line - 1][end - 1] not in '{[(':
                        logical.append(' ')
                        length += 1
                elif end != start:
                    # Same row: keep the original text between tokens.
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        self.build_tokens_line()
        if not self.mapping:
            # Defensive: with no code tokens there is no first token to
            # take the indentation from, and nothing to check.
            return
        options.counters['logical lines'] = \
            options.counters.get('logical lines', 0) + 1
        # Indentation is whatever precedes the first token on its row.
        first_row, first_column = self.mapping[0][1][2]
        indent = self.lines[first_row - 1][:first_column]
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            message(self.logical_line[:80].rstrip())
        for name, check, argument_names in self.logical_checks:
            if options.verbose >= 3:
                message('    ' + name)
            result = self.run_check(check, argument_names)
            if result is None:
                continue
            offset, text = result
            if type(offset) is tuple:
                # The check already answered with (row, column).
                original_number, original_offset = offset
            else:
                # Translate the logical offset through the last token
                # that starts at or before it.
                for token_offset, token in self.mapping:
                    if offset >= token_offset:
                        original_number = token[2][0]
                        original_offset = (token[2][1]
                                           + offset - token_offset)
            self.report_error(original_number, original_offset,
                              text, check)

    def check_all(self):
        """
        Run all checks on the input file.

        Returns the number of errors and warnings found in this file.
        """
        self.file_errors = 0
        self.line_number = 0
        self.state = {'blank_lines': 0}
        self.tokens = []
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                # End of a logical line outside any bracket.
                self.check_logical()
                self.state['blank_lines'] = 0
                self.tokens = []
            if token_type == tokenize.NL and len(self.tokens) == 1:
                # An NL token on its own is a blank line.
                self.state['blank_lines'] += 1
                self.tokens = []
        return self.file_errors

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        text starts with the four character code; check is the plugin
        whose docstring is shown for --show-pep8.
        """
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        self.file_errors += 1
        code = text[:4]
        options.counters[code] = options.counters.get(code, 0) + 1
        options.messages[code] = text[5:]
        if options.quiet:
            return
        if options.testsuite:
            # In test suite mode the first four characters of the file
            # name are the code the file is expected to trigger.
            base = os.path.basename(self.filename)[:4]
            if base == code:
                return
            if base[0] == 'E' and code[0] == 'W':
                return
        if ignore_code(code):
            return
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, line_number, offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                message(check.__doc__.lstrip('\n').rstrip())
584
585
def input_file(filename):
    """
    Check a single Python source file.

    Files matching an exclude pattern, or not matching the --filename
    patterns, are skipped.
    """
    skip = excluded(filename) or not filename_match(filename)
    if skip:
        return {}
    if options.verbose:
        message('checking ' + filename)
    counters = options.counters
    counters['files'] = counters.get('files', 0) + 1
    error_count = Checker(filename).check_all()
    if not error_count and options.testsuite:
        # Test suite files are expected to trigger something.
        message("%s: %s" % (filename, "no errors found"))
598
599
def input_dir(dirname):
    """
    Check all Python source files in this directory and all subdirectories.

    Directories matching an exclude pattern are not entered.
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] = \
            options.counters.get('directories', 0) + 1
        # Prune in place (slice assignment) so os.walk does not descend
        # into excluded directories.  Removing entries while looping over
        # the same list would skip the entry after each removal.
        dirs[:] = sorted(subdir for subdir in dirs if not excluded(subdir))
        for filename in sorted(files):
            input_file(os.path.join(root, filename))
619
620
def excluded(filename):
    """
    Tell whether filename matches one of the options.exclude patterns.

    Returns True on a match and None otherwise.
    """
    for pattern in options.exclude:
        if not fnmatch(filename, pattern):
            continue
        return True
    return None
628
629
def filename_match(filename):
    """
    Tell whether filename matches one of the options.filename patterns.

    Returns True when no patterns were given or when one matches, and
    None otherwise.
    """
    patterns = options.filename
    if patterns:
        for pattern in patterns:
            if fnmatch(filename, pattern):
                return True
        return None
    return True
640
641
def ignore_code(code):
    """
    Tell whether code begins with one of the options.ignore prefixes.

    Returns True on a match and None otherwise.
    """
    for prefix in options.ignore:
        if code[:len(prefix)] == prefix:
            return True
    return None
649
650
def get_error_statistics():
    """Return the statistics lines for codes starting with 'E'."""
    return get_statistics(prefix="E")
654
655
def get_warning_statistics():
    """Return the statistics lines for codes starting with 'W'."""
    return get_statistics(prefix="W")
659
660
def get_statistics(prefix=''):
    """
    Collect one formatted line per message code starting with prefix.

    Each line holds the count, the code and the message text, and the
    lines are ordered by code.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports
    """
    counters = options.counters
    messages = options.messages
    return ['%-7s %s %s' % (counters[code], code, messages[code])
            for code in sorted(messages)
            if code.startswith(prefix)]
678
679
def print_statistics(prefix=''):
    """Print overall statistics (number of errors and warnings).

    Only codes starting with prefix are included; see get_statistics.
    """
    for line in get_statistics(prefix):
        # Parenthesised single-argument form: it behaves the same
        # whether print is a statement or a function.
        print(line)
684
685
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed is the measured run time in seconds.  One line is printed
    for each counter that was recorded during the run.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in ('directories', 'files', 'logical lines', 'physical lines'):
        if key not in options.counters:
            continue
        total = options.counters[key]
        # A run shorter than the clock resolution measures as zero
        # seconds; report a rate of 0 rather than dividing by zero.
        if elapsed:
            rate = total / elapsed
        else:
            rate = 0
        print('%-7d %s per second (%d total)' % (rate, key, total))
698
699
def process_options(arglist=None):
    """
    Process options passed either via arglist or via command line args.

    Stores the result in the module globals options and args and also
    returns them as a tuple.  Pattern and code lists given as comma
    separated strings are split into Python lists, and empty counters
    and messages dictionaries are attached to options.
    """
    global options, args
    usage = "%prog [options] input ..."
    parser = OptionParser(usage)
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('--exclude', metavar='patterns', default=default_exclude,
                      help="skip matches (default %s)" % default_exclude)
    parser.add_option('--filename', metavar='patterns',
                      help="only check matching files (e.g. *.py)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--repeat', action='store_true',
                      help="show all occurrences of the same error")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--benchmark', action='store_true',
                      help="measure processing speed")
    parser.add_option('--testsuite', metavar='dir',
                      help="run regression tests from dir")
    parser.add_option('--doctest', action='store_true',
                      help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        # The test suite directory is checked like any other input.
        args.append(options.testsuite)
    # --doctest tests this module itself, so it needs no input paths.
    if len(args) == 0 and not options.doctest:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    # Drop trailing slashes so directory patterns match plain names.
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    if options.ignore:
        options.ignore = options.ignore.split(',')
    else:
        options.ignore = []
    options.counters = {}
    options.messages = {}

    return options, args
750
def _main():
    """
    Command line entry point: parse the options, then check every path.

    With --doctest the module's own doctests are run instead and their
    result is returned.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        return doctest.testmod()
    started = time.time()
    for path in args:
        # Directories are walked recursively, anything else is a file.
        if os.path.isdir(path):
            check_path = input_dir
        else:
            check_path = input_file
        check_path(path)
    elapsed = time.time() - started
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
770
771
# Run the checker only when executed as a script, not when imported.
if __name__ == '__main__':
    _main()
Note: See TracBrowser for help on using the browser.