root/branches/mk/cheesecake/cheesecake_index.py

Revision 41, 41.6 kB (checked in by mk, 7 years ago)

Changed description of --path command line option (following Will Guaraldi suggestion).

  • Property svn:executable set to
Line 
1 #!/usr/bin/env python
2 """
Cheesecake: How tasty is your code?

The idea of the Cheesecake project is to rank Python packages
based on various empirical "kwalitee" factors, such as:

        * whether the package can be downloaded
        * whether the package can be unpacked
        * whether the package can be installed into an alternate directory
        * existence of certain files such as README, INSTALL, LICENSE, setup.py etc.
        * existence of certain directories such as doc, test, demo, examples
        * percentage of modules/functions/classes/methods with docstrings
        * percentage of functions/methods that are unit tested
        * average pylint score for all non-test and non-demo modules
18 """
19
20 import os, sys, re, shutil
21 import tarfile, zipfile
22 import tempfile
23 from optparse import OptionParser
24 from urllib import urlretrieve
25 from urlparse import urlparse
26 from math import ceil
27
28 from _util import run_cmd, pad_with_dots, pad_left_spaces, pad_msg, pad_line, command_successful
29 from _util import StdoutRedirector
30 import logger
31 from config import get_pkg_config
32 from codeparser import CodeParser
33
34 __docformat__ = 'reStructuredText en'
35
36
# Path of a 'cheesecake_sandbox' directory under the system temporary
# directory.
# NOTE(review): not referenced in the visible part of this file -- presumably
# the default value of a sandbox command line option; confirm before removing.
default_temp_directory = os.path.join(tempfile.gettempdir(),
                                      'cheesecake_sandbox')
39
40 ################################################################################
41 ## Helpers.
42 ################################################################################
43
def isiterable(obj):
    """Return True if `obj` exposes an ``__iter__`` attribute.

    :Note: Only the presence of ``__iter__`` is checked; objects that are
           iterable solely through ``__getitem__`` are reported as
           not iterable.
    """
    has_iter_protocol = hasattr(obj, '__iter__')
    return has_iter_protocol
46
def has_extension(filename, ext):
    """Tell whether the last extension of `filename` equals `ext`.

    `ext` must include the leading dot.

    >>> has_extension("foobar.py", ".py")
    True
    >>> has_extension("foo.bar.py", ".py")
    True
    >>> has_extension("foobar.pyc", ".py")
    False
    """
    _root, suffix = os.path.splitext(filename)
    return suffix == ext
58
def discover_file_type(filename):
    """Classify a file by its name and the directories leading to it.

    Recognized types:
        * special: setup.py, ez_setup.py and __pkginfo__.py
        * pyc
        * pyo
        * test: .py files below a ``test`` or ``tests`` directory
        * demo: .py files below a ``docs``, ``demo`` or ``example`` directory
        * module: any other .py file

    Any other file yields None.

    :Note: Only the name is inspected; the filesystem is never touched.
           Check for the file's existence yourself if you need to.

    >>> discover_file_type('module.py')
    'module'
    >>> discover_file_type('./setup.py')
    'special'
    >>> discover_file_type('some/directory/junk.pyc')
    'pyc'
    """
    parts = filename.split(os.path.sep)
    parent_dirs, basename = parts[:-1], parts[-1]

    # Special files win over every other classification.
    if basename in ("setup.py", "ez_setup.py", "__pkginfo__.py"):
        return 'special'

    extension = os.path.splitext(basename)[1]

    if extension == ".pyc":
        return 'pyc'
    if extension == ".pyo":
        return 'pyo'
    if extension != ".py":
        return None

    # The outermost matching directory decides between test and demo.
    for dirname in parent_dirs:
        if dirname in ('test', 'tests'):
            return 'test'
        if dirname in ('docs', 'demo', 'example'):
            return 'demo'
    return 'module'
97
def get_files_of_type(file_list, file_type):
    """Return files from `file_list` that match given `file_type`.

    The result is always a new list, whatever sequence type `file_list`
    is, so callers can safely take its length or index it.

    :Parameters:
      `file_list` : iterable of str
          File paths to classify with `discover_file_type`.
      `file_type` : str
          One of the types returned by `discover_file_type`.

    >>> file_list = ['test/test_foo.py', 'setup.py', 'README', 'test/test_bar.py']
    >>> get_files_of_type(file_list, 'test')
    ['test/test_foo.py', 'test/test_bar.py']
    """
    return [path for path in file_list
            if discover_file_type(path) == file_type]
106
def get_method_arguments(method):
    """Return tuple of arguments for given method, excluding self.

    Only named positional arguments are reported; local variables and
    ``*args``/``**kwds`` names are not part of the result.

    >>> class Class:
    ...     def method(s, arg1, arg2, other_arg):
    ...         pass
    >>> get_method_arguments(Class.method)
    ('arg1', 'arg2', 'other_arg')
    """
    # `__code__` is the portable spelling; `func_code` is the older alias
    # that some Python 2 releases only offer.
    code = getattr(method, '__code__', None)
    if code is None:
        code = method.func_code

    # co_varnames lists arguments first, then locals; skip the first
    # argument (self) and stop at the argument count.
    return code.co_varnames[1:code.co_argcount]
117
def get_attributes(obj, names):
    """Build a dictionary mapping each of `names` to `obj`'s attribute.

    Attributes that `obj` does not have are mapped to None.

    >>> class Class:
    ...     pass
    >>> obj = Class()
    >>> obj.attr = True
    >>> obj.value = 13
    >>> obj.string = "Hello"

    >>> d = get_attributes(obj, ['attr', 'string', 'other'])
    >>> d == {'attr': True, 'string': "Hello", 'other': None}
    True
    """
    return dict((attr_name, getattr(obj, attr_name, None))
                for attr_name in names)
141
def camel2underscore(name):
    """Convert name from CamelCase to underscore_name.

    >>> camel2underscore('CamelCase')
    'camel_case'
    >>> camel2underscore('already_underscore_name')
    'already_underscore_name'
    >>> camel2underscore('BigHTMLClass')
    'big_html_class'
    >>> camel2underscore('')
    ''
    """
    # A leading capital must not produce a leading underscore.
    if name and name[0].isupper():
        name = name[0].lower() + name[1:]

    def capitalize(match):
        # Collapse a run of capitals into a single word ("HTMLC" -> "HtmlC"):
        # the last capital is kept because it starts the next word.
        string = match.group(1).lower().capitalize()
        return string[:-1] + string[-1].upper()

    def underscore(match):
        return '_' + match.group(1).lower()

    name = re.sub(r'([A-Z]+)', capitalize, name)
    return re.sub(r'([A-Z])', underscore, name)
166
def index_class_to_name(clsname):
    """Convert index class name to index name.

    The first occurrence of ``Index`` is dropped and the remainder is
    converted from CamelCase to underscore_name.

    >>> index_class_to_name("IndexDownload")
    'download'
    >>> index_class_to_name("IndexUnitTests")
    'unit_tests'
    >>> index_class_to_name("IndexPyPIDownload")
    'py_pi_download'
    """
    bare_name = clsname.replace('Index', '', 1)
    return camel2underscore(bare_name)
178
def is_empty(path):
    """Tell whether `path` is a zero-length file or a directory without entries.

    Paths that are neither a file nor a directory are reported as
    not empty.
    """
    if os.path.isfile(path) and not os.path.getsize(path):
        return True

    return os.path.isdir(path) and not os.listdir(path)
188
def strip_dir_part(path, root):
    """Strip leading `root` part from `path`.

    Only a `root` found at the very beginning of `path` is removed; an
    occurrence further inside the path is left alone. A path separator
    left at the front of the result is removed as well.

    >>> strip_dir_part('/home/ruby/file', '/home')
    'ruby/file'
    >>> strip_dir_part('/home/ruby/file', '/home/')
    'ruby/file'
    >>> strip_dir_part('/home/ruby/', '/home')
    'ruby/'
    >>> strip_dir_part('/home/ruby/', '/home/')
    'ruby/'
    """
    if path.startswith(root):
        path = path[len(root):]

    if path.startswith(os.path.sep):
        path = path[len(os.path.sep):]

    return path
207
def get_files_dirs_list(root):
    """Walk `root` and collect every file and directory found below it.

    Returns a ``(files, directories)`` pair of lists. All paths are
    relative to `root` (the root part is stripped).
    """
    files = []
    directories = []

    for current_dir, subdir_names, file_names in os.walk(root):
        relative_dir = strip_dir_part(current_dir, root)

        for file_name in file_names:
            files.append(os.path.join(relative_dir, file_name))
        for subdir_name in subdir_names:
            directories.append(os.path.join(relative_dir, subdir_name))

    return files, directories
222
223 ################################################################################
224 ## Main index class.
225 ################################################################################
226
class NameSetter(type):
    """Metaclass giving each class a default `name` attribute.

    A class that does not define `name` in its own body gets one derived
    from its class name via `index_class_to_name`.
    """
    def __init__(cls, name, bases, dict):
        if 'name' in dict:
            # The class picked its own name; leave it alone.
            return
        cls.name = index_class_to_name(name)
231
def make_indices_dict(indices):
    """Map each index's `name` to the index object itself.

    When two indices share a name, the later one wins.
    """
    return dict((index.name, index) for index in indices)
237
class Index(object):
    """Class describing one index.

    Use it as a container index or subclass to create custom indices.

    During class initialization, special attribute `name` is magically
    set based on class name. See `index_class_to_name` and `NameSetter`
    definitions for details.

    :CVariables:
      `subindices` : list of Index or None
          Subindices every instance of the class starts with.
      `name` : str
          Name of the index.
      `value` : int
          Last computed value (-1 before the first computation).
      `details` : str
          Human readable explanation of the last computed value.
    """
    __metaclass__ = NameSetter

    subindices = None

    name = "unnamed"
    value = -1
    details = ""

    def __init__(self, indices=None):
        """Create the index, optionally adding extra subindices.

        :Parameters:
          `indices` : iterable of Index
              Subindices to add on top of the class-level `subindices`.
        """
        # Work on a per-instance copy: `add_subindex` appends to this list
        # and must not modify the class-level list shared by all instances.
        self.subindices = list(self.subindices or [])

        # Create dictionary for fast reference.
        self._indices_dict = make_indices_dict(self.subindices)

        for index in indices or []:
            self.add_subindex(index)

        # Names of the attributes `compute` wants from a Cheesecake instance.
        self._compute_arguments = get_method_arguments(self.compute)

    def _iter_indices(self):
        """Iterate over each subindex and yield their values.
        """
        for index in self.subindices:
            # Pass Cheesecake instance to other indices.
            yield index.compute_with(self.cheesecake)
            # Print index info after computing.
            index.print_info()

    def compute_with(self, cheesecake):
        """Take given Cheesecake instance and compute index value.

        Each argument named in the signature of `compute` is looked up as
        an attribute of `cheesecake`; missing attributes are passed as None.
        """
        self.cheesecake = cheesecake
        return self.compute(**get_attributes(cheesecake, self._compute_arguments))

    def compute(self):
        """Compute index value and return it.

        By default this method computes sum of all subindices. Override this
        method when subclassing for different behaviour.

        Parameters to this function are dynamically prepared with use of
        `get_attributes` function.

        :Warning: Don't use *args and **kwds arguments for this method.
        """
        self.value = sum(self._iter_indices())
        return self.value

    def _get_max_value(self):
        """Return sum of `max_value` of all subindices (0 when there are none).
        """
        if self.subindices:
            return sum(map(lambda index: index.max_value,
                           self.subindices))
        return 0

    max_value = property(_get_max_value)

    def add_subindex(self, index):
        """Add subindex.

        :Parameters:
          `index` : Index instance
              Index instance for inclusion.

        :Raises ValueError: if `index` is not an Index instance.
        """
        if not isinstance(index, Index):
            raise ValueError("subindex have to be instance of Index")

        self.subindices.append(index)
        self._indices_dict[index.name] = index

    def print_info(self):
        """Print index name padded with dots, followed by value and details.
        """
        print("%s%s (%s)" % (pad_with_dots(self.name),
                             pad_left_spaces(self.value),
                             self.details))

    def __getitem__(self, name):
        """Return subindex with given `name` (KeyError when unknown).
        """
        return self._indices_dict[name]
326
class MegaIndex(Index):
    """Composite index that reports absolute and relative totals.
    """
    def print_info(self):
        """Print a separator line, the absolute value and the percentage
        of the maximum that was reached.

        Nothing is printed when the maximum value is 0.
        """
        maximum = self.max_value
        if maximum == 0:
            return

        percentage = int(ceil(float(self.value) / float(maximum) * 100))

        print(pad_line("-"))
        print(pad_msg("%s INDEX (ABSOLUTE)" % self.name, self.value))

        relative_line = pad_msg("%s INDEX (RELATIVE)" % self.name, percentage)
        summary = " (%d out of a maximum of %d points is %d%%)" % \
                  (self.value, maximum, percentage)
        print(relative_line + summary)

        # Empty line separating this composite index from the next one.
        print("")
345
346 ################################################################################
347 ## Installability index.
348 ################################################################################
349
class IndexUrlDownload(Index):
    """Full score when the package has been downloaded from a given URL.
    """
    max_value = 25

    def compute(self, downloaded_from_url, package, url):
        """Return `max_value` if the download succeeded, 0 otherwise.

        `details` is only updated on success.
        """
        if not downloaded_from_url:
            self.value = 0
            return self.value

        self.details = "downloaded package %s from URL %s"  % (package, url)
        self.value = self.max_value
        return self.value
361
class IndexUnpack(Index):
    """Full score when the package has been unpacked.
    """
    max_value = 25

    def compute(self, unpacked):
        """Return `max_value` if the package was unpacked, 0 otherwise.

        `details` is only updated on success.
        """
        if not unpacked:
            self.value = 0
            return self.value

        self.details = "package unpacked successfully"
        self.value = self.max_value
        return self.value
373
class IndexUnpackDir(Index):
    """Full score when the package unpacks into the expected directory.
    """
    max_value = 15

    def compute(self, unpack_dir, original_package_name):
        """Return `max_value` unless `original_package_name` is set.

        A non-empty `original_package_name` means the unpack directory
        differs from the expected name, which scores 0.
        """
        self.details = "unpack directory is " + unpack_dir

        if original_package_name:
            suffix = " instead of the expected " + original_package_name
            score = 0
        else:
            suffix = " as expected"
            score = self.max_value

        self.details += suffix
        self.value = score
        return self.value
388
class IndexInstall(Index):
    """Full score when the package installs into the sandbox directory.
    """
    max_value = 50

    def compute(self, installed, sandbox_install_dir):
        """Return `max_value` if the package was installed, 0 otherwise.
        """
        if not installed:
            self.details = "could not install package in %s" % sandbox_install_dir
            self.value = 0
            return self.value

        self.details = "package installed in %s" % sandbox_install_dir
        self.value = self.max_value
        return self.value
401
class IndexPyPIDownload(Index):
    """Score for downloading the package through PyPI.

    Starts from `max_value` and applies `distance_penalty` for each hop
    away from PyPI when the package is not hosted on the Cheese Shop.
    """
    max_value = 50
    distance_penalty = -5

    def compute(self, package, found_on_cheeseshop, distance_from_pypi, download_url):
        """Return the download score; 0 when there is no `download_url`.
        """
        if not download_url:
            self.value = 0
            return self.value

        self.value = self.max_value
        self.details = "downloaded package " + package

        if found_on_cheeseshop:
            self.details += " directly from the Cheese Shop"
            return self.value

        self.value += distance_from_pypi * self.distance_penalty

        if distance_from_pypi:
            self.details += " following %d link" % distance_from_pypi
            # NOTE(review): the plural message names PyPI while the singular
            # one names the download URL -- confirm this is intended.
            if distance_from_pypi > 1:
                self.details += "s"
                self.details += " from PyPI"
            else:
                self.details += " from " + download_url

        return self.value
428
class IndexGeneratedFiles(Index):
    """Penalty index for packages shipping compiled Python files.
    """
    generated_files_penalty = -20

    def compute(self, files_list):
        """Return `generated_files_penalty` if any .pyc or .pyo file is
        present in `files_list`, 0 otherwise.
        """
        self.value = 0

        pyc_count = len(get_files_of_type(files_list, 'pyc'))
        pyo_count = len(get_files_of_type(files_list, 'pyo'))

        # The penalty is applied once, no matter how many files were found.
        if pyc_count or pyo_count:
            self.value += self.generated_files_penalty

        self.details = "%d .pyc and %d .pyo files found" % \
                       (pyc_count, pyo_count)

        return self.value
445
class IndexInstallability(MegaIndex):
    """Composite index grouping the unpack and install related indices.

    The download indices (`IndexPyPIDownload`, `IndexUrlDownload`) are not
    listed here: `Cheesecake.__init__` adds one of them with `add_subindex`
    depending on how the package was specified.
    """
    name = "INSTALLABILITY"

    # These instances are created once, when the class body is executed.
    subindices = [
        IndexUnpack(),
        IndexUnpackDir(),
        IndexInstall(),
        IndexGeneratedFiles(),
    ]
455
456 ################################################################################
457 ## Documentation index.
458 ################################################################################
459
def match_filename(name, rule):
    """Check if `name` matches given `rule`.

    `rule` is either a string or a `OneOf` instance (possibly nested).
    A string rule matches when `name` equals it with the root part all
    lower-case, all upper-case or capitalized, and the extension all
    lower-case or all upper-case. A `OneOf` rule matches at most once:
    after the first match it is flagged as used and rejects everything
    until the flag is reset.
    """
    def equal(x, y):
        x_root, x_ext = os.path.splitext(x)
        y_root, y_ext = os.path.splitext(y.lower())
        root_matches = x_root in (y_root.lower(), y_root.upper(),
                                  y_root.capitalize())
        ext_matches = x_ext in (y_ext.lower(), y_ext.upper())
        return root_matches and ext_matches

    if isinstance(rule, basestring):
        return equal(name, rule)

    if not isinstance(rule, OneOf) or rule.used:
        return False

    for candidate in rule.possibilities:
        if match_filename(name, candidate):
            rule.used = True
            return True

    return False
481
class OneOf(object):
    """Rule listing several alternative names (or nested rules).

    :Ivariables:
      `possibilities` : tuple
          Alternatives given to the constructor.
      `used` : bool
          Initially False; `match_filename` sets it once the rule has
          matched, so that a rule is not matched twice.
    """
    def __init__(self, *possibilities):
        self.used = False
        self.possibilities = possibilities

    def __str__(self):
        return 'one of ' + str(self.possibilities)
488
def WithOptionalExt(name, extensions):
    """Build a `OneOf` rule for `name` with or without one of `extensions`.

    Instead of writing:
        >>> one_of = OneOf('readme', 'readme.html', 'readme.txt')

    Write this:
        >>> opt_ext = WithOptionalExt('readme', ['html', 'txt'])

    Both rules are equivalent (their representations are the same):
        >>> str(one_of) == str(opt_ext)
        True
    """
    variants = [name] + [name + '.' + extension for extension in extensions]
    return OneOf(*variants)
506
def Doc(name):
    """Return rule matching documentation file `name`, either bare or
    with an ``html`` or ``txt`` extension.
    """
    doc_extensions = ['html', 'txt']
    return WithOptionalExt(name, doc_extensions)
509
class IndexRequiredFiles(Index):
    """Score the presence of well-known files and directories.

    `cheese_files` and `cheese_dirs` map rules to the points awarded when
    a non-empty file (or directory) matching the rule is found. Every rule
    is a `OneOf` instance, so each of them scores at most once per
    computation and the value never exceeds `max_value`, even when the
    same name (e.g. ``setup.py`` or ``doc``) appears in several
    directories of the package.
    """
    cheese_files = {
        OneOf('setup.py'): 15,
        Doc('readme'): 15,
        OneOf(Doc('license'), Doc('copying')): 15,

        Doc('authors'): 10,
        Doc('announce'): 10,
        Doc('changelog'): 10,
        Doc('faq'): 10,
        Doc('install'): 10,
        Doc('news'): 10,
        Doc('thanks'): 10,
        Doc('todo'): 10,
    }

    cheese_dirs = {
        OneOf('demo'): 20,
        OneOf('doc'): 25,
        OneOf('example'): 20,
        OneOf('test', 'tests'): 25,
    }

    max_value = sum(cheese_files.values()) + sum(cheese_dirs.values())

    def compute(self, files_list, dirs_list, package_dir):
        """Sum the points of all rules matched by non-empty files and
        directories of the package.

        :Parameters:
          `files_list` : list of str
              File paths relative to `package_dir`.
          `dirs_list` : list of str
              Directory paths relative to `package_dir`.
          `package_dir` : str
              Directory of the unpacked package.
        """
        self.value = 0
        # Rules remember that they matched; forget the previous computation.
        self.reset_rules(list(self.cheese_files) + list(self.cheese_dirs))

        files_count = 0
        for filename in files_list:
            if not is_empty(os.path.join(package_dir, filename)):
                score = self.get_score(os.path.basename(filename), self.cheese_files)
                if score != 0:
                    self.value += score
                    files_count += 1

        directories_count = 0
        for directory in dirs_list:
            if not is_empty(os.path.join(package_dir, directory)):
                score = self.get_score(os.path.basename(directory), self.cheese_dirs)
                if score != 0:
                    self.value += score
                    directories_count += 1

        self.details = "%d files and %d required directories found" % \
                       (files_count, directories_count)

        return self.value

    def get_score(self, name, specs):
        """Return points of the first rule in `specs` matched by `name`,
        or 0 when no rule matches.
        """
        for entry, value in specs.items():
            if match_filename(name, entry):
                self.cheesecake.log.debug("%d points entry found: %s (%s)" % \
                                          (value, name, entry))
                return value

        return 0

    def reset_rules(self, rules):
        """Clear the `used` flag of every `OneOf` rule in `rules`,
        including nested ones.
        """
        if isiterable(rules):
            for rule in rules:
                self.reset_rules(rule)
        elif isinstance(rules, OneOf):
            rules.used = False
            self.reset_rules(rules.possibilities)
576
class IndexDocstrings(Index):
    """Score proportional to the share of objects having docstrings.
    """
    max_value = 100

    def compute(self, object_cnt, docstring_cnt):
        """Scale the ratio `docstring_cnt`/`object_cnt` to `max_value`,
        rounding up. The ratio is 0 when there are no objects.
        """
        if object_cnt > 0:
            ratio = float(docstring_cnt)/float(object_cnt)
        else:
            ratio = 0

        self.value = int(ceil(ratio * self.max_value))

        self.details = "found %d/%d=%.2f%% objects with docstrings" %\
                       (docstring_cnt, object_cnt, ratio*100)

        return self.value
592
class IndexFormattedDocstrings(Index):
    """Score proportional to the share of objects having formatted
    docstrings.
    """
    max_value = 50

    def compute(self, object_cnt, docformat_cnt):
        """Scale the ratio `docformat_cnt`/`object_cnt` to `max_value`,
        rounding up. The ratio is 0 when there are no objects.
        """
        if object_cnt > 0:
            ratio = float(docformat_cnt)/float(object_cnt)
        else:
            ratio = 0

        self.value = int(ceil(ratio * self.max_value))

        self.details = "found %d/%d=%.2f%% objects with formatted docstrings" %\
                       (docformat_cnt, object_cnt, ratio*100)

        return self.value
608
class IndexDocumentation(MegaIndex):
    """Composite index grouping the documentation related indices:
    required files/directories, docstrings and formatted docstrings.
    """
    name = "DOCUMENTATION"

    # These instances are created once, when the class body is executed.
    subindices = [
        IndexRequiredFiles(),
        IndexDocstrings(),
        IndexFormattedDocstrings(),
    ]
617
618 ################################################################################
619 ## Code "kwalitee" index.
620 ################################################################################
621
class IndexUnitTests(Index):
    """Compute unittest index as percentage of methods/functions
    that are exercised in unit tests.
    """
    max_value = 50

    def compute(self, files_list, functions, package_dir):
        """Scale the share of unit tested `functions` to `max_value`.

        :Parameters:
          `files_list` : list of str
              File paths relative to `package_dir`; only files of type
              'test' are parsed.
          `functions` : sequence of str
              Names of the functions/methods of the package.
          `package_dir` : str
              Directory of the unpacked package.
        """
        unittest_cnt = 0
        # Names of everything called from the test files (used as a set).
        self.functions_tested = {}

        for testfile in get_files_of_type(files_list, 'test'):
            fullpath = os.path.join(package_dir, testfile)
            code = CodeParser(fullpath, self.cheesecake.log.debug)

            for func in code.functions_called():
                self.functions_tested[func] = 1

        for funcname in functions:
            if self.is_unit_tested(funcname):
                unittest_cnt += 1
                # The logger lives on the Cheesecake instance, not on the index.
                self.cheesecake.log.debug("%s is unit tested" % funcname)

        percent = 0
        if len(functions) > 0:
            percent = float(unittest_cnt)/float(len(functions))

        # Scale the result.
        self.value = int(ceil(percent * self.max_value))

        self.details = "found %d/%d=%.2f%% unit tested methods/functions." %\
                       (unittest_cnt, len(functions), percent*100)

        return self.value

    def is_unit_tested(self, funcname):
        """Tell whether a call recorded in `functions_tested` starts with
        the last component of `funcname`, or with its last two components
        joined by a dot.

        NOTE(review): this is a prefix match, so ``get`` is reported as
        tested when only ``get_files`` was called -- confirm it is intended.
        """
        elem = funcname.split(".")
        n1 = elem[-1]
        n2 = ""
        if len(elem) > 1:
            n2 = elem[-2] + "." + elem[-1]
        for key in self.functions_tested:
            if key.startswith(n1) or (n2 and key.startswith(n2)):
                return True
        return False
668
class IndexPyLint(Index):
    """Compute pylint index as average of positive pylint scores obtained for
    the Python files identified in the package.
    """
    name = "pylint"
    max_value = 50

    def compute(self, files_list, package_dir):
        """Run pylint on every file of type 'module' and scale the average
        score (out of 10) to `max_value`.

        Files for which pylint fails, prints no score or scores 0.00 are
        left out of the average. The value is 0 when pylint cannot be run.
        """
        self.value = 0

        # Try to run the pylint script
        if not command_successful("pylint --version"):
            self.details = "pylint not properly installed"
            return self.value

        pylint_value = 0
        cnt = 0
        for pyfile in get_files_of_type(files_list, 'module'):
            fullpath = os.path.join(package_dir, pyfile)
            module = os.path.splitext(os.path.basename(fullpath))[0]

            self.cheesecake.log.debug("Running pylint on file " + fullpath)
            rc, output = run_cmd("pylint " + fullpath)
            if rc:
                self.cheesecake.log.debug("encountered an error (%d)." % rc)
                continue

            # The score is looked up on the third line from the end;
            # shorter output cannot contain it.
            output_lines = output.split("\n")
            if len(output_lines) < 3:
                self.cheesecake.log.debug("unexpected pylint output, skipping.")
                continue

            s = re.search(r" (\d+\.\d+)/10", output_lines[-3])
            # We only take positive scores into account
            if s:
                score = s.group(1)
                if score == "0.00":
                    self.cheesecake.log.debug("ignoring 0.00 score.")
                    continue
                else:
                    self.cheesecake.log.debug("pylint score for module %s: %s" % (module, score))
                pylint_value += float(score)
                cnt += 1

        avg_score = 0
        if cnt:
            avg_score = float(pylint_value)/float(cnt)

        self.value = int(ceil(avg_score/10.0 * self.max_value))
        self.details = "average pylint score is %.2f out of 10" % avg_score

        return self.value
718
class IndexCodeKwalitee(MegaIndex):
    """Composite index grouping the code quality indices.

    Only the pylint index is active; the unit tests index is disabled
    (see the TODO below).
    """
    name = "CODE KWALITEE"

    # These instances are created once, when the class body is executed.
    subindices = [
        IndexPyLint(),
        # IndexUnitTests(), TODO
    ]
726
727 ################################################################################
728 ## Main Cheesecake class.
729 ################################################################################
730
class CheesecakeError(Exception):
    """
    Custom exception class for Cheesecake-specific errors.

    Raised by `Cheesecake.raise_exception`, which appends a
    "CHEESECAKE INDEX" line with value 0 to the message.
    """
    pass
736
737
class CheesecakeIndex(Index):
    """Top-level index: sum of the installability, documentation and
    code kwalitee composite indices.
    """
    name = "Cheesecake"
    # These instances are created once, when the class body is executed.
    subindices = [
        IndexInstallability(),
        IndexDocumentation(),
        IndexCodeKwalitee(),
    ]
745
746
747 class Cheesecake(object):
748     """
749     Computes 'goodness' of Python packages
750
751     Generates "cheesecake index" that takes into account things like:
752
753         * whether the package can be downloaded
754         * whether the package can be unpacked
755         * whether the package can be installed into an alternate directory
756         * existence of certain files such as README, INSTALL, LICENSE, setup.py etc.
757         * existence of certain directories such as doc, test, demo, examples
758         * percentage of modules/functions/classes/methods with docstrings
759         * percentage of functions/methods that are unit tested
760         * average pylint score for all non-test and non-demo modules
761     """
762     index = CheesecakeIndex()
763
764     def __init__(self, name="", url="", path="", sandbox=None, config=None,
765                 logfile=None, verbose=False, quiet=False):
766         """Initialize critical variables, download and unpack package,
767         walk package tree.
768         """
769         self.name = name
770         self.url = url
771         self.package_path = path
772
773         if not self.name and not self.url and not self.package_path:
774             self.raise_exception("No package name, URL or path specified ... exiting")
775
776         self.sandbox = sandbox or tempfile.mkdtemp(prefix='cheesecake')
777         if not os.path.isdir(self.sandbox):
778             os.mkdir(self.sandbox)
779
780         self.config = config
781         self.verbose = verbose
782         self.quiet = quiet
783
784         self.package_types = ["tar.gz", "tgz", "zip"]
785         self.sandbox_pkg_file = ""
786         self.sandbox_pkg_dir = ""
787         self.sandbox_install_dir = ""
788
789         # Include indices revelant to current situation.
790         if self.name:
791             self.index["INSTALLABILITY"].add_subindex(IndexPyPIDownload())
792         if self.url:
793             self.index["INSTALLABILITY"].add_subindex(IndexUrlDownload())
794
795         self.determine_pkg_name()
796         self.configure_logging(logfile)
797         #self.set_defaults()
798         #self.get_config()
799         self.retrieve_pkg()
800         self.unpack_pkg()
801         self.walk_pkg()
802         self.install_pkg()
803
804     def raise_exception(self, msg):
805         """Cleanup, print error message and raise CheesecakeError.
806
807         Don't use logging, since it can be called before logging has been setup.
808         """
809         self.cleanup()
810         os.unlink(os.path.join(self.sandbox, self.logfile))
811
812         msg += "\n" + pad_msg("CHEESECAKE INDEX", 0)
813         raise CheesecakeError(msg)
814  
815     def cleanup(self):
816         """Delete temporary directories and files that were created
817         in the sandbox. At the end delete the sandbox itself.
818         """
819         if os.path.isfile(self.sandbox_pkg_file):
820             self.log("Removing file %s" % self.sandbox_pkg_file)
821             os.unlink(self.sandbox_pkg_file)
822
823         def delete_dir(dirname):
824             "Delete directory recursively and generate log message."
825             if os.path.isdir(dirname):
826                 self.log("Removing directory %s" % dirname)
827                 shutil.rmtree(dirname)
828
829         delete_dir(self.sandbox)
830
    def set_defaults(self):
        """Set default values for variables that can also be defined
        in the config file.

        Placeholder: does nothing yet (its call in `__init__` is
        commented out).
        """
        pass
836
    def get_config(self, config_dir=None):
        """Retrieve values from configuration file.

        Placeholder: does nothing yet and ignores `config_dir` (its call
        in `__init__` is commented out).
        """
        pass
841
842     def determine_pkg_name(self):
843         if self.name:
844             self.package = self.name
845             self.short_pkg_name = self.name
846         elif self.package_path:
847             self.package = self.get_package_from_path(self.package_path)
848         else:
849             self.package = self.get_package_from_url()
850
851     def get_package_from_url(self):
852         """Use ``urlparse`` to obtain package path from URL.
853         """
854         (scheme,location,path,param,query,fragment_id) = urlparse(self.url)
855         return self.get_package_from_path(path)
856
857     def get_package_from_path(self, path):
858         """Get package name as file portion of path.
859         """
860         dir, file = os.path.split(path)
861         self.short_pkg_name = file
862         for package_type in self.package_types:
863             s = re.search("(.+)\.%s" % package_type, file)
864             if s:
865                 self.short_pkg_name = s.group(1)
866                 break
867         return file
868
    def configure_logging(self, logfile=None):
        """Default settings for logging.

        If verbose, log goes to console, else it goes to logfile
        log.debug goes to logfile
        log.info goes to console
        log.warn and log.error go to both logfile and stdout

        :Parameters:
          `logfile` : str
              Path of the log file. When not given, ``<short_pkg_name>.log``
              in the system temporary directory is used.

        Requires `self.short_pkg_name`, `self.verbose` and `self.quiet`
        to be set already.
        """
        if logfile:
            self.logfile = logfile
        else:
            self.logfile = os.path.join(tempfile.gettempdir(), self.short_pkg_name + ".log")

        # The log file is opened with buffering=1 and handed to the logger;
        # it is not closed in this method.
        logger.setconsumer('logfile', open(str(self.logfile), 'w', buffering=1))
        logger.setconsumer('console', logger.STDOUT)
        logger.setconsumer('null', None)

        if self.verbose:
            self.log = logger.MultipleProducer('cheesecake console')
        else:
            self.log = logger.MultipleProducer('cheesecake logfile')
        # When quiet, log.info is built for the logfile instead of the console.
        if self.quiet:
            self.log.info = logger.MultipleProducer('cheesecake logfile')
        else:
            self.log.info = logger.MultipleProducer('cheesecake console')
        self.log.debug = logger.MultipleProducer('cheesecake logfile')
        # NOTE(review): the docstring says warn/error go to both logfile and
        # stdout, but these producers are built from 'cheesecake console'
        # only -- confirm against the logger module.
        self.log.warn = logger.MultipleProducer('cheesecake console')
        self.log.error = logger.MultipleProducer('cheesecake console')

        self.log.debug("package = ", self.short_pkg_name)
899
900     def retrieve_pkg(self):
901         if self.name:
902             self.get_pkg_from_pypi()
903         elif self.url:
904             self.download_pkg()
905         else:
906             self.copy_pkg()
907
908     def get_package_from_url(self):
909         """Use ``urlparse`` to obtain package path from URL.
910         """
911         (scheme,location,path,param,query,fragment_id) = urlparse(self.url)
912         return self.get_package_from_path(path)       
913
914     def get_package_from_path(self, path):
915         """Get package name as file portion of path.
916         """
917         dir, file = os.path.split(path)
918         self.short_pkg_name = file
919         for package_type in self.package_types:
920             s = re.search("(.+)\.%s" % package_type, file)
921             if s:
922                 self.short_pkg_name = s.group(1)
923                 break
924         return file
925
926     def get_pkg_from_pypi(self):
927         """Download package using setuptools utilities.
928
929         :Ivariables:
930           download_url : str
931               URL that package was downloaded from.
932           distance_from_pypi : int
933               How many hops setuptools had to make to download package.
934           found_on_cheeseshop : bool
935               Whenever package has been found on CheeseShop.
936         """
937         try:
938             self.log.info("Trying to download package %s from PyPI using setuptools utilities" % self.name)
939             from setuptools.package_index import PackageIndex
940             from pkg_resources import Requirement
941             from distutils import log
942
943             # Temporarily set the log verbosity to INFO so we can capture setuptools info messages
944             old_threshold = log.set_threshold(log.INFO)
945             pkgindex = PackageIndex()
946             old_stdout = sys.stdout
947             sys.stdout = StdoutRedirector()
948             output = pkgindex.fetch(Requirement.parse(self.name),
949                                     self.sandbox,
950                                     force_scan=True,
951                                     source=True)
952             captured_stdout = sys.stdout.read_buffer()
953             sys.stdout = old_stdout
954             log.set_threshold(old_threshold)
955
956             if output is None:
957                 self.raise_exception("Error: Could not find distribution for " + self.name)
958
959             # Defaults.
960             self.download_url = ""
961             self.distance_from_pypi = 0
962             self.found_on_cheeseshop = False
963
964             for line in captured_stdout.split('\n'):
965                 s = re.search(r"Reading http(.*)", line)
966                 if s:
967                     inspected_url = s.group(1)
968                     if not re.search(r"www.python.org\/pypi", inspected_url):
969                         self.distance_from_pypi += 1
970                     continue
971                 s = re.search(r"Downloading (.*)", line)
972                 if s:
973                     self.download_url = s.group(1)
974                     break
975
976             self.sandbox_pkg_file = output
977             self.package = self.get_package_from_path(output)
978             self.log.info("Downloaded package %s from %s" % (self.package, self.download_url))
979
980             if re.search(r"cheeseshop.python.org", self.download_url):
981                 self.found_on_cheeseshop = True
982
983         except ImportError, e:
984             msg = "Error: setuptools is not installed and is required for downloading a package by name\n"
985             msg += "You can donwload and process a package by its full URL via the -u or --url option\n"
986             msg += "Example: python cheesecake.py --url=http://www.mems-exchange.org/software/durus/Durus-3.1.tar.gz"
987             self.raise_exception(msg)
988
989     def download_pkg(self):
990         """Use ``urllib.urlretrieve`` to download package to file in sandbox dir.
991         """
992         #self.log("Downloading package %s from URL %s" % (self.package, self.url))
993         self.sandbox_pkg_file = os.path.join(self.sandbox, self.package)
994         try:
995             downloaded_filename, headers = urlretrieve(self.url, self.sandbox_pkg_file)
996         except IOError, e:
997             self.log.error("Error downloading package %s from URL %s"  % (self.package, self.url))
998             self.raise_exception(str(e))
999         #self.log("Downloaded package %s to %s" % (self.package, downloaded_filename))
1000
1001         if re.search("Content-Type: details/html", str(headers)):
1002             f = open(downloaded_filename)
1003             if re.search("404 Not Found", "".join(f.readlines())):
1004                 f.close()
1005                 self.raise_exception("Got '404 Not Found' error while trying to download package ... exiting")
1006             f.close()
1007
1008         self.downloaded_from_url = True
1009        
1010     def copy_pkg(self):
1011         """Copy package file to sandbox directory.
1012         """
1013         self.sandbox_pkg_file = os.path.join(self.sandbox, self.package)
1014         if not os.path.isfile(self.package_path):
1015             self.raise_exception("%s is not a valid file ... exiting" % self.package_path)
1016         self.log("Copying file %s to %s" % (self.package_path, self.sandbox_pkg_file))
1017         shutil.copyfile(self.package_path, self.sandbox_pkg_file)
1018
1019     def unpack_pkg(self):
1020         """Unpack the package in the sandbox directory.
1021        
1022         Currently supported archive types:
1023
1024         * .tar.gz (handled with ``tarfile`` module)
1025         * .zip (handled with ``zipfile`` module)
1026
1027         :Ivariables:
1028           original_package_name : str
1029         """
1030         self.package_type = ""
1031
1032         for type in self.package_types:
1033             s = re.search(r"(.+)\.%s" % type, self.package)
1034             if s:
1035                 # package_name is name of package without file extension (ex. twill-7.3)
1036                 self.package_name = s.group(1)
1037                 self.package_type = type
1038                 break
1039         if not self.package_type:
1040             msg = "Could not determine package type for package '%s'" % self.package
1041             msg += "\nCurrently recognized types: " + " ".join(self.package_types)
1042             self.raise_exception(msg)
1043         self.log.debug("Package name: " + self.package_name)
1044         self.log.debug("Package type: " + self.package_type)
1045
1046         self.sandbox_pkg_dir = os.path.join(self.sandbox, self.package_name)
1047         if os.path.isdir(self.sandbox_pkg_dir):
1048             shutil.rmtree(self.sandbox_pkg_dir)
1049
1050         if self.package_type in ["tar.gz", "tgz"]:
1051             self.untar_pkg()
1052         elif self.package_type == "zip":
1053             self.unzip_pkg()
1054
1055         if self.unpack_dir != self.package_name:
1056             self.original_package_name = self.package_name
1057             self.package_name = self.unpack_dir
1058
1059         if not self.quiet:
1060             self.log.info("Detailed info available in log file %s" % self.logfile)
1061
1062     def untar_pkg(self):
1063         """Untar the package in the sandbox directory.
1064
1065         Uses tarfile module.
1066         """
1067         try:
1068             t = tarfile.open(self.sandbox_pkg_file)
1069         except tarfile.ReadError, e:
1070             self.raise_exception("Could not read tar file %s ... exiting" % self.sandbox_pkg_file)
1071
1072         for member in t.getmembers():
1073             t.extract(member, self.sandbox)
1074
1075         tarinfo = t.members[0]
1076         self.unpack_dir = tarinfo.name.split(os.sep)[0]
1077
1078         self.unpacked = True
1079            
1080     def unzip_pkg(self):
1081         """Unzip the package in the sandbox directory.
1082
1083         Uses zipfile module.
1084         """
1085         try:
1086             z = zipfile.ZipFile(self.sandbox_pkg_file)
1087         except zipfile.error:
1088             self.raise_exception("Error unzipping file %s ... exiting" % self.sandbox_pkg_file)
1089
1090         # Get directory structure from zip and create it in sandbox
1091         for name in z.namelist():
1092             (dir, file) = os.path.split(name)
1093             unpack_dir = dir
1094             target_dir = os.path.join(self.sandbox, dir)
1095             if not os.path.exists(target_dir):
1096                 os.makedirs(target_dir)
1097
1098         # Extract files to directory structure
1099         for i, name in enumerate(z.namelist()):
1100             if not name.endswith('/'):
1101                 outfile = open(os.path.join(self.sandbox, name), 'wb')
1102                 outfile.write(z.read(name))
1103                 outfile.flush()
1104                 outfile.close()
1105
1106         self.unpack_dir = unpack_dir.split(os.sep)[0]
1107
1108         self.unpacked = True
1109
1110     def walk_pkg(self):
1111         """Get package files and directories.
1112
1113         :Ivariables:
1114           dirs_list : list
1115               List of directories package contains.
1116           docstring_cnt : int
1117               Number of docstrings found in all package objects.
1118           docformat_cnt : int
1119               Number of formatted docstrings found in all package objects.
1120           files_list : list
1121               List of files package contains.
1122           functions : list
1123               List of all functions defined in package sources.
1124           object_cnt : int
1125               Number of documentable objects found in all package modules.
1126           package_dir : str
1127               Path to project directory.
1128         """
1129         self.package_dir = os.path.join(self.sandbox, self.package_name)
1130
1131         self.files_list, self.dirs_list = get_files_dirs_list(self.package_dir)
1132
1133         self.object_cnt = 0
1134         self.docstring_cnt = 0
1135         self.docformat_cnt = 0
1136         self.functions = []
1137
1138         # Parse all application files and count objects
1139         # (modules/classes/functions) and their associated docstrings.
1140         for py_file in get_files_of_type(self.files_list, 'module'):
1141             pyfile = os.path.join(self.package_dir, py_file)
1142             code = CodeParser(pyfile, self.log.debug)
1143
1144             self.object_cnt += code.object_count()
1145             self.docstring_cnt += code.docstring_count()
1146             self.docformat_cnt += code.formatted_docstrings_count
1147             self.functions += code.functions
1148
1149         # Log a bit of debugging info.
1150         self.log.debug("Found %d files: %s." % (len(self.files_list),
1151                                                 ', '.join(self.files_list)))
1152         self.log.debug("Found %d directories: %s." % (len(self.dirs_list),
1153                                                       ', '.join(self.dirs_list)))
1154
1155     def install_pkg(self):
1156         """Verify that package can be installed in alternate directory.
1157
1158         :Ivariables:
1159           installed : bool
1160               Describes whenever package has been succefully installed.
1161         """
1162         self.sandbox_install_dir = os.path.join(self.sandbox, "tmp_install_%s" % self.package_name)
1163
1164         cwd = os.getcwd()
1165         os.chdir(os.path.join(self.sandbox, self.package_name))
1166
1167         rc, output = run_cmd("python setup.py install --root=" + self.sandbox_install_dir)
1168
1169         # Install succeeded
1170         if not rc:
1171             self.installed = True
1172
1173         os.chdir(cwd)
1174
1175     def compute_cheesecake_index(self):
1176         """Compute overall Cheesecake index for the package by adding up
1177         specific indexes.
1178         """
1179         # Recursively compute all indices.
1180         max_cheesecake_index = self.index.max_value
1181
1182         # Pass Cheesecake instance to the main Index object.
1183         cheesecake_index = self.index.compute_with(self)
1184         percentage = (cheesecake_index * 100) / max_cheesecake_index
1185
1186         self.log.info("A given package can currently reach a MAXIMUM number of %d points" % max_cheesecake_index)
1187         self.log.info("Starting computation of Cheesecake index for package '%s'" % (self.package))
1188
1189         # Print summary.
1190         print
1191         print pad_line("=")
1192         print pad_msg("OVERALL CHEESECAKE INDEX (ABSOLUTE)", cheesecake_index)
1193         print "%s (%d out of a maximum of %d points is %d%%)" % \
1194               (pad_msg("OVERALL CHEESECAKE INDEX (RELATIVE)", percentage),
1195                cheesecake_index,
1196                max_cheesecake_index,
1197                percentage)
1198
1199         return cheesecake_index
1200
1201
def process_cmdline_args():
    """
    Parse command-line options

    :return: The options object produced by ``OptionParser.parse_args``;
        positional arguments are discarded.
    """
    # (short flag, long flag, destination, default, help text)
    value_options = [
        ("-n", "--name", "name", "",
         "package name (will be retrieved via setuptools utilities, if present)"),
        ("-u", "--url", "url", "",
         "package URL"),
        ("-p", "--path", "path", "",
         "path of tar.gz/zip package on local file system"),
        ("-s", "--sandbox", "sandbox", None,
         ("directory where package will be unpacked "
          "(default is to use random directory inside %s)") % tempfile.gettempdir()),
        ("-c", "--config", "config", None,
         "directory with custom configuration (default=~/.cheesecake)"),
        ("-l", "--logfile", "logfile", None,
         "file to log all cheesecake messages"),
    ]
    # (short flag, long flag, destination, help text); all default to False
    switch_options = [
        ("-v", "--verbose", "verbose",
         "verbose output (default=False)"),
        ("-q", "--quiet", "quiet",
         "only print Cheesecake index value (default=False)"),
    ]

    parser = OptionParser()
    for short_flag, long_flag, dest, default, help_text in value_options:
        parser.add_option(short_flag, long_flag, dest=dest,
                          default=default, help=help_text)
    for short_flag, long_flag, dest, help_text in switch_options:
        parser.add_option(short_flag, long_flag, action="store_true",
                          dest=dest, default=False, help=help_text)

    return parser.parse_args()[0]
1230
def main():
    """
    Display Cheesecake index for package specified via command-line options

    Exits with status 1 when no package name, URL or path is given, and
    also when processing the package fails with a ``CheesecakeError``.
    """
    options = process_cmdline_args()

    if not options.name and not options.url and not options.path:
        print("Error: No package name, URL or path specified (see --help)")
        sys.exit(1)

    try:
        c = Cheesecake(name=options.name, url=options.url, path=options.path,
                       sandbox=options.sandbox, config=options.config,
                       logfile=options.logfile, verbose=options.verbose,
                       quiet=options.quiet)
        c.compute_cheesecake_index()
        c.cleanup()
    except CheesecakeError:
        print(str(sys.exc_info()[1]))
        # Report the failure to the calling shell as well.
        sys.exit(1)
1257
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the browser.