root/branches/mk/support/score_pypi.py

Revision 72, 4.1 kB (checked in by mk, 7 years ago)

Added script for computing scores of all PyPI packages (closes ticket #32).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
Line 
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Compute Cheesecake scores for all packages on PyPI.
5 #
6
7 import datetime
8 import os
9 import re
10 import sys
11 import time
12 import urllib2
13
# Directory containing this script; the paths below are resolved against it.
current_dir = os.path.dirname(__file__)
# Put the parent directory first on sys.path -- presumably so that the
# in-tree ``cheesecake`` package (used by the fallback import below) wins.
sys.path.insert(0, os.path.join(current_dir, '../'))

try:
    import subprocess
except ImportError:
    # No subprocess module in the standard library of this interpreter;
    # fall back to the copy shipped inside the cheesecake package.
    from cheesecake import subprocess


# Absolute path of the ``cheesecake_index`` executable, expected one
# directory above this script.
CHEESECAKE_PATH = os.path.abspath(os.path.join(current_dir,
                                               '../cheesecake_index'))

# Directory that receives the per-package .stdout/.stderr/.log files.
LOG_PATH = '/tmp/cheesecake_pypi_results'
27
28
def read_file_contents(filename):
    """Return the whole contents of the file `filename`.

    The file is opened with the default (read) mode and is closed again
    before returning, even when reading raises an exception.
    """
    fd = open(filename)
    try:
        return fd.read()
    finally:
        # try/finally (rather than close-after-read) so that a failing
        # read() does not leak the descriptor.
        fd.close()
36
def replace_chars(string):
    """Translate a fixed set of URL percent-escapes found in `string`.

    ``%20`` (space) is turned into an underscore, ``%3A`` into a colon and
    ``%C3%B1`` into n-tilde.  The escapes for ``'``, ``(``, ``)``, ``*`` and
    ``?`` are replaced by the character preceded by a backslash --
    presumably because the result ends up in a shell command line.

    Escapes that are not listed, and all other text, are left untouched.
    Return the translated string.
    """
    # A tuple of pairs gives a fixed replacement order and needs no
    # dict-iteration method.
    replacements = (
        ('%20', '_'),
        ('%27', "\\'"),
        ('%28', '\\('),
        ('%29', '\\)'),
        ('%2A', '\\*'),
        ('%3A', ':'),
        ('%3F', '\\?'),
        ('%C3%B1', 'ñ'),
    )

    for escaped, replacement in replacements:
        string = string.replace(escaped, replacement)

    return string
52
def get_package_names():
    """Get list of all packages on PyPI.

    Download the PyPI index page and, for each package link found in it,
    yield a (name, version) tuple.  Both values are taken from the link
    target and passed through replace_chars().
    """
    # Compiled once instead of handing the pattern string to re.search()
    # for every line of the page.
    package_regex = re.compile(r'<td><a href="/pypi/([^/]+)/([^/]+)">')

    pypi = urllib2.urlopen("http://python.org/pypi?%3Aaction=index")
    try:
        html_lines = pypi.readlines()
    finally:
        # Close the response even if the download fails part-way.
        pypi.close()

    for line in html_lines:
        m = package_regex.search(line)
        if m:
            # To make setuptools download a package, convert all spaces
            # to underscores.
            yield (replace_chars(m.group(1)), replace_chars(m.group(2)))
69
def score_one_package(package_name, log_template):
    """Score one package leaving information in logs along the way.

    `package_name` is handed to Cheesecake through its ``-n`` option.
    `log_template` is a format string with one ``%s`` placeholder; it is
    filled with ``'log'``, ``'stdout'`` and ``'stderr'`` to build the paths
    of the files listed below.

    Return the integer found on the "OVERALL CHEESECAKE INDEX (RELATIVE)"
    line of Cheesecake's stdout, or -1 when Cheesecake exits with a
    non-zero status or no such line is present.

    :Logs:
      * .stdout -> Cheesecake stdout
      * .stderr -> Cheesecake stderr
      * .log -> Cheesecake log for given package
    """
    log_file = log_template % 'log'
    stdout_path = log_template % 'stdout'
    stderr_path = log_template % 'stderr'

    # NOTE(review): the command is built by string interpolation and run
    # through the shell, and package_name originates from a scraped web
    # page.  replace_chars() backslash-escapes only a few characters, so
    # other shell metacharacters reach the shell unescaped.  Switching to
    # an argument list would have to be coordinated with that escaping.
    command = '%s -l %s -n %s' % (CHEESECAKE_PATH, log_file, package_name)

    # Nested try/finally so both files are closed even if starting or
    # waiting for the process raises.
    stdout_fd = open(stdout_path, 'w')
    try:
        stderr_fd = open(stderr_path, 'w')
        try:
            process = subprocess.Popen(command,
                                       stdout=stdout_fd,
                                       stderr=stderr_fd,
                                       shell=True)
            result = process.wait()
        finally:
            stderr_fd.close()
    finally:
        stdout_fd.close()

    if result == 0:
        score_regex = r'OVERALL CHEESECAKE INDEX \(RELATIVE\) \.\.\.\.\s+([\d]+)'
        stdout = read_file_contents(stdout_path)
        m = re.search(score_regex, stdout)
        if m:
            return int(m.group(1))

    return -1
102
def time2datetime(t):
    """Convert `t`, seconds since the epoch, to a local datetime object.

    The result is naive (no tzinfo) and has whole-second resolution: any
    fractional part of `t` is discarded.
    """
    # The first six struct_time fields are exactly the datetime arguments.
    year, month, day, hour, minute, second = time.localtime(t)[:6]
    return datetime.datetime(year, month, day, hour, minute, second)
107
def time_delta(start, end):
    """Return the time elapsed between two timestamps as a string.

    `start` and `end` are seconds since the epoch (as returned by
    time.time()).  Each is truncated to whole seconds before subtracting,
    and the difference is formatted like str(datetime.timedelta), e.g.
    ``'0:01:05'``.
    """
    # Subtract the epoch values directly instead of going through naive
    # local datetimes: those give a result that is off by the UTC-offset
    # change whenever a DST transition falls between start and end.
    elapsed_seconds = int(end) - int(start)
    return str(datetime.timedelta(seconds=elapsed_seconds))
110
111 def score_all_packages():
112     packages_failed = []
113     packages_scores = []
114
115     if not os.path.exists(LOG_PATH):
116         os.mkdir(LOG_PATH)
117
118     for name, version in get_package_names():
119         name_and_version = '%s-%s' % (name, version)
120         log_template = os.path.join(LOG_PATH, name_and_version + '.%s')
121         start = time.time()
122         result = score_one_package('%s==%s' % (name, version), log_template)
123         end = time.time()
124         if result == -1:
125             packages_failed.append(name_and_version)
126         else:
127             packages_scores.append((name_and_version, result, time_delta(start, end)))
128
129     print "=== Packages that Cheesecake failed to score ==="
130     for failed in packages_failed:
131         print failed
132
133     print
134     print "=== All packages scores ==="
135     # Sorty by score.
136     packages_scores.sort(lambda x,y: cmp(x[1], y[1]))
137
138     for name, score, timing in packages_scores:
139         print "%s SCORE:%s (in %s time)" % (name, score, timing)
140
141     print
142     print "=== Summary ==="
143     print "Checked %d packages in overall." % (len(packages_scores) + len(packages_failed))
144     print "Failed for %d." % len(packages_failed)
145     print "%d packages got more than 50%% Cheesecake score." % len(filter(lambda x: x[1] > 50, packages_scores))
146
147
# Run the full PyPI scoring pass only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    score_all_packages()
150
Note: See TracBrowser for help on using the browser.