| 1 |
#!/usr/bin/env python |
|---|
| 2 |
# -*- coding: utf-8 -*- |
|---|
| 3 |
# |
|---|
| 4 |
# Compute Cheesecake scores for all packages on PyPI. |
|---|
| 5 |
# |
|---|
| 6 |
|
|---|
| 7 |
import datetime |
|---|
| 8 |
import os |
|---|
| 9 |
import re |
|---|
| 10 |
import sys |
|---|
| 11 |
import time |
|---|
| 12 |
import urllib2 |
|---|
| 13 |
|
|---|
| 14 |
# Put the parent directory of this script first on sys.path, so that the
# ``cheesecake`` package imported below is looked up there first.
current_dir = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(current_dir, '../'))

# Prefer the standard-library ``subprocess`` module; when the interpreter
# does not ship one, fall back to the copy inside the ``cheesecake`` package.
try:
    import subprocess
except ImportError:
    from cheesecake import subprocess


# Absolute path of the ``cheesecake_index`` script, located one directory
# above this file.
CHEESECAKE_PATH = os.path.abspath(os.path.join(current_dir,
                                               '../cheesecake_index'))

# Directory that receives the per-package .stdout/.stderr/.log files.
LOG_PATH = '/tmp/cheesecake_pypi_results'
|---|
| 27 |
|
|---|
| 28 |
|
|---|
| 29 |
def read_file_contents(filename):
    """Return the whole contents of the file called `filename`.

    The file is opened with the default mode (read-only text) and is always
    closed before this function returns, even when reading raises.
    """
    fd = open(filename)
    try:
        return fd.read()
    finally:
        fd.close()
|---|
| 36 |
|
|---|
| 37 |
def replace_chars(string):
    """Replace a fixed set of URL percent-escapes in `string`.

    ``%20`` (space) becomes an underscore and ``%3A`` becomes a colon.
    Quote, parentheses, asterisk and question mark are emitted with a
    leading backslash -- presumably so that they survive the shell command
    line built in `score_one_package`.  ``%C3%B1`` becomes a literal "ñ".
    Any escape that is not in the table is left untouched.

    Returns the translated string.
    """
    # An ordered sequence of pairs keeps the substitution order fixed;
    # a dict would be walked in arbitrary order.
    replacements = (
        ('%20', '_'),
        ('%27', "\\'"),
        ('%28', '\\('),
        ('%29', '\\)'),
        ('%2A', '\\*'),
        ('%3A', ':'),
        ('%3F', '\\?'),
        ('%C3%B1', 'ñ'),
    )

    for escaped, plain in replacements:
        string = string.replace(escaped, plain)

    return string
|---|
| 52 |
|
|---|
| 53 |
def get_package_names():
    """Yield every package listed on the PyPI index page.

    The index page is downloaded in full, then scanned line by line for
    package links.  For each link a (name, version) tuple is yielded, with
    both parts passed through `replace_chars`.

    This is a generator: the download happens when iteration starts.
    """
    package_regex = re.compile(r'<td><a href="/pypi/([^/]+)/([^/]+)">')

    pypi = urllib2.urlopen("http://python.org/pypi?%3Aaction=index")
    try:
        html_lines = pypi.readlines()
    finally:
        # Close the response even when the download fails part-way.
        pypi.close()

    for line in html_lines:
        m = package_regex.search(line)
        if m:
            # To make setuptools download a package, replace_chars converts
            # all spaces (%20) to underscores.
            yield (replace_chars(m.group(1)), replace_chars(m.group(2)))
|---|
| 69 |
|
|---|
| 70 |
def score_one_package(package_name, log_template):
    """Score one package leaving information in logs along the way.

    :Parameters:
      * package_name -> value passed to the Cheesecake ``-n`` option
      * log_template -> path containing one ``%s``, which is filled with
        'log', 'stdout' and 'stderr' to name the three log files

    :Logs:
      * .stdout -> Cheesecake stdout
      * .stderr -> Cheesecake stderr
      * .log -> Cheesecake log for given package

    :Returns:
      The integer score parsed from Cheesecake's stdout, or -1 when the
      process exits with a non-zero status or prints no score line.
    """
    log_file = log_template % 'log'
    stdout_path = log_template % 'stdout'

    stdout_fd = open(stdout_path, 'w')
    try:
        stderr_fd = open(log_template % 'stderr', 'w')
        try:
            # NOTE(review): the command is interpolated into a shell string,
            # so package_name must already be shell-safe.  replace_chars
            # backslash-escapes some characters, presumably for this reason;
            # confirm before feeding names from any other source.
            process = subprocess.Popen('%s -l %s -n %s' %
                                       (CHEESECAKE_PATH, log_file, package_name),
                                       stdout=stdout_fd,
                                       stderr=stderr_fd,
                                       shell=True)
            result = process.wait()
        finally:
            # Close both log files even if starting or waiting for the
            # process raises.
            stderr_fd.close()
    finally:
        stdout_fd.close()

    if result != 0:
        return -1

    score_regex = r'OVERALL CHEESECAKE INDEX \(RELATIVE\) \.\.\.\.\s+([\d]+)'
    m = re.search(score_regex, read_file_contents(stdout_path))
    if m:
        return int(m.group(1))

    return -1
|---|
| 102 |
|
|---|
| 103 |
def time2datetime(t):
    """Convert `t`, a time in seconds since the epoch, to a datetime.

    The value is broken down with time.localtime(), so the result is a
    naive datetime in local time with whole-second resolution.
    """
    year, month, day, hour, minute, second = time.localtime(t)[:6]
    return datetime.datetime(year, month, day, hour, minute, second)
|---|
| 107 |
|
|---|
| 108 |
def time_delta(start, end):
    """Return the time elapsed from `start` to `end` as a string.

    Both arguments are times in seconds since the epoch (as returned by
    time.time()).  Each one is truncated to whole seconds before the
    subtraction.  The result is the str() of a datetime.timedelta, for
    example '0:01:05'.

    The difference is taken on the epoch values themselves rather than on
    local calendar times, so a daylight-saving change between the two
    timestamps does not skew the result.
    """
    elapsed = datetime.timedelta(seconds=int(end) - int(start))
    return str(elapsed)
|---|
| 110 |
|
|---|
| 111 |
def score_all_packages():
    """Score every package reported by `get_package_names` and print a report.

    Each package is scored with `score_one_package`; its log files go to
    LOG_PATH, which is created when missing.  The report written to stdout
    has three sections: the packages that could not be scored, every scored
    package ordered by ascending score together with the time it took, and
    a short summary.
    """
    if not os.path.exists(LOG_PATH):
        os.mkdir(LOG_PATH)

    failures = []
    scored = []

    for name, version in get_package_names():
        label = '%s-%s' % (name, version)
        template = os.path.join(LOG_PATH, label + '.%s')

        began = time.time()
        score = score_one_package('%s==%s' % (name, version), template)
        finished = time.time()

        # -1 is the failure marker returned by score_one_package.
        if score == -1:
            failures.append(label)
            continue
        scored.append((label, score, time_delta(began, finished)))

    report = ["=== Packages that Cheesecake failed to score ==="]
    report.extend(failures)

    report.append("")
    report.append("=== All packages scores ===")

    # Sort by score, lowest first.
    def by_score(left, right):
        return cmp(left[1], right[1])
    scored.sort(by_score)

    for entry in scored:
        report.append("%s SCORE:%s (in %s time)" % entry)

    report.append("")
    report.append("=== Summary ===")
    report.append("Checked %d packages in overall." % (len(scored) + len(failures)))
    report.append("Failed for %d." % len(failures))
    above_half = [entry for entry in scored if entry[1] > 50]
    report.append("%d packages got more than 50%% Cheesecake score." % len(above_half))

    # Emit the report in one write; every line ends with a newline.
    sys.stdout.write("\n".join(report) + "\n")
|---|
| 146 |
|
|---|
| 147 |
|
|---|
| 148 |
# Run the full scoring pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    score_all_packages()
|---|
| 150 |
|
|---|