-
Notifications
You must be signed in to change notification settings - Fork 28
Expand file tree
/
Copy pathocr.py
More file actions
executable file
·28 lines (22 loc) · 945 Bytes
/
ocr.py
File metadata and controls
executable file
·28 lines (22 loc) · 945 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/python
# http://www.pythonchallenge.com/pc/def/ocr.html
import urllib
import re
import string
def get_challenge():
    """Download the OCR challenge page and return its last HTML comment.

    The page source is fetched over HTTP, every ``<!-- ... -->`` comment is
    extracted, and the text inside the last one is returned (without the
    comment delimiters).

    Returns:
        str: the contents of the final HTML comment on the page.

    Raises:
        IndexError: if the page contains no HTML comment.
    """
    # Resolve urlopen locally so the function runs on both Python 2 and 3
    # regardless of how the file-level ``import urllib`` behaves.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() guarantees the connection is released even if read() fails.
    with closing(urlopen('http://www.pythonchallenge.com/pc/def/ocr.html')) as response:
        text_src = response.read()

    # Python 3 returns bytes; the regex below works on text.
    if not isinstance(text_src, str):
        text_src = text_src.decode('utf-8', 'replace')

    # re.S lets the comment body span multiple lines.
    comments = re.compile('<!--((?:[^-]+|-[^-]|--[^>])*)-->', re.S).findall(text_src)
    if not comments:
        # Same exception type the original indexing raised, with a clearer message.
        raise IndexError('no HTML comment found in challenge page')
    return comments[-1]
# Fetch the puzzle text hidden in the challenge page.
text = get_challenge()

# Tally how often each character occurs in the puzzle text.
counts = {}
for c in text:
    counts[c] = counts.get(c, 0) + 1
# Tally observed for this puzzle:
# {'\n': 1221, '!': 6079, '#': 6115, '%': 6104, '$': 6046, '&': 6043, ')': 6186, '(': 6154, '+': 6066, '*': 6034, '@': 6157, '[': 6108, ']': 6152, '_': 6112, '^': 6030, 'a': 1, 'e': 1, 'i': 1, 'l': 1, 'q': 1, 'u': 1, 't': 1, 'y': 1, '{': 6046, '}': 6105}

# Method I: keep the characters (newlines excluded) that occur less often
# than the average count per distinct character.
# Floor division keeps an integer threshold on Python 2 and 3 alike; the
# guard avoids dividing by zero when the text is empty.
avg = len(text) // len(counts) if counts else 0
print(''.join([c for c in text if c != '\n' and counts[c] < avg]))

# Method II: Find all characters
# Lowercase letters via a regex ...
print(''.join(re.compile('[a-z]').findall(text)))
# ... and any ASCII letter via a membership test. ascii_letters is
# locale-independent and exists on both Python 2 and 3.
print(''.join([c for c in text if c in string.ascii_letters]))
# Answer: equality
# Next level: http://www.pythonchallenge.com/pc/def/equality.html