Package gluon :: Module sanitizer
[hide private]
[frames] | no frames]

Source Code for Module gluon.sanitizer

  1  # from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496942 
  2  # Title: Cross-site scripting (XSS) defense 
  3  # Submitter: Josh Goldfoot (other recipes) 
  4  # Last Updated: 2006/08/05 
  5  # Version no: 1.0  
  6   
  7  from htmllib import HTMLParser 
  8  from cgi import escape 
  9  from urlparse import urlparse 
 10  from formatter import AbstractFormatter 
 11  from htmlentitydefs import entitydefs 
 12  from xml.sax.saxutils import quoteattr 
 13   
 14  __all__=['sanitize'] 
 15   
def xssescape(text):
    """Return ``text`` with HTML metacharacters and colons neutralised.

    ``escape`` (called with ``quote=True``) turns ``&``, ``<``, ``>`` and
    ``"`` into entities; afterwards every ``:`` is rewritten as ``&#58;``.
    """
    entity_encoded = escape(text, quote=True)
    # The colon is replaced last: the entities produced above never contain
    # one, so nothing is encoded twice.
    return entity_encoded.replace(':', '&#58;')
19
20 -class XssCleaner(HTMLParser):
21 - def __init__(self, 22 permitted_tags = ['a','b','blockquote','br/','i', 'li', 'ol', 'ul', 'p', 'cite','code','pre','img/'], 23 allowed_attributes={'a':['href','title'],'img':['src','alt'],'blockquote':['type']}, 24 fmt = AbstractFormatter):
25 HTMLParser.__init__(self, fmt) 26 self.result="" 27 self.open_tags= [] 28 self.permitted_tags=[i for i in permitted_tags if i[-1]!='/'] 29 self.requires_no_close=[i[:-1] for i in permitted_tags if i[-1]=='/'] 30 self.permitted_tags+=self.requires_no_close 31 self.allowed_attributes = allowed_attributes 32 # The only schemes allowed in URLs (for href and src attributes). 33 # Adding "javascript" or "vbscript" to this list would not be smart. 34 self.allowed_schemes = ['http','https','ftp']
35 - def handle_data(self, data):
36 if data: self.result += xssescape(data)
37 - def handle_charref(self, ref):
38 if len(ref) < 7 and ref.isdigit(): self.result += '&#%s;' % ref 39 else: self.result += xssescape('&#%s' % ref)
40 - def handle_entityref(self, ref):
41 if ref in entitydefs: self.result += '&%s;' % ref 42 else: self.result += xssescape('&%s' % ref)
43 - def handle_comment(self, comment):
44 if comment: self.result += xssescape("<!--%s-->" % comment)
45 - def handle_starttag(self, tag, method, attrs):
46 if tag not in self.permitted_tags: 47 self.result += xssescape("<%s>" % tag) 48 else: 49 bt = "<" + tag 50 if tag in self.allowed_attributes: 51 attrs = dict(attrs) 52 self.allowed_attributes_here = \ 53 [x for x in self.allowed_attributes[tag] if x in attrs \ 54 and len(attrs[x]) > 0] 55 for attribute in self.allowed_attributes_here: 56 if attribute in ['href', 'src', 'background']: 57 if self.url_is_acceptable(attrs[attribute]): 58 bt += ' %s="%s"' % (attribute, attrs[attribute]) 59 else: 60 bt += ' %s=%s' % (xssescape(attribute), quoteattr(attrs[attribute])) 61 if bt == "<a" or bt == "<img": return 62 if tag in self.requires_no_close: bt += "/" 63 bt += ">" 64 self.result += bt 65 self.open_tags.insert(0, tag)
66 - def handle_endtag(self, tag, attrs):
67 bracketed = "</%s>" % tag 68 if tag not in self.permitted_tags: 69 self.result += xssescape(bracketed) 70 elif tag in self.open_tags: 71 self.result += bracketed 72 self.open_tags.remove(tag)
73 - def unknown_starttag(self, tag, attributes):
74 self.handle_starttag(tag, None, attributes)
75 - def unknown_endtag(self, tag):
76 self.handle_endtag(tag, None)
77 - def url_is_acceptable(self,url):
78 """ 79 Requires all URLs to be "absolute." 80 """ 81 parsed = urlparse(url) 82 return parsed[0] in self.allowed_schemes and '.' in parsed[1]
83 - def strip(self, rawstring):
84 """ 85 Returns the argument stripped of potentially harmful 86 HTML or Javascript code 87 """ 88 self.result = "" 89 self.feed(rawstring) 90 for endtag in self.open_tags: 91 if endtag not in self.requires_no_close: 92 self.result += "</%s>" % endtag 93 return self.result
94 - def xtags(self):
95 """ 96 Returns a printable string informing the user which tags are allowed 97 """ 98 self.permitted_tags.sort() 99 tg = "" 100 for x in self.permitted_tags: 101 tg += "<" + x 102 if x in self.allowed_attributes: 103 for y in self.allowed_attributes[x]: 104 tg += ' %s=""' % y 105 tg += "> " 106 return xssescape(tg.strip())
107
def sanitize(text,
             permitted_tags=['a', 'b', 'blockquote', 'br/', 'i', 'li',
                             'ol', 'ul', 'p', 'cite', 'code', 'pre',
                             'img/'],
             allowed_attributes={'a': ['href', 'title'],
                                 'img': ['src', 'alt'],
                                 'blockquote': ['type']}):
    """Return ``text`` cleaned by a freshly built ``XssCleaner``.

    text               -- the markup to sanitize.
    permitted_tags     -- tag names to let through; a trailing ``/`` marks
                          a tag that needs no closing tag.
    allowed_attributes -- maps a tag name to the attribute names allowed
                          on that tag.

    Both whitelists are handed unchanged to ``XssCleaner``; the result is
    whatever ``XssCleaner.strip`` returns for ``text``.
    """
    cleaner = XssCleaner(permitted_tags=permitted_tags,
                         allowed_attributes=allowed_attributes)
    return cleaner.strip(text)
110