1 """
2 This file is part of web2py Web Framework (Copyrighted, 2007)
3 Developed by Massimo Di Pierro <mdipierro@cs.depaul.edu>
4 License: GPL v2
5 """
6
7 import re
8 import cgi
9
10 __all__=['highlight']
11
class Highlighter(object):
    """
    Do syntax highlighting.

    An instance is configured for one language mode and turns source text
    into HTML in which each recognised token is wrapped in a
    ``<span style="...">`` element (or an ``<a>`` link for web2py names).

    The tokenizer tables live in ``all_styles``: for each internal mode
    there is a callback plus an ordered tuple of
    ``(token name, compiled regex, style)`` rules.  A style of ``None``
    means "no span", ``'Keep'`` means "leave the current span open", and a
    style starting with ``link:`` makes the token a hyperlink.
    """

    # Public mode name -> (internal mode, token names that must be ignored).
    _MODE_ALIASES = {
        'WEB2PY': ('PYTHON', ()),
        'PYTHON': ('PYTHON', ('GOTOHTML',)),
        'CPP': ('C', ()),
        'C': ('C', ('CPPKEYWORD',)),
        'HTML_PLAIN': ('HTML', ('GOTOPYTHON',)),
        'HTML': ('HTML', ()),
    }

    # cgi.escape is the function this module has always used, but it was
    # removed in Python 3.8; html.escape is its replacement.  cgi is tried
    # first so that Python 2 behaviour is untouched.
    try:
        from cgi import escape as _raw_escape
    except ImportError:
        from html import escape as _raw_escape
    _raw_escape = staticmethod(_raw_escape)

    def __init__(self, mode, link=None, styles=None):
        """
        Initialise highlighter: mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN)

        mode   -- language name, case-insensitive.
        link   -- URL prefix substituted into the ``link:`` style of web2py names.
        styles -- optional dict mapping a token name to a CSS style that
                  overrides the built-in style for that token.

        Raises SyntaxError when the mode is not one of the supported names.
        """
        mode = mode.upper()
        if styles is None:
            styles = {}
        self.link = link
        self.styles = styles
        self.output = []
        self.span_style = None
        # Delimiter of the triple-quoted string currently being scanned.
        self.strMultilineString = ''
        try:
            self.mode, suppressed = self._MODE_ALIASES[mode]
        except KeyError:
            raise SyntaxError('unsupported highlighting mode: %r' % mode)
        self.suppress_tokens = list(suppressed)

    def _escape(self, text):
        """Return text with &, < and > replaced by HTML entities (quotes are left alone)."""
        return self._raw_escape(text, False)

    def c_tokenizer(self, token, match, style):
        """
        Callback for C specific highlighting.

        Emits the escaped token text in the requested style and never
        switches mode (returns None).
        """
        value = self._escape(match.group())
        self.change_style(token, style)
        self.output.append(value)

    def python_tokenizer(self, token, match, style):
        """
        Callback for python specific highlighting.

        Returns the name of the mode to switch to, or None to stay in the
        current mode.
        """
        value = self._escape(match.group())
        if token == 'MULTILINESTRING':
            # Opening triple quote: remember which delimiter opened the
            # string so that only the same delimiter can close it.
            self.change_style(token, style)
            self.output.append(value)
            self.strMultilineString = match.group(1)
            return 'PYTHONMultilineString'
        elif token == 'ENDMULTILINESTRING':
            if match.group(1) == self.strMultilineString:
                # Matching delimiter: emit the string body in the style
                # that is already open and go back to normal python mode.
                self.output.append(value)
                self.strMultilineString = ''
                return 'PYTHON'
            # Otherwise fall through: the other kind of triple quote is
            # just text inside the string and scanning continues.
        if style and style[:5] == 'link:':
            # Style has the form 'link:<url>;<css>': render a hyperlink
            # whose target is the url followed by the token itself.
            self.change_style(None, None)
            url, style = style[5:].split(';', 1)
            self.output.append('<a href="%s%s" style="%s">%s</a>' % (url, value, style, value))
        else:
            self.change_style(token, style)
            self.output.append(value)
        if token == 'GOTOHTML':
            return 'HTML'
        return None

    def html_tokenizer(self, token, match, style):
        """
        Callback for HTML specific highlighting.

        Returns 'PYTHON' after a GOTOPYTHON token, otherwise None.
        """
        value = self._escape(match.group())
        self.change_style(token, style)
        self.output.append(value)
        if token == 'GOTOPYTHON':
            return 'PYTHON'
        return None

    # mode -> (tokenizer callback, ordered rules).  Rules are tried in
    # order at the current position and the first regex that matches wins.
    all_styles = {
        'C': (c_tokenizer,
              (
                  ('COMMENT', re.compile(r'//.*\r?\n'), 'color: green; font-style: italic'),
                  ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL), 'color: green; font-style: italic'),
                  ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n', re.DOTALL), 'color: magenta; font-style: italic'),
                  ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'), 'font-weight: bold'),
                  ('NUMBER', re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
                   'color: red'),
                  ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|' +
                                         r'signed|unsigned|float|double|' +
                                         r'goto|break|return|continue|asm|' +
                                         r'case|default|if|else|switch|while|for|do|' +
                                         r'struct|union|enum|typedef|' +
                                         r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'), 'color:#185369; font-weight: bold'),
                  ('CPPKEYWORD', re.compile(r'(class|private|protected|public|template|new|delete|' +
                                            r'this|friend|using|inline|export|bool|throw|try|catch|' +
                                            r'operator|typeid|virtual)(?![a-zA-Z0-9_])'), 'color: blue; font-weight: bold'),
                  ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'), 'color: #FF9966'),
                  ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'), None),
                  ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
              )),

        'PYTHON': (python_tokenizer,
                   (
                       ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
                       ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'), 'font-weight: bold'),
                       ('NUMBER', re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
                        'color: red'),
                       ('KEYWORD', re.compile(r'(def|class|break|continue|del|exec|finally|pass|' +
                                              r'print|raise|return|try|except|global|assert|lambda|' +
                                              r'yield|for|while|if|elif|else|and|in|is|not|or|import|' +
                                              r'from|True|False)(?![a-zA-Z0-9_])'), 'color:#185369; font-weight: bold'),
                       ('WEB2PY', re.compile(r'(request|response|session|cache|redirect|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CENTER|CODE|DIV|EM|EMBED|FIELDSET|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|IMG|INPUT|LABEL|LI|LINK|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|TD|TEXTAREA|TH|TITLE|TT|T|UL|IS_ALPHANUMERIC|IS_DATETIME|IS_DATE|IS_EMAIL|IS_EXPR|IS_FLOAT_IN_RANGE|IS_INT_IN_RANGE|IS_IN_SET|IS_LENGTH|IS_MATCH|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|SQLDB|SQLField|SQLFORM|SQLTABLE)(?![a-zA-Z0-9_])'), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
                       ('MAGIC', re.compile(r'self|None'), 'color:#185369; font-weight: bold'),
                       ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'), 'color: #FF9966'),
                       ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'), 'color: #FF9966'),
                       ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'), None),
                       ('COMMENT', re.compile(r'\#.*\r?\n'), 'color: green; font-style: italic'),
                       ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
                   )),

        'PYTHONMultilineString': (python_tokenizer,
                                  (
                                      ('ENDMULTILINESTRING', re.compile(r'.*?("""|\'\'\')', re.DOTALL), 'color: darkred'),
                                  )),

        'HTML': (html_tokenizer,
                 (
                     ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
                     ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'), 'color: green; font-style: italic'),
                     ('XMLCRAP', re.compile(r'<![^>]*>'), 'color: blue; font-style: italic'),
                     ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE + re.DOTALL), 'color: black'),
                     ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'), 'color: darkred; font-weight: bold'),
                     ('ENDTAG', re.compile(r'/?>'), 'color: darkred; font-weight: bold'),
                 )),
    }

    def highlight(self, data):
        """
        Syntax highlight some code in the mode chosen at construction.
        Returns html version of code.

        Output accumulates in ``self.output``, which is only reset by
        ``__init__``; use a fresh instance for each piece of code.
        """
        i = 0
        length = len(data)
        mode = self.mode
        while i < length:
            # Looked up on every pass because a token may switch the mode.
            tokenizer, rules = Highlighter.all_styles[mode]
            for token, o_re, style in rules:
                if token in self.suppress_tokens:
                    continue
                match = o_re.match(data, i)
                if not match:
                    continue
                if style:
                    # Only the web2py 'link:' style has a placeholder; the
                    # other styles pass through the formatting unchanged.
                    style = style % dict(link=self.link)
                new_mode = tokenizer(self, token, match, style)
                if new_mode is not None:
                    mode = new_mode
                # Always advance, even if a rule matched the empty string.
                i += max(1, len(match.group()))
                break
            else:
                # No rule matched here: emit the character unstyled.  It
                # must still be escaped, otherwise raw '<' and '&' (HTML
                # text, or the body of an unterminated triple-quoted
                # string) would be injected into the generated markup.
                self.change_style(None, None)
                self.output.append(self._escape(data[i]))
                i += 1
        self.change_style(None, None)
        return "".join(self.output).expandtabs(4)

    def change_style(self, token, style):
        """
        Generate output to change from existing style to another style only.

        A per-token override from ``self.styles`` replaces the requested
        style.  The style 'Keep' leaves whatever span is open untouched.
        """
        if token in self.styles:
            style = self.styles[token]
        if self.span_style != style:
            if style != 'Keep':
                if self.span_style is not None:
                    self.output.append('</span>')
                if style is not None:
                    self.output.append('<span style="%s">' % style)
                self.span_style = style
171
def highlight(code, language, link='/exmaples/globals/vars/', counter=1, styles=None, attributes=None):
    """
    Render code as an HTML table with a line-number column and a code column.

    code       -- source text to render.
    language   -- 'PYTHON', 'C', 'CPP', 'HTML' or 'WEB2PY' (case-insensitive)
                  to get syntax highlighting; anything else (including
                  None) is only HTML-escaped.
    link       -- URL prefix handed to the highlighter for web2py names.
    counter    -- first line number; None for an empty number column, or a
                  string to print that string once instead of numbers.
    styles     -- optional dict of CSS overrides.  'CODE' and 'LINENUMBERS'
                  style the two <pre> elements; the whole dict is also
                  passed on to the highlighter.
    attributes -- optional dict; keys starting with '_' become attributes
                  of the <table> tag (a None value gives a bare attribute,
                  other falsy values are dropped).

    Returns the HTML as a string.
    """
    # NOTE(review): the default link reads '/exmaples/...', which looks like
    # a typo for '/examples/...'.  It is left unchanged here because the
    # default is part of the public signature - confirm and fix separately.

    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    # cgi is tried first so that Python 2 behaviour is untouched.
    try:
        from cgi import escape
    except ImportError:
        from html import escape

    if styles is None:
        styles = {}
    if attributes is None:
        attributes = {}

    if 'CODE' in styles:
        code_style = styles['CODE']
    else:
        code_style = (
            '\n'
            'font-size: 11px;\n'
            'font-family: Bitstream Vera Sans Mono,monospace;\n'
            'background-color: transparent;\n'
            'margin: 0;\n'
            'padding: 5px;\n'
            'border: none;\n'
            'overflow: auto;\n'
        )
    if 'LINENUMBERS' in styles:
        # The lookup key used to be misspelled, so supplying this style
        # raised KeyError.
        linenumbers_style = styles['LINENUMBERS']
    else:
        linenumbers_style = (
            '\n'
            'font-size: 11px;\n'
            'font-family: Bitstream Vera Sans Mono,monospace;\n'
            'background-color: transparent;\n'
            'margin: 0;\n'
            'padding: 5px;\n'
            'border: none;\n'
            'background-color: #E0E0E0;\n'
            'color: #A0A0A0;\n'
        )

    if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML', 'WEB2PY']:
        code = Highlighter(language, link, styles).highlight(code)
    else:
        code = escape(code, False)

    lines = code.split('\n')
    if counter is None:
        numbers = '<br/>' * len(lines)
    elif isinstance(counter, str):
        numbers = escape(counter, False) + '<br/>' * len(lines)
    else:
        numbers = '<br/>'.join([str(i + counter) + '.' for i in range(len(lines))])
    code = '<br/>'.join(lines)

    items = attributes.items()
    # Bare attributes first, then name="value" pairs; double quotes inside a
    # value are turned into single quotes so they cannot end the attribute.
    bare = [key[1:].lower() for key, value in items
            if key[:1] == '_' and value is None]
    pairs = ['%s="%s"' % (key[1:].lower(), str(value).replace('"', "'"))
             for key, value in items if key[:1] == '_' and value]
    fa = ' '.join(bare + pairs)
    if fa:
        fa = ' ' + fa
    return '<table%s><tr valign="top"><td style="width:40px; text-align: right;"><pre style="%s">%s</pre></td><td><pre style="%s">%s</pre></td></tr></table>' % (fa, linenumbers_style, numbers, code_style, code)
207
if __name__ == '__main__':
    # Command-line use: highlight a file and print a complete HTML page.
    #   argv[1] -- path of the file to highlight
    #   argv[2] -- language name passed to highlight()
    import sys
    if len(sys.argv) < 3:
        sys.exit('usage: %s <file> <language>' % sys.argv[0])
    source = open(sys.argv[1])
    try:
        data = source.read()
    finally:
        # Close the file even if reading fails.
        source.close()
    # A single parenthesised argument prints the same on Python 2 and 3.
    print('<html><body>' + highlight(data, sys.argv[2]) + '</body></html>')
212