1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
import urlparse import re
# JavaScript shim injected into proxied pages.  It wraps
# XMLHttpRequest.prototype.open so that the requested URL is resolved to an
# absolute one and prefixed with the proxy URL before the original open() is
# called.  The two ``%s`` placeholders are filled with (proxyurl, site root).
#
# NOTE(review): inside the shim, ``var ... a=document.createElement('a')``
# reuses the name of the third parameter ``a``, so the anchor element (not
# the caller's value) is what gets forwarded to the original open().  The
# text is left byte-for-byte unchanged here; confirm the intended behaviour
# before touching it.
xmlhttprequest = (
    '''XMLHttpRequest.prototype._open=XMLHttpRequest.prototype.open; '''
    '''XMLHttpRequest.prototype.open = function(m,u,a,us,p) { '''
    '''var proxyurl='%s', baseurl='%s', a=document.createElement('a'); '''
    '''a.href=u;u=a.href.replace(proxyurl,'');'''
    '''u=proxyurl+(u.indexOf('http')==0?u:baseurl+u); '''
    '''if(console&&console.log){console.log("XMLHTTPRequest:",u);} '''
    '''return this._open(m,u,a,us,p); }'''
)

# Public patterns, kept so existing importers keep working.  rewrite() itself
# uses the combined single-pass pattern ``_attr_re`` below; ``http_re`` and
# ``xmlhttprequest_re`` are not referenced by the code in this section.
http_re = re.compile(r"https?://", re.I)
href_re1 = re.compile(r'((href|src|action)\s*=\s*"([^"<>]+)")', re.I)
href_re2 = re.compile(r"((href|src|action)\s*=\s*'([^'<>]+)')", re.I)
href_re3 = re.compile(r"""((href|src|action)\s*=\s*([^'"\s<>]+))""", re.I)
xmlhttprequest_re = re.compile(r"<script", re.I)

# One pattern covering the three value forms matched by href_re1..3:
# group 1 = attribute name, group 2 = double-quoted value,
# group 3 = single-quoted value, group 4 = unquoted value.
_attr_re = re.compile(
    r"""(href|src|action)\s*=\s*(?:"([^"<>]+)"|'([^'<>]+)'|([^'"\s<>]+))""",
    re.I)

# Closing title tag in any letter case; the shim is inserted right after it.
_title_close_re = re.compile(r"</title>", re.I)


def _proxied_attribute(match, proxyurl, baseurl):
    """Return the replacement text for one ``_attr_re`` match."""
    name = match.group(1)
    double_quoted, single_quoted, bare = match.group(2, 3, 4)
    if double_quoted is not None:
        url, quote = double_quoted, '"'
    elif single_quoted is not None:
        url, quote = single_quoted, "'"
    else:
        # Unquoted values are emitted double-quoted.
        url, quote = bare, '"'

    absolute = urlparse.urljoin(baseurl, url)
    # Leave already-proxied URLs and non-http(s) schemes (mailto:,
    # javascript:, ...) exactly as they appeared in the page.
    if absolute.startswith(proxyurl) or not absolute.startswith("http"):
        return match.group(0)
    return "%s=%s%s%s%s" % (name, quote, proxyurl, absolute, quote)


def rewrite(proxyurl, baseurl, content):
    """Rewrite the links of an HTML page so they go through the proxy.

    Every ``href``/``src``/``action`` attribute value is resolved against
    *baseurl*; if the result is an http(s) URL that is not already prefixed
    with *proxyurl*, the attribute is replaced by ``name="<proxyurl><url>"``
    (single-quoted values keep their single quotes).  Afterwards the
    XMLHttpRequest shim is inserted after the first closing ``</title>`` tag,
    if the page has one.

    The attributes are rewritten in a single left-to-right pass, so text
    inside an already rewritten value (for example ``?src=foo`` in a query
    string) is never matched and rewritten a second time.

    Parameters:
        proxyurl: prefix placed in front of every rewritten absolute URL.
        baseurl: URL of the page, used to resolve relative references.
        content: HTML text of the page.

    Returns:
        The rewritten HTML text.
    """
    content = _attr_re.sub(
        lambda match: _proxied_attribute(match, proxyurl, baseurl), content)

    script = xmlhttprequest % (proxyurl, urlparse.urljoin(baseurl, "/"))
    injection = "<script>%s</script>" % script
    # A callable replacement keeps backslashes in the script text literal and
    # preserves the original spelling of the matched closing tag.
    return _title_close_re.sub(
        lambda match: match.group(0) + injection, content, count=1)
|