1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.datamodel.credential;
24
25 import java.util.HashMap;
26 import java.util.Iterator;
27 import java.util.Map;
28 import java.util.logging.Logger;
29
30 import javax.management.Attribute;
31 import javax.management.AttributeNotFoundException;
32
33 import org.apache.commons.httpclient.HttpClient;
34 import org.apache.commons.httpclient.HttpMethod;
35 import org.apache.commons.httpclient.HttpMethodBase;
36 import org.apache.commons.httpclient.NameValuePair;
37 import org.apache.commons.httpclient.URIException;
38 import org.apache.commons.httpclient.methods.GetMethod;
39 import org.apache.commons.httpclient.methods.PostMethod;
40 import org.archive.crawler.datamodel.CrawlURI;
41 import org.archive.crawler.settings.MapType;
42 import org.archive.crawler.settings.SimpleType;
43 import org.archive.crawler.settings.Type;
44 import org.archive.net.UURI;
45 import org.archive.net.UURIFactory;
46
47
48
49 /***
50 * Credential that holds all needed to do a GET/POST to a HTML form.
51 *
52 * @author stack
53 * @version $Revision: 4668 $, $Date: 2006-09-26 21:49:01 +0000 (Tue, 26 Sep 2006) $
54 */
55 public class HtmlFormCredential extends Credential {
56
57 private static final long serialVersionUID = -4732570804435453949L;
58
59 private static final Logger logger =
60 Logger.getLogger(HtmlFormCredential.class.getName());
61
62 private static final String ATTR_LOGIN_URI = "login-uri";
63 private static final String ATTR_FORM_ITEMS = "form-items";
64 private static final String ATTR_FORM_METHOD = "http-method";
65 private static final String [] METHODS = {"POST", "GET"};
66
67 /***
68 * Constructor.
69 *
70 * A constructor that takes name of the credential is required by settings
71 * framework.
72 *
73 * @param name Name of this credential.
74 */
75 public HtmlFormCredential(final String name)
76 {
77 super(name, "Credential that has all necessary" +
78 " for running a POST/GET to an HTML login form.");
79
80 Type t = addElementToDefinition(new SimpleType("login-uri",
81 "Full URI of page that contains the HTML login form we're to" +
82 " apply these credentials too: E.g. http://www.archive.org", ""));
83 t.setOverrideable(false);
84 t.setExpertSetting(true);
85
86
87 t = addElementToDefinition(new SimpleType(ATTR_FORM_METHOD,
88 "GET or POST", METHODS[0], METHODS));
89 t.setOverrideable(false);
90 t.setExpertSetting(true);
91
92 t = addElementToDefinition(new MapType(ATTR_FORM_ITEMS, "Form items.",
93 String.class));
94 t.setOverrideable(false);
95 t.setExpertSetting(true);
96 }
97
98 /***
99 * @param context CrawlURI context to use.
100 * @return login-uri.
101 * @throws AttributeNotFoundException
102 */
103 public String getLoginUri(final CrawlURI context)
104 throws AttributeNotFoundException {
105 return (String)getAttribute(ATTR_LOGIN_URI, context);
106 }
107
108 /***
109 * @param context CrawlURI context to use.
110 * @return login-uri.
111 * @throws AttributeNotFoundException
112 */
113 public String getHttpMethod(final CrawlURI context)
114 throws AttributeNotFoundException {
115 return (String)getAttribute(ATTR_FORM_METHOD, context);
116 }
117
118 /***
119 * @param context CrawlURI context to use.
120 * @return Form inputs as convenient map. Returns null if no form items.
121 * @throws AttributeNotFoundException
122 */
123 public Map<String,Object> getFormItems(final CrawlURI context)
124 throws AttributeNotFoundException {
125 Map<String,Object> result = null;
126 MapType items = (MapType)getAttribute(ATTR_FORM_ITEMS, context);
127 if (items != null) {
128 for (Iterator i = items.iterator(context); i.hasNext();) {
129 Attribute a = (Attribute)i.next();
130 if (result == null) {
131 result = new HashMap<String,Object>();
132 }
133 result.put(a.getName(), a.getValue());
134 }
135 }
136 return result;
137 }
138
139 public boolean isPrerequisite(final CrawlURI curi) {
140 boolean result = false;
141 String curiStr = curi.getUURI().toString();
142 String loginUri = getPrerequisite(curi);
143 if (loginUri != null) {
144 try {
145 UURI uuri = UURIFactory.getInstance(curi.getUURI(), loginUri);
146 if (uuri != null && curiStr != null &&
147 uuri.toString().equals(curiStr)) {
148 result = true;
149 if (!curi.isPrerequisite()) {
150 curi.setPrerequisite(true);
151 logger.fine(curi + " is prereq.");
152 }
153 }
154 } catch (URIException e) {
155 logger.severe("Failed to uuri: " + curi + ", " +
156 e.getMessage());
157 }
158 }
159 return result;
160 }
161
162 public boolean hasPrerequisite(CrawlURI curi) {
163 return getPrerequisite(curi) != null;
164 }
165
166 public String getPrerequisite(CrawlURI curi) {
167 String loginUri = null;
168 try {
169 loginUri = getLoginUri(curi);
170 } catch (AttributeNotFoundException e) {
171 logger.severe("Failed to getLoginUri: " + this + ", " + curi + ","
172 + e.getMessage());
173
174
175
176 }
177 return loginUri;
178 }
179
180 public String getKey(CrawlURI curi) throws AttributeNotFoundException {
181 return getLoginUri(curi);
182 }
183
184 public boolean isEveryTime() {
185
186 return false;
187 }
188
189 public boolean populate(CrawlURI curi, HttpClient http, HttpMethod method,
190 String payload) {
191
192
193 boolean result = false;
194 Map formItems = null;
195 try {
196 formItems = getFormItems(curi);
197 }
198 catch (AttributeNotFoundException e1) {
199 logger.severe("Failed get of form items for " + curi);
200 }
201 if (formItems == null || formItems.size() <= 0) {
202 try {
203 logger.severe("No form items for " + method.getURI());
204 }
205 catch (URIException e) {
206 logger.severe("No form items and exception getting uri: " +
207 e.getMessage());
208 }
209 return result;
210 }
211
212 NameValuePair[] data = new NameValuePair[formItems.size()];
213 int index = 0;
214 String key = null;
215 for (Iterator i = formItems.keySet().iterator(); i.hasNext();) {
216 key = (String)i.next();
217 data[index++] = new NameValuePair(key, (String)formItems.get(key));
218 }
219 if (method instanceof PostMethod) {
220 ((PostMethod)method).setRequestBody(data);
221 result = true;
222 } else if (method instanceof GetMethod) {
223
224
225
226 HttpMethodBase hmb = (HttpMethodBase)method;
227 String currentQuery = hmb.getQueryString();
228 hmb.setQueryString(data);
229 String newQuery = hmb.getQueryString();
230 hmb.setQueryString(((currentQuery != null)? currentQuery: "") +
231 "&" + newQuery);
232 result = true;
233 } else {
234 logger.severe("Unknown method type: " + method);
235 }
236 return result;
237 }
238
239 public boolean isPost(CrawlURI curi) {
240 String method = null;
241 try {
242 method = getHttpMethod(curi);
243 }
244 catch (AttributeNotFoundException e) {
245 logger.severe("Failed to get method for " + curi + ", " + this);
246 }
247 return method != null && method.equalsIgnoreCase("POST");
248 }
249 }