1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.net;
26
import gnu.inet.encoding.IDNA;
import gnu.inet.encoding.IDNAException;
import it.unimi.dsi.mg4j.util.MutableString;

import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.archive.util.TextUtils;
42
43
/**
 * Factory that returns UURIs.
 *
 * Escapes and fixes up URIs in accordance with RFC2396 and to match
 * browser practice. For example, it removes any '..' if first thing in
 * the path as per IE, converts backslashes to forward slashes, and
 * discards any 'fragment'/anchor portion of the URI. This class will
 * also fail URIs if they are longer than IE's allowed maximum length.
 *
 * <p>TODO: Test logging.
 *
 * @author stack
 */
58 public class UURIFactory extends URI {
59
60 private static final long serialVersionUID = -6146295130382209042L;
61
62 /***
63 * Logging instance.
64 */
65 private static Logger logger =
66 Logger.getLogger(UURIFactory.class.getName());
67
68 /***
69 * The single instance of this factory.
70 */
71 private static final UURIFactory factory = new UURIFactory();
72
73 /***
74 * RFC 2396-inspired regex.
75 *
76 * From the RFC Appendix B:
77 * <pre>
78 * URI Generic Syntax August 1998
79 *
80 * B. Parsing a URI Reference with a Regular Expression
81 *
82 * As described in Section 4.3, the generic URI syntax is not sufficient
83 * to disambiguate the components of some forms of URI. Since the
84 * "greedy algorithm" described in that section is identical to the
85 * disambiguation method used by POSIX regular expressions, it is
86 * natural and commonplace to use a regular expression for parsing the
87 * potential four components and fragment identifier of a URI reference.
88 *
89 * The following line is the regular expression for breaking-down a URI
90 * reference into its components.
91 *
92 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
93 * 12 3 4 5 6 7 8 9
94 *
95 * The numbers in the second line above are only to assist readability;
96 * they indicate the reference points for each subexpression (i.e., each
97 * paired parenthesis). We refer to the value matched for subexpression
98 * <n> as $<n>. For example, matching the above expression to
99 *
100 * http://www.ics.uci.edu/pub/ietf/uri/#Related
101 *
102 * results in the following subexpression matches:
103 *
104 * $1 = http:
105 * $2 = http
106 * $3 = //www.ics.uci.edu
107 * $4 = www.ics.uci.edu
108 * $5 = /pub/ietf/uri/
109 * $6 = <undefined>
110 * $7 = <undefined>
111 * $8 = #Related
112 * $9 = Related
113 *
114 * where <undefined> indicates that the component is not present, as is
115 * the case for the query component in the above example. Therefore, we
116 * can determine the value of the four components and fragment as
117 *
118 * scheme = $2
119 * authority = $4
120 * path = $5
121 * query = $7
122 * fragment = $9
123 * </pre>
124 *
125 * --
126 * <p>Below differs from the rfc regex in that it has java escaping of
127 * regex characters and we allow a URI made of a fragment only (Added extra
128 * group so indexing is off by one after scheme).
129 */
130 final static Pattern RFC2396REGEX = Pattern.compile(
131 "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(//?([^#]*))?)?(#(.*))?");
132
133
134
135
136
137
138
139
140
141
142
143
144
145 public static final String SLASHDOTDOTSLASH = "^(///.//./)+";
146 public static final String SLASH = "/";
147 public static final String HTTP = "http";
148 public static final String HTTP_PORT = ":80";
149 public static final String HTTPS = "https";
150 public static final String HTTPS_PORT = ":443";
151 public static final String DOT = ".";
152 public static final String EMPTY_STRING = "";
153 public static final String NBSP = "\u00A0";
154 public static final String SPACE = " ";
155 public static final String ESCAPED_SPACE = "%20";
156 public static final String TRAILING_ESCAPED_SPACE = "^(.*)(%20)+$";
157 public static final String PIPE = "|";
158 public static final String PIPE_PATTERN = "//|";
159 public static final String ESCAPED_PIPE = "%7C";
160 public static final String CIRCUMFLEX = "^";
161 public static final String CIRCUMFLEX_PATTERN = "//^";
162 public static final String ESCAPED_CIRCUMFLEX = "%5E";
163 public static final String QUOT = "\"";
164 public static final String ESCAPED_QUOT = "%22";
165 public static final String SQUOT = "'";
166 public static final String ESCAPED_SQUOT = "%27";
167 public static final String APOSTROPH = "`";
168 public static final String ESCAPED_APOSTROPH = "%60";
169 public static final String LSQRBRACKET = "[";
170 public static final String LSQRBRACKET_PATTERN = "//[";
171 public static final String ESCAPED_LSQRBRACKET = "%5B";
172 public static final String RSQRBRACKET = "]";
173 public static final String RSQRBRACKET_PATTERN = "//]";
174 public static final String ESCAPED_RSQRBRACKET = "%5D";
175 public static final String LCURBRACKET = "{";
176 public static final String LCURBRACKET_PATTERN = "//{";
177 public static final String ESCAPED_LCURBRACKET = "%7B";
178 public static final String RCURBRACKET = "}";
179 public static final String RCURBRACKET_PATTERN = "//}";
180 public static final String ESCAPED_RCURBRACKET = "%7D";
181 public static final String BACKSLASH = "//";
182 public static final String BACKSLASH_PATTERN = "////";
183 public static final String ESCAPED_BACKSLASH = "%5C";
184 public static final String STRAY_SPACING = "[\n\r\t]+";
185 public static final String IMPROPERESC_REPLACE = "%25$1";
186 public static final String IMPROPERESC =
187 "%((?:[^//p{XDigit}])|(?:.[^//p{XDigit}])|(?://z))";
188 public static final String COMMERCIAL_AT = "@";
189 public static final char PERCENT_SIGN = '%';
190 public static final char COLON = ':';
191
192 /***
193 * First percent sign in string followed by two hex chars.
194 */
195 public static final String URI_HEX_ENCODING =
196 "^[^%]*%[//p{XDigit}][//p{XDigit}].*";
197
198 /***
199 * Authority port number regex.
200 */
201 final static Pattern PORTREGEX = Pattern.compile("(.*:)([0-9]+)$");
202
203 /***
204 * Characters we'll accept in the domain label part of a URI
205 * authority: ASCII letters-digits-hyphen (LDH) plus underscore,
206 * with single intervening '.' characters.
207 *
208 * (We accept '_' because DNS servers have tolerated for many
209 * years counter to spec; we also accept dash patterns and ACE
210 * prefixes that will be rejected by IDN-punycoding attempt.)
211 */
212 final static String ACCEPTABLE_ASCII_DOMAIN =
213 "^(?:[a-zA-Z0-9_-]++(?://.)?)++$";
214
215 /***
216 * Pattern that looks for case of three or more slashes after the
217 * scheme. If found, we replace them with two only as mozilla does.
218 */
219 final static Pattern HTTP_SCHEME_SLASHES =
220 Pattern.compile("^(https?://)/+(.*)");
221
222 /***
223 * Pattern that looks for case of two or more slashes in a path.
224 */
225 final static Pattern MULTIPLE_SLASHES = Pattern.compile("//+");
226
227 /***
228 * System property key for list of supported schemes.
229 */
230 private static final String SCHEMES_KEY = ".schemes";
231
232 /***
233 * System property key for list of purposefully-ignored schemes.
234 */
235 private static final String IGNORED_SCHEMES_KEY = ".ignored-schemes";
236
237 private String[] schemes = null;
238 private String[] ignoredSchemes = null;
239
240 public static final int IGNORED_SCHEME = 9999999;
241
242 /***
243 * Protected constructor.
244 */
245 private UURIFactory() {
246 super();
247 String s = System.getProperty(this.getClass().getName() + SCHEMES_KEY);
248 if (s != null && s.length() > 0) {
249 schemes = s.split("[, ]+");
250 Arrays.sort(schemes);
251 }
252 String ignored = System.getProperty(this.getClass().getName() + IGNORED_SCHEMES_KEY);
253 if (ignored != null && ignored.length() > 0) {
254 ignoredSchemes = ignored.split("[, ]+");
255 Arrays.sort(ignoredSchemes);
256 }
257 }
258
259 /***
260 * @param uri URI as string.
261 * @return An instance of UURI
262 * @throws URIException
263 */
264 public static UURI getInstance(String uri) throws URIException {
265 return UURIFactory.factory.create(uri);
266 }
267
268 /***
269 * @param uri URI as string.
270 * @param charset Character encoding of the passed uri string.
271 * @return An instance of UURI
272 * @throws URIException
273 */
274 public static UURI getInstance(String uri, String charset)
275 throws URIException {
276 return UURIFactory.factory.create(uri, charset);
277 }
278
279 /***
280 * @param base Base uri to use resolving passed relative uri.
281 * @param relative URI as string.
282 * @return An instance of UURI
283 * @throws URIException
284 */
285 public static UURI getInstance(UURI base, String relative)
286 throws URIException {
287 return UURIFactory.factory.create(base, relative);
288 }
289
290 /***
291 * Test of whether passed String has an allowed URI scheme.
292 * First tests if likely scheme suffix. If so, we then test if its one of
293 * the supported schemes.
294 * @param possibleUrl URL string to examine.
295 * @return True if passed string looks like it could be an URL.
296 */
297 public static boolean hasSupportedScheme(String possibleUrl) {
298 boolean hasScheme = UURI.hasScheme(possibleUrl);
299 if (!hasScheme || UURIFactory.factory.schemes == null) {
300 return hasScheme;
301 }
302 String tmpStr = possibleUrl.substring(0, possibleUrl.indexOf(':'));
303 return Arrays.binarySearch(UURIFactory.factory.schemes, tmpStr) >= 0;
304 }
305
306 /***
307 * @param uri URI as string.
308 * @return Instance of UURI.
309 * @throws URIException
310 */
311 private UURI create(String uri) throws URIException {
312 return create(uri, UURI.getDefaultProtocolCharset());
313 }
314
315 /***
316 * @param uri URI as string.
317 * @param charset Original encoding of the string.
318 * @return Instance of UURI.
319 * @throws URIException
320 */
321 private UURI create(String uri, String charset) throws URIException {
322 UURI uuri = new UURI(fixup(uri, null, charset), true, charset);
323 if (logger.isLoggable(Level.FINE)) {
324 logger.fine("URI " + uri +
325 " PRODUCT " + uuri.toString() +
326 " CHARSET " + charset);
327 }
328 return validityCheck(uuri);
329 }
330
331 /***
332 * @param base UURI to use as a base resolving <code>relative</code>.
333 * @param relative Relative URI.
334 * @return Instance of UURI.
335 * @throws URIException
336 */
337 private UURI create(UURI base, String relative) throws URIException {
338 UURI uuri = new UURI(base, new UURI(fixup(relative, base, base.getProtocolCharset()),
339 true, base.getProtocolCharset()));
340 if (logger.isLoggable(Level.FINE)) {
341 logger.fine(" URI " + relative +
342 " PRODUCT " + uuri.toString() +
343 " CHARSET " + base.getProtocolCharset() +
344 " BASE " + base);
345 }
346 return validityCheck(uuri);
347 }
348
349 /***
350 * Check the generated UURI.
351 *
352 * At the least look at length of uuri string. We were seeing case
353 * where before escaping, string was < MAX_URL_LENGTH but after was
354 * >. Letting out a too-big message was causing us troubles later
355 * down the processing chain.
356 * @param uuri Created uuri to check.
357 * @return The passed <code>uuri</code> so can easily inline this check.
358 * @throws URIException
359 */
360 protected UURI validityCheck(UURI uuri) throws URIException {
361 if (uuri.getRawURI().length > UURI.MAX_URL_LENGTH) {
362 throw new URIException("Created (escaped) uuri > " +
363 UURI.MAX_URL_LENGTH +": "+uuri.toString());
364 }
365 return uuri;
366 }
367
368 /***
369 * Do heritrix fix-up on passed uri string.
370 *
371 * Does heritrix escaping; usually escaping done to make our behavior align
372 * with IEs. This method codifies our experience pulling URIs from the
373 * wilds. Its does all the escaping we want; its output can always be
374 * assumed to be 'escaped' (though perhaps to a laxer standard than the
375 * vanilla HttpClient URI class or official specs might suggest).
376 *
377 * @param uri URI as string.
378 * @param base May be null.
379 * @param e True if the uri is already escaped.
380 * @return A fixed up URI string.
381 * @throws URIException
382 */
383 private String fixup(String uri, final URI base, final String charset)
384 throws URIException {
385 if (uri == null) {
386 throw new NullPointerException();
387 } else if (uri.length() == 0 && base == null) {
388 throw new URIException("URI length is zero (and not relative).");
389 }
390
391 if (uri.length() > UURI.MAX_URL_LENGTH) {
392
393 throw new URIException("URI length > " + UURI.MAX_URL_LENGTH +
394 ": " + uri);
395 }
396
397
398
399 if (uri.indexOf(NBSP) >= 0) {
400 uri = TextUtils.replaceAll(NBSP, uri, SPACE);
401 }
402
403
404 uri = uri.trim();
405
406
407
408
409
410
411 if (uri.indexOf(BACKSLASH) >= 0) {
412 uri = TextUtils.replaceAll(BACKSLASH_PATTERN, uri, SLASH);
413 }
414
415
416 uri = TextUtils.replaceAll(STRAY_SPACING, uri, EMPTY_STRING);
417
418
419
420
421 Matcher matcher = HTTP_SCHEME_SLASHES.matcher(uri);
422 if (matcher.matches()) {
423 uri = matcher.group(1) + matcher.group(2);
424 }
425
426
427 uri = escapeWhitespace(uri);
428
429
430
431 matcher = RFC2396REGEX.matcher(uri);
432 if (!matcher.matches()) {
433 throw new URIException("Failed parse of " + uri);
434 }
435 String uriScheme = checkUriElementAndLowerCase(matcher.group(2));
436 String uriSchemeSpecificPart = checkUriElement(matcher.group(3));
437 String uriAuthority = checkUriElement(matcher.group(5));
438 String uriPath = checkUriElement(matcher.group(6));
439 String uriQuery = checkUriElement(matcher.group(8));
440
441
442
443 if (uriScheme != null && uriScheme.length() > 0 &&
444 this.schemes != null) {
445 if (!(Arrays.binarySearch(schemes,uriScheme)>=0)) {
446
447 if((Arrays.binarySearch(ignoredSchemes,uriScheme)>=0)) {
448 throw new URIException(
449 IGNORED_SCHEME, "Ignored scheme: " + uriScheme);
450 } else {
451 throw new URIException("Unsupported scheme: " + uriScheme);
452 }
453 }
454 }
455
456
457 if (uriScheme == null || uriScheme.length() <= 0) {
458 if (base == null) {
459 throw new URIException("Relative URI but no base: " + uri);
460 }
461 } else {
462 checkHttpSchemeSpecificPartSlashPrefix(base, uriScheme,
463 uriSchemeSpecificPart);
464 }
465
466
467
468 uriAuthority = fixupAuthority(uriAuthority);
469
470
471 if (uriSchemeSpecificPart != null &&
472 uriSchemeSpecificPart.startsWith(SLASH)) {
473 if (uriPath != null) {
474
475 uriPath = TextUtils.replaceFirst(SLASHDOTDOTSLASH, uriPath,
476 SLASH);
477 }
478
479
480
481 if (uriPath == null || EMPTY_STRING.equals(uriPath)) {
482 uriPath = SLASH;
483 }
484 }
485
486 if (uriAuthority != null) {
487 if (uriScheme != null && uriScheme.length() > 0 &&
488 uriScheme.equals(HTTP)) {
489 uriAuthority = checkPort(uriAuthority);
490 uriAuthority = stripTail(uriAuthority, HTTP_PORT);
491 } else if (uriScheme != null && uriScheme.length() > 0 &&
492 uriScheme.equals(HTTPS)) {
493 uriAuthority = checkPort(uriAuthority);
494 uriAuthority = stripTail(uriAuthority, HTTPS_PORT);
495 }
496
497 uriAuthority = stripTail(uriAuthority, DOT);
498 uriAuthority = stripPrefix(uriAuthority, DOT);
499 } else {
500
501
502
503 if (uriScheme != null && base != null
504 && uriScheme.equals(base.getScheme())) {
505
506 uriScheme = null;
507 }
508 }
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527 uriPath = ensureMinimalEscaping(uriPath, charset);
528 uriQuery = ensureMinimalEscaping(uriQuery, charset,
529 LaxURLCodec.QUERY_SAFE);
530
531
532
533 MutableString s = new MutableString(
534 ((uriScheme != null)? uriScheme.length(): 0)
535 + 1
536 + ((uriAuthority != null)? uriAuthority.length(): 0)
537 + 2
538 + ((uriPath != null)? uriPath.length(): 0)
539 + 1
540 + ((uriQuery != null)? uriQuery.length(): 0));
541 appendNonNull(s, uriScheme, ":", true);
542 appendNonNull(s, uriAuthority, "//", false);
543 appendNonNull(s, uriPath, "", false);
544 appendNonNull(s, uriQuery, "?", false);
545 return s.toString();
546 }
547
548 /***
549 * If http(s) scheme, check scheme specific part begins '//'.
550 * @throws URIException
551 * @see http://www.faqs.org/rfcs/rfc1738.html Section 3.1. Common Internet
552 * Scheme Syntax
553 */
554 protected void checkHttpSchemeSpecificPartSlashPrefix(final URI base,
555 final String scheme, final String schemeSpecificPart)
556 throws URIException {
557 if (scheme == null || scheme.length() <= 0) {
558 return;
559 }
560 if (!scheme.equals("http") && !scheme.equals("https")) {
561 return;
562 }
563 if ( schemeSpecificPart == null
564 || !schemeSpecificPart.startsWith("//")) {
565
566 if (base == null || !scheme.equals(base.getScheme())) {
567 throw new URIException(
568 "relative URI with scheme only allowed for " +
569 "scheme matching base");
570 }
571 return;
572 }
573 if (schemeSpecificPart.length() <= 2) {
574 throw new URIException("http scheme specific part is " +
575 "too short: " + schemeSpecificPart);
576 }
577 }
578
579 /***
580 * Fixup 'authority' portion of URI, by removing any stray
581 * encoded spaces, lowercasing any domain names, and applying
582 * IDN-punycoding to Unicode domains.
583 *
584 * @param uriAuthority the authority string to fix
585 * @return fixed version
586 * @throws URIException
587 */
588 private String fixupAuthority(String uriAuthority) throws URIException {
589
590
591
592 if (uriAuthority != null) {
593
594
595
596
597 while(uriAuthority.endsWith(ESCAPED_SPACE)) {
598 uriAuthority = uriAuthority.substring(0,uriAuthority.length()-3);
599 }
600
601
602 int atIndex = uriAuthority.indexOf(COMMERCIAL_AT);
603 int portColonIndex = uriAuthority.indexOf(COLON,(atIndex<0)?0:atIndex);
604 if(atIndex<0 && portColonIndex<0) {
605
606 return fixupDomainlabel(uriAuthority);
607 } else if (atIndex<0 && portColonIndex>-1) {
608
609 String domain = fixupDomainlabel(uriAuthority.substring(0,portColonIndex));
610 String port = uriAuthority.substring(portColonIndex);
611 return domain + port;
612 } else if (atIndex>-1 && portColonIndex<0) {
613
614 String userinfo = uriAuthority.substring(0,atIndex+1);
615 String domain = fixupDomainlabel(uriAuthority.substring(atIndex+1));
616 return userinfo + domain;
617 } else {
618
619 String userinfo = uriAuthority.substring(0,atIndex+1);
620 String domain = fixupDomainlabel(uriAuthority.substring(atIndex+1,portColonIndex));
621 String port = uriAuthority.substring(portColonIndex);
622 return userinfo + domain + port;
623 }
624 }
625 return uriAuthority;
626 }
627
628 /***
629 * Fixup the domain label part of the authority.
630 *
631 * We're more lax than the spec. in that we allow underscores.
632 *
633 * @param label Domain label to fix.
634 * @return Return fixed domain label.
635 * @throws URIException
636 */
637 private String fixupDomainlabel(String label)
638 throws URIException {
639
640
641 try {
642
643
644 label = IDNA.toASCII(label);
645 } catch (IDNAException e) {
646 if(TextUtils.matches(ACCEPTABLE_ASCII_DOMAIN,label)) {
647
648
649
650 } else {
651
652
653
654
655 URIException ue = new URIException(e+" "+label);
656 ue.initCause(e);
657 throw ue;
658 }
659 }
660 label = label.toLowerCase();
661 return label;
662 }
663
664 /***
665 * Ensure that there all characters needing escaping
666 * in the passed-in String are escaped. Stray '%' characters
667 * are *not* escaped, as per browser behavior.
668 *
669 * @param u String to escape
670 * @param charset
671 * @return string with any necessary escaping applied
672 */
673 private String ensureMinimalEscaping(String u, final String charset) {
674 return ensureMinimalEscaping(u, charset, LaxURLCodec.EXPANDED_URI_SAFE);
675 }
676
677 /***
678 * Ensure that there all characters needing escaping
679 * in the passed-in String are escaped. Stray '%' characters
680 * are *not* escaped, as per browser behavior.
681 *
682 * @param u String to escape
683 * @param charset
684 * @param bitset
685 * @return string with any necessary escaping applied
686 */
687 private String ensureMinimalEscaping(String u, final String charset,
688 final BitSet bitset) {
689 if (u == null) {
690 return null;
691 }
692 for (int i = 0; i < u.length(); i++) {
693 char c = u.charAt(i);
694 if (!bitset.get(c)) {
695 try {
696 u = LaxURLCodec.DEFAULT.encode(bitset, u, charset);
697 } catch (UnsupportedEncodingException e) {
698 e.printStackTrace();
699 }
700 break;
701 }
702 }
703 return u;
704 }
705
706 /***
707 * Escape any whitespace found.
708 *
709 * The parent class takes care of the bulk of escaping. But if any
710 * instance of escaping is found in the URI, then we ask for parent
711 * to do NO escaping. Here we escape any whitespace found irrespective
712 * of whether the uri has already been escaped. We do this for
713 * case where uri has been judged already-escaped only, its been
714 * incompletly done and whitespace remains. Spaces, etc., in the URI are
715 * a real pain. Their presence will break log file and ARC parsing.
716 * @param uri URI string to check.
717 * @return uri with spaces escaped if any found.
718 */
719 protected String escapeWhitespace(String uri) {
720
721
722
723
724
725
726 MutableString buffer = null;
727 for (int i = 0; i < uri.length(); i++) {
728 char c = uri.charAt(i);
729 if (Character.isWhitespace(c)) {
730 if (buffer == null) {
731 buffer = new MutableString(uri.length() +
732 2
733 buffer.append(uri.substring(0, i));
734 }
735 buffer.append("%");
736 String hexStr = Integer.toHexString(c);
737 if ((hexStr.length() % 2) > 0) {
738 buffer.append("0");
739 }
740 buffer.append(hexStr);
741
742 } else {
743 if (buffer != null) {
744 buffer.append(c);
745 }
746 }
747 }
748 return (buffer != null)? buffer.toString(): uri;
749 }
750
751 /***
752 * Check port on passed http authority. Make sure the size is not larger
753 * than allowed: See the 'port' definition on this
754 * page, http://www.kerio.com/manual/wrp/en/418.htm.
755 * Also, we've seen port numbers of '0080' whose leading zeros confuse
756 * the parent class. Strip the leading zeros.
757 *
758 * @param uriAuthority
759 * @return Null or an amended port number.
760 * @throws URIException
761 */
762 private String checkPort(String uriAuthority)
763 throws URIException {
764 Matcher m = PORTREGEX.matcher(uriAuthority);
765 if (m.matches()) {
766 String no = m.group(2);
767 if (no != null && no.length() > 0) {
768
769
770
771
772 while (no.charAt(0) == '0' && no.length() > 1) {
773 no = no.substring(1);
774 }
775 uriAuthority = m.group(1) + no;
776
777 int portNo = Integer.parseInt(no);
778 if (portNo <= 0 || portNo > 65535) {
779 throw new URIException("Port out of bounds: " +
780 uriAuthority);
781 }
782 }
783 }
784 return uriAuthority;
785 }
786
787 /***
788 * @param b Buffer to append to.
789 * @param str String to append if not null.
790 * @param substr Suffix or prefix to use if <code>str</code> is not null.
791 * @param suffix True if <code>substr</code> is a suffix.
792 */
793 private void appendNonNull(MutableString b, String str, String substr,
794 boolean suffix) {
795 if (str != null && str.length() > 0) {
796 if (!suffix) {
797 b.append(substr);
798 }
799 b.append(str);
800 if (suffix) {
801 b.append(substr);
802 }
803 }
804 }
805
806 /***
807 * @param str String to work on.
808 * @param prefix Prefix to strip if present.
809 * @return <code>str</code> w/o <code>prefix</code>.
810 */
811 private String stripPrefix(String str, String prefix) {
812 return str.startsWith(prefix)?
813 str.substring(prefix.length(), str.length()):
814 str;
815 }
816
817 /***
818 * @param str String to work on.
819 * @param tail Tail to strip if present.
820 * @return <code>str</code> w/o <code>tail</code>.
821 */
822 private static String stripTail(String str, String tail) {
823 return str.endsWith(tail)?
824 str.substring(0, str.length() - tail.length()):
825 str;
826 }
827
828 /***
829 * @param element to examine.
830 * @return Null if passed null or an empty string otherwise
831 * <code>element</code>.
832 */
833 private String checkUriElement(String element) {
834 return (element == null || element.length() <= 0)? null: element;
835 }
836
837 /***
838 * @param element to examine and lowercase if non-null.
839 * @return Null if passed null or an empty string otherwise
840 * <code>element</code> lowercased.
841 */
842 private String checkUriElementAndLowerCase(String element) {
843 String tmp = checkUriElement(element);
844 return (tmp != null)? tmp.toLowerCase(): tmp;
845 }
846 }