1   /* $Id: Handler.java 4566 2006-08-31 16:51:41Z stack-sf $
2    *
3    * Created August 11th, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.net.md5;
24  
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.net.URL;
28  import java.net.URLConnection;
29  import java.net.URLStreamHandler;
30  
31  /***
32   * A protocol handler for an 'md5' URI scheme.
33   * Md5 URLs look like this: <code>md5:deadbeefdeadbeefdeadbeefdeadbeef</code>
34   * When this handler is invoked against an md5 URL, it passes the raw md5 to 
35   * the configured script as an argument.  The configured script then does the
36   * work to bring the item pointed to by the md5 local so we can open a Stream
37   * on the local copy.  Local file is deleted when we finish. Do
38   * {@link org.archive.net.DownloadURLConnection#getFile()} to get name of
39   * temporary file.
40   * 
41   * <p>You need to define the system property
42   * <code>-Djava.protocol.handler.pkgs=org.archive.net</code> to add this handler
43   * to the java.net.URL set. Also define system properties
44   * <code>-Dorg.archive.net.md5.Md5URLConnection.path=PATH_TO_SCRIPT</code> to
45   * pass path of script to run as well as
46   * <code>-Dorg.archive.net.md5.Md5URLConnection.options=OPTIONS</code> for
47   * any options you'd like to include.  The pointed-to PATH_TO_SCRIPT
48   * will be invoked as follows: <code>PATH_TO_SCRIPT OPTIONS MD5
49   * LOCAL_TMP_FILE</code>.  The LOCAL_TMP_FILE file is made in
50   * <code>java.io.tmpdir</code> using java tmp name code.
51   * @author stack
52   */
53  public class Handler extends URLStreamHandler {
54      protected URLConnection openConnection(URL u) {
55          return new Md5URLConnection(u);
56      }
57  
58      /***
59       * Main dumps rsync file to STDOUT.
60       * @param args
61       * @throws IOException
62       */
63      public static void main(String[] args)
64      throws IOException {  
65          if (args.length != 1) {
66              System.out.println("Usage: java java " +
67                  "-Djava.protocol.handler.pkgs=org.archive.net " +
68                  "org.archive.net.md5.Handler " +
69                  "md5:deadbeefdeadbeefdeadbeefdeadbeef");
70              System.exit(1);
71          }
72          System.setProperty("org.archive.net.md5.Md5URLConnection.path",
73              "/tmp/manifest");
74          System.setProperty("java.protocol.handler.pkgs", "org.archive.net");
75          URL u = new URL(args[0]);
76          URLConnection connect = u.openConnection();
77          // Write download to stdout.
78          final int bufferlength = 4096;
79          byte [] buffer = new byte [bufferlength];
80          InputStream is = connect.getInputStream();
81          try {
82              for (int count = is.read(buffer, 0, bufferlength);
83                      (count = is.read(buffer, 0, bufferlength)) != -1;) {
84                  System.out.write(buffer, 0, count);
85              }
86              System.out.flush();
87          } finally {
88              is.close();
89          }
90      }
91  }