View Javadoc

1   /* PersistStoreProcessor
2    * 
3    * Created on Feb 12, 2005
4    *
5    * Copyright (C) 2007 Internet Archive.
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.processor.recrawl;
24  
25  import java.io.File;
26  
27  import org.archive.crawler.datamodel.CrawlURI;
28  import org.archive.crawler.event.CrawlStatusListener;
29  
30  import com.sleepycat.je.DatabaseException;
31  
32  /***
33   * Store CrawlURI attributes from latest fetch to persistent storage for
34   * consultation by a later recrawl. 
35   * 
36   * @author gojomo
37   * @version $Date: 2006-09-25 20:19:54 +0000 (Mon, 25 Sep 2006) $, $Revision: 4654 $
38   */
39  public class PersistStoreProcessor extends PersistOnlineProcessor 
40  implements CrawlStatusListener {
41      private static final long serialVersionUID = -8308356194337303758L;
42  
43      /***
44       * Usual constructor
45       * 
46       * @param name
47       */
48      public PersistStoreProcessor(String name) {
49          super(name, "PersistStoreProcessor. Stores CrawlURI attributes " +
50                  "from latest fetch for consultation by a later recrawl.");
51      }
52  
53      protected void initialTasks() {
54          super.initialTasks();
55          // Add this class to crawl state listeners to note checkpoints
56          getController().addCrawlStatusListener(this);
57      }
58      
59      @Override
60      protected void innerProcess(CrawlURI curi) throws InterruptedException {
61          if(shouldStore(curi)) {
62              store.put(persistKeyFor(curi),curi.getPersistentAList());
63          }
64      }
65      
66      public void crawlCheckpoint(File checkpointDir) throws Exception {
67          // sync db
68          try {
69              historyDb.sync();
70          } catch (DatabaseException e) {
71              // TODO Auto-generated catch block
72              throw new RuntimeException(e);
73          }
74      }
75  
76      public void crawlEnded(String sExitMessage) {
77          // ignored
78          
79      }
80  
81      public void crawlEnding(String sExitMessage) {
82          // ignored
83          
84      }
85  
86      public void crawlPaused(String statusMessage) {
87          // ignored
88          
89      }
90  
91      public void crawlPausing(String statusMessage) {
92          // ignored
93          
94      }
95  
96      public void crawlResuming(String statusMessage) {
97          // ignored
98          
99      }
100 
101     public void crawlStarted(String message) {
102         // ignored
103     }
104 }