1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.datamodel;
26
27 import java.io.BufferedReader;
28 import java.io.IOException;
29 import java.io.StringReader;
30 import java.util.HashMap;
31 import java.util.LinkedList;
32 import java.util.List;
33
34 import junit.framework.TestCase;
35
36 public class RobotstxtTest extends TestCase {
37 public void testParseRobots() throws IOException {
38 LinkedList<String> userAgents = new LinkedList<String>();
39 HashMap<String,List<String>> disallows
40 = new HashMap<String,List<String>>();
41 BufferedReader reader = new BufferedReader(new StringReader("BLAH"));
42 assertFalse(Robotstxt.parse(reader, userAgents, disallows));
43 assertTrue(disallows.size() == 0);
44
45 String agent = "archive.org_bot";
46 reader = new BufferedReader(
47 new StringReader("User-agent: " + agent + "\n" +
48 "Disallow: /cgi-bin/\n" +
49 "Disallow: /details/software\n"));
50 assertFalse(Robotstxt.parse(reader, userAgents, disallows));
51 assertTrue(disallows.size() == 1);
52 assertTrue(userAgents.size() == 1);
53 assertEquals(userAgents.get(0), agent);
54
55 agent = "*";
56 reader = new BufferedReader(
57 new StringReader("User-agent: " + agent + "\n" +
58 "Disallow: /cgi-bin/\n" +
59 "Disallow: /details/software\n"));
60 disallows = new HashMap<String,List<String>>();
61 userAgents = new LinkedList<String>();
62 assertFalse(Robotstxt.parse(reader, userAgents, disallows));
63 assertTrue(disallows.size() == 1);
64 assertTrue(userAgents.size() == 1);
65 assertEquals(userAgents.get(0), "");
66 }
67 }