/*
 * Copyright 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.android.basicsyncadapter.net;

import android.text.format.Time;
import android.util.Xml;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;

/**
 * This class parses generic Atom feeds.
 *
 * <p>Given an InputStream representation of a feed, it returns a List of entries,
 * where each list element represents a single entry (post) in the XML feed.
 *
 * <p>An example of an Atom feed can be found at:
 * http://en.wikipedia.org/w/index.php?title=Atom_(standard)&oldid=560239173#Example_of_an_Atom_1.0_feed
 */
public class FeedParser {

    // Constants indicating XML element names that we're interested in
    private static final int TAG_ID = 1;
    private static final int TAG_TITLE = 2;
    private static final int TAG_PUBLISHED = 3;
    private static final int TAG_LINK = 4;

    // We don't use XML namespaces
    private static final String ns = null;

    /**
     * Parse an Atom feed, returning a collection of Entry objects.
     *
     * <p>The supplied stream is always closed before this method returns, whether parsing
     * succeeds or fails.
     *
     * @param in Atom feed, as a stream.
     * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
     * @throws java.io.IOException on I/O error.
     * @throws java.text.ParseException declared so that existing callers which catch it keep
     *         compiling; nothing in this class currently throws it.
     */
    public List<Entry> parse(InputStream in)
            throws XmlPullParserException, IOException, ParseException {
        try {
            XmlPullParser parser = Xml.newPullParser();
            parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false);
            parser.setInput(in, null);
            // Advance to the first start tag, which readFeed() requires to be <feed>.
            parser.nextTag();
            return readFeed(parser);
        } finally {
            in.close();
        }
    }

    /**
     * Decode a feed attached to an XmlPullParser.
     *
     * @param parser Incoming XML, positioned on the opening {@code <feed>} tag.
     * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
     * @throws java.io.IOException on I/O error.
     */
    private List<Entry> readFeed(XmlPullParser parser)
            throws XmlPullParserException, IOException {
        List<Entry> entries = new ArrayList<Entry>();

        // Search for <feed> tags. These wrap the beginning/end of an Atom document.
        //
        // Example:
        // <?xml version="1.0" encoding="utf-8"?>
        // <feed xmlns="http://www.w3.org/2005/Atom">
        // ...
        // </feed>
        parser.require(XmlPullParser.START_TAG, ns, "feed");
        while (parser.next() != XmlPullParser.END_TAG) {
            if (parser.getEventType() != XmlPullParser.START_TAG) {
                // Text or other non-element events between child elements are ignored.
                continue;
            }
            String name = parser.getName();
            // Starts by looking for the <entry> tag. This tag repeats inside of <feed> for each
            // article in the feed.
            //
            // Example:
            // <entry>
            //   <title>Article title</title>
            //   <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
            //   <link rel="edit" href="http://example.com/admin/article/1234"/>
            //   <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
            //   <published>2003-06-27T12:00:00Z</published>
            //   <updated>2003-06-28T12:00:00Z</updated>
            //   <summary>Article summary goes here.</summary>
            //   <author>
            //     <name>Rick Deckard</name>
            //     <email>rick@example.com</email>
            //   </author>
            // </entry>
            if ("entry".equals(name)) {
                entries.add(readEntry(parser));
            } else {
                skip(parser);
            }
        }
        return entries;
    }

    /**
     * Parses the contents of an entry. If it encounters an id, title, link, or published tag,
     * hands it off to {@link #readTag} for processing. Otherwise, skips the tag.
     *
     * @param parser Incoming XML, positioned on the opening {@code <entry>} tag.
     * @return The decoded entry. Fields whose tags were absent keep their defaults
     *         ({@code null} for strings, {@code 0} for the published timestamp).
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing the entry.
     * @throws java.io.IOException on I/O error.
     */
    private Entry readEntry(XmlPullParser parser)
            throws XmlPullParserException, IOException {
        parser.require(XmlPullParser.START_TAG, ns, "entry");
        String id = null;
        String title = null;
        String link = null;
        long publishedOn = 0;

        while (parser.next() != XmlPullParser.END_TAG) {
            if (parser.getEventType() != XmlPullParser.START_TAG) {
                continue;
            }
            String name = parser.getName();
            if ("id".equals(name)) {
                // Example: <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
                id = readTag(parser, TAG_ID);
            } else if ("title".equals(name)) {
                // Example: <title>Article title</title>
                title = readTag(parser, TAG_TITLE);
            } else if ("link".equals(name)) {
                // Example: <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
                //
                // Multiple link types can be included. readAlternateLink() will only return
                // non-null when reading an "alternate"-type link. Ignore other responses.
                String tempLink = readTag(parser, TAG_LINK);
                if (tempLink != null) {
                    link = tempLink;
                }
            } else if ("published".equals(name)) {
                // Example: <published>2003-06-27T12:00:00Z</published>
                //
                // An empty <published/> element yields null text; keep the default timestamp
                // in that case rather than handing null to Time.parse3339().
                String publishedText = readTag(parser, TAG_PUBLISHED);
                if (publishedText != null) {
                    Time t = new Time();
                    t.parse3339(publishedText);
                    publishedOn = t.toMillis(false);
                }
            } else {
                skip(parser);
            }
        }
        return new Entry(id, title, link, publishedOn);
    }

    /**
     * Process an incoming tag and read the selected value from it.
     *
     * @param parser Current parser object, positioned on the tag's START_TAG.
     * @param tagType One of the {@code TAG_*} constants declared in this class.
     * @return The tag's text body, or for {@code TAG_LINK} the href of an "alternate" link
     *         ({@code null} for other link relations).
     * @throws java.io.IOException on I/O error.
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing the tag.
     * @throws IllegalArgumentException if {@code tagType} is not a known constant.
     */
    private String readTag(XmlPullParser parser, int tagType)
            throws IOException, XmlPullParserException {
        switch (tagType) {
            case TAG_ID:
                return readBasicTag(parser, "id");
            case TAG_TITLE:
                return readBasicTag(parser, "title");
            case TAG_PUBLISHED:
                return readBasicTag(parser, "published");
            case TAG_LINK:
                return readAlternateLink(parser);
            default:
                throw new IllegalArgumentException("Unknown tag type: " + tagType);
        }
    }

    /**
     * Reads the body of a basic XML tag, which is guaranteed not to contain any nested elements.
     *
     * <p>You probably want to call readTag().
     *
     * @param parser Current parser object
     * @param tag XML element tag name to parse
     * @return Body of the specified tag, or {@code null} if the tag has no text content
     * @throws java.io.IOException on I/O error.
     * @throws org.xmlpull.v1.XmlPullParserException if the parser is not positioned on the
     *         expected start tag, or the tag is not closed directly after its text.
     */
    private String readBasicTag(XmlPullParser parser, String tag)
            throws IOException, XmlPullParserException {
        parser.require(XmlPullParser.START_TAG, ns, tag);
        String result = readText(parser);
        parser.require(XmlPullParser.END_TAG, ns, tag);
        return result;
    }

    /**
     * Processes link tags in the feed.
     *
     * <p>Per RFC 4287 section 4.2.7.2, a link element without a "rel" attribute is interpreted
     * as if its relation type were "alternate".
     *
     * @param parser Current parser object, positioned on the opening {@code <link>} tag.
     * @return The value of the "href" attribute when the link relation is "alternate" (or
     *         unspecified); {@code null} otherwise.
     * @throws java.io.IOException on I/O error.
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing the tag.
     */
    private String readAlternateLink(XmlPullParser parser)
            throws IOException, XmlPullParserException {
        parser.require(XmlPullParser.START_TAG, ns, "link");
        String link = null;
        String relType = parser.getAttributeValue(null, "rel");
        if (relType == null || "alternate".equals(relType)) {
            link = parser.getAttributeValue(null, "href");
        }
        // Consume the rest of the element, including any nested content, so the parser is
        // left on this link's own END_TAG.
        skip(parser);
        return link;
    }

    /**
     * Extracts the text value of the element the parser is currently positioned on.
     *
     * @param parser Current parser object, positioned on a START_TAG.
     * @return The element's text, or {@code null} if the next event is not text.
     * @throws java.io.IOException on I/O error.
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing the text.
     */
    private String readText(XmlPullParser parser) throws IOException, XmlPullParserException {
        String result = null;
        if (parser.next() == XmlPullParser.TEXT) {
            result = parser.getText();
            // Move past the text to the following tag event.
            parser.nextTag();
        }
        return result;
    }

    /**
     * Skips tags the parser isn't interested in. Uses depth to handle nested tags. i.e.,
     * if the next tag after a START_TAG isn't a matching END_TAG, it keeps going until it
     * finds the matching END_TAG (as indicated by the value of "depth" being 0).
     *
     * @param parser Current parser object; must be positioned on a START_TAG.
     * @throws IllegalStateException if the parser is not positioned on a START_TAG.
     * @throws org.xmlpull.v1.XmlPullParserException on error parsing the skipped content.
     * @throws java.io.IOException on I/O error.
     */
    private void skip(XmlPullParser parser) throws XmlPullParserException, IOException {
        if (parser.getEventType() != XmlPullParser.START_TAG) {
            throw new IllegalStateException();
        }
        int depth = 1;
        while (depth != 0) {
            switch (parser.next()) {
                case XmlPullParser.END_TAG:
                    depth--;
                    break;
                case XmlPullParser.START_TAG:
                    depth++;
                    break;
            }
        }
    }

    /**
     * This class represents a single entry (post) in the XML feed.
     *
     * <p>It includes the data members "id," "title," "link," and "published."
     */
    public static class Entry {
        /** Text of the entry's {@code <id>} element, or null if absent. */
        public final String id;
        /** Text of the entry's {@code <title>} element, or null if absent. */
        public final String title;
        /** The href of the entry's "alternate" link, or null if none was found. */
        public final String link;
        /** Publication time in milliseconds, or 0 if no published date was read. */
        public final long published;

        Entry(String id, String title, String link, long published) {
            this.id = id;
            this.title = title;
            this.link = link;
            this.published = published;
        }
    }
}