/*
 * Copyright 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.android.basicsyncadapter.net;

import android.text.format.Time;
import android.util.TimeFormatException;
import android.util.Xml;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;

/**
32
* This class parses generic Atom feeds.
33
*
34
* <p>Given an InputStream representation of a feed, it returns a List of entries,
35
* where each list element represents a single entry (post) in the XML feed.
36
*
37
* <p>An example of an Atom feed can be found at:
38
* http://en.wikipedia.org/w/index.php?title=Atom_(standard)&oldid=560239173#Example_of_an_Atom_1.0_feed
39
*/
40
public class FeedParser {
41
42
// Constants indicting XML element names that we're interested in
43
private static final int TAG_ID = 1;
44
private static final int TAG_TITLE = 2;
45
private static final int TAG_PUBLISHED = 3;
46
private static final int TAG_LINK = 4;
47
48
// We don't use XML namespaces
49
private static final String ns = null;
50
51
/** Parse an Atom feed, returning a collection of Entry objects.
52
*
53
* @param in Atom feed, as a stream.
54
* @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
55
* @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
56
* @throws java.io.IOException on I/O error.
57
*/
58
public List<Entry> parse(InputStream in)
59
throws XmlPullParserException, IOException, ParseException {
60
try {
61
XmlPullParser parser = Xml.newPullParser();
62
parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false);
63
parser.setInput(in, null);
64
parser.nextTag();
65
return readFeed(parser);
66
} finally {
67
in.close();
68
}
69
}
70
71
/**
72
* Decode a feed attached to an XmlPullParser.
73
*
74
* @param parser Incoming XMl
75
* @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
76
* @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
77
* @throws java.io.IOException on I/O error.
78
*/
79
private List<Entry> readFeed(XmlPullParser parser)
80
throws XmlPullParserException, IOException, ParseException {
81
List<Entry> entries = new ArrayList<Entry>();
82
83
// Search for <feed> tags. These wrap the beginning/end of an Atom document.
84
//
85
// Example:
86
// <?xml version="1.0" encoding="utf-8"?>
87
// <feed xmlns="http://www.w3.org/2005/Atom">
88
// ...
89
// </feed>
90
parser.require(XmlPullParser.START_TAG, ns, "feed");
91
while (parser.next() != XmlPullParser.END_TAG) {
92
if (parser.getEventType() != XmlPullParser.START_TAG) {
93
continue;
94
}
95
String name = parser.getName();
96
// Starts by looking for the <entry> tag. This tag repeates inside of <feed> for each
97
// article in the feed.
98
//
99
// Example:
100
// <entry>
101
// <title>Article title</title>
102
// <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
103
// <link rel="edit" href="http://example.com/admin/article/1234"/>
104
// <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
105
// <published>2003-06-27T12:00:00Z</published>
106
// <updated>2003-06-28T12:00:00Z</updated>
107
// <summary>Article summary goes here.</summary>
108
// <author>
109
// <name>Rick Deckard</name>
110
// <email>[email protected]</email>
111
// </author>
112
// </entry>
113
if (name.equals("entry")) {
114
entries.add(readEntry(parser));
115
} else {
116
skip(parser);
117
}
118
}
119
return entries;
120
}
121
122
/**
123
* Parses the contents of an entry. If it encounters a title, summary, or link tag, hands them
124
* off to their respective "read" methods for processing. Otherwise, skips the tag.
125
*/
126
private Entry readEntry(XmlPullParser parser)
127
throws XmlPullParserException, IOException, ParseException {
128
parser.require(XmlPullParser.START_TAG, ns, "entry");
129
String id = null;
130
String title = null;
131
String link = null;
132
long publishedOn = 0;
133
134
while (parser.next() != XmlPullParser.END_TAG) {
135
if (parser.getEventType() != XmlPullParser.START_TAG) {
136
continue;
137
}
138
String name = parser.getName();
139
if (name.equals("id")){
140
// Example: <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
141
id = readTag(parser, TAG_ID);
142
} else if (name.equals("title")) {
143
// Example: <title>Article title</title>
144
title = readTag(parser, TAG_TITLE);
145
} else if (name.equals("link")) {
146
// Example: <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
147
//
148
// Multiple link types can be included. readAlternateLink() will only return
149
// non-null when reading an "alternate"-type link. Ignore other responses.
150
String tempLink = readTag(parser, TAG_LINK);
151
if (tempLink != null) {
152
link = tempLink;
153
}
154
} else if (name.equals("published")) {
155
// Example: <published>2003-06-27T12:00:00Z</published>
156
Time t = new Time();
157
t.parse3339(readTag(parser, TAG_PUBLISHED));
158
publishedOn = t.toMillis(false);
159
} else {
160
skip(parser);
161
}
162
}
163
return new Entry(id, title, link, publishedOn);
164
}
165
166
/**
167
* Process an incoming tag and read the selected value from it.
168
*/
169
private String readTag(XmlPullParser parser, int tagType)
170
throws IOException, XmlPullParserException {
171
String tag = null;
172
String endTag = null;
173
174
switch (tagType) {
175
case TAG_ID:
176
return readBasicTag(parser, "id");
177
case TAG_TITLE:
178
return readBasicTag(parser, "title");
179
case TAG_PUBLISHED:
180
return readBasicTag(parser, "published");
181
case TAG_LINK:
182
return readAlternateLink(parser);
183
default:
184
throw new IllegalArgumentException("Unknown tag type: " + tagType);
185
}
186
}
187
188
/**
189
* Reads the body of a basic XML tag, which is guaranteed not to contain any nested elements.
190
*
191
* <p>You probably want to call readTag().
192
*
193
* @param parser Current parser object
194
* @param tag XML element tag name to parse
195
* @return Body of the specified tag
196
* @throws java.io.IOException
197
* @throws org.xmlpull.v1.XmlPullParserException
198
*/
199
private String readBasicTag(XmlPullParser parser, String tag)
200
throws IOException, XmlPullParserException {
201
parser.require(XmlPullParser.START_TAG, ns, tag);
202
String result = readText(parser);
203
parser.require(XmlPullParser.END_TAG, ns, tag);
204
return result;
205
}
206
207
/**
208
* Processes link tags in the feed.
209
*/
210
private String readAlternateLink(XmlPullParser parser)
211
throws IOException, XmlPullParserException {
212
String link = null;
213
parser.require(XmlPullParser.START_TAG, ns, "link");
214
String tag = parser.getName();
215
String relType = parser.getAttributeValue(null, "rel");
216
if (relType.equals("alternate")) {
217
link = parser.getAttributeValue(null, "href");
218
}
219
while (true) {
220
if (parser.nextTag() == XmlPullParser.END_TAG) break;
221
// Intentionally break; consumes any remaining sub-tags.
222
}
223
return link;
224
}
225
226
/**
227
* For the tags title and summary, extracts their text values.
228
*/
229
private String readText(XmlPullParser parser) throws IOException, XmlPullParserException {
230
String result = null;
231
if (parser.next() == XmlPullParser.TEXT) {
232
result = parser.getText();
233
parser.nextTag();
234
}
235
return result;
236
}
237
238
/**
239
* Skips tags the parser isn't interested in. Uses depth to handle nested tags. i.e.,
240
* if the next tag after a START_TAG isn't a matching END_TAG, it keeps going until it
241
* finds the matching END_TAG (as indicated by the value of "depth" being 0).
242
*/
243
private void skip(XmlPullParser parser) throws XmlPullParserException, IOException {
244
if (parser.getEventType() != XmlPullParser.START_TAG) {
245
throw new IllegalStateException();
246
}
247
int depth = 1;
248
while (depth != 0) {
249
switch (parser.next()) {
250
case XmlPullParser.END_TAG:
251
depth--;
252
break;
253
case XmlPullParser.START_TAG:
254
depth++;
255
break;
256
}
257
}
258
}
259
260
/**
261
* This class represents a single entry (post) in the XML feed.
262
*
263
* <p>It includes the data members "title," "link," and "summary."
264
*/
265
public static class Entry {
266
public final String id;
267
public final String title;
268
public final String link;
269
public final long published;
270
271
Entry(String id, String title, String link, long published) {
272
this.id = id;
273
this.title = title;
274
this.link = link;
275
this.published = published;
276
}
277
}
278
}