1 #!/usr/bin/python2.4
2 #
3 # CDDL HEADER START
4 #
5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
8 #
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
13 #
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 # Use is subject to license terms.
24
25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
26
27 At present, the pkg.server.feed module provides a set of routines that, from
28 a catalog, allow the construction of a feed representing the activity within
29 a given time period."""
30
31 import cherrypy
32 from cherrypy.lib.static import serve_file
33 import cStringIO
34 import datetime
35 import httplib
36 import os
37 import rfc822
38 import time
39 import urllib
40 import xml.dom.minidom as xmini
41
42 from pkg.misc import get_rel_path, get_res_path
43 import pkg.catalog as catalog
44 import pkg.fmri as fmri
45 import pkg.Uuid25 as uuid
46
# MIME type reported for the generated feed (used by handle()).
MIME_TYPE = 'application/atom+xml'
# File name of the cached feed; __get_cache_pathname() joins it to
# scfg.repo_root.
CACHE_FILENAME = "feed.xml"
# strftime/strptime pattern for the RFC 3339 timestamps in the feed.  The
# trailing 'Z' is a literal character in the pattern; the helpers that use
# this format do not perform any timezone conversion themselves.
RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"
50
def dt_to_rfc3339_str(ts):
    """Formats the datetime object 'ts' as an RFC 3339 timestamp string.

    The value is rendered with RFC3339_FMT exactly as given; no timezone
    conversion is applied here.
    """
    formatted = ts.strftime(RFC3339_FMT)
    return formatted
56
def rfc3339_str_to_ts(ts_str):
    """Parses 'ts_str', laid out per RFC3339_FMT, into seconds since the
    epoch.

    The parsed fields are handed to time.mktime(), so they are interpreted
    as local time.
    """
    parsed = time.strptime(ts_str, RFC3339_FMT)
    return time.mktime(parsed)
62
def rfc3339_str_to_dt(ts_str):
    """Parses 'ts_str', laid out per RFC3339_FMT, into a naive datetime
    object with one-second resolution.
    """
    # Only the first six struct_time fields (year .. second) are needed to
    # build the datetime.
    year, month, day, hour, minute, second = \
        time.strptime(ts_str, RFC3339_FMT)[:6]
    return datetime.datetime(year, month, day, hour, minute, second)
68
def ults_to_ts(ts_str):
    """Converts 'ts_str', a timestamp in updatelog format
    (YYYY-MM-DDTHH:MM:SS with optional fractional seconds), into seconds
    since the epoch.
    """
    # strptime cannot parse fractional seconds, so keep only what precedes
    # the first '.'.
    whole_seconds = ts_str.split('.', 1)[0]
    # Updatelog entries are currently recorded in local time rather than
    # UTC, which is why mktime() is the right conversion here.
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return time.mktime(parsed)
77
def ults_to_rfc3339_str(ts_str):
    """Returns an RFC 3339 formatted string for 'ts_str', which should be
    in updatelog format.
    """
    epoch_secs = ults_to_ts(ts_str)
    # Updatelog entries are currently in local time; gmtime() re-expresses
    # the instant as UTC fields before formatting.
    utc_fields = time.gmtime(epoch_secs)[:6]
    return dt_to_rfc3339_str(datetime.datetime(*utc_fields))
86
def fmri_to_taguri(rcfg, f):
    """Builds an RFC 4151 'tag' URI for the package FMRI 'f'.  See
    http://www.taguri.org/ for background.

    The URI has the form tag:<authority>,<date>:<specific>, where the
    authority is the configured feed/authority attribute, the date is the
    day portion of the FMRI's timestamp and the specific part is the
    unquoted URL path of the FMRI.
    """
    authority = rcfg.get_attribute("feed", "authority")
    day = f.get_timestamp().strftime("%Y-%m-%d")
    specific = urllib.unquote(f.get_url_path())
    return "tag:%s,%s:%s" % (authority, day, specific)
94
def init(scfg, rcfg):
    """Performs the one-time setup that feeds need: makes sure the feed has
    an identifier and discards any previously cached feed.
    """

    # RSS/Atom feeds need a unique, persistent identifier.  Because it has
    # to persist, one is only generated when the server is not read-only
    # (i.e. when the configuration can be saved) and none is set yet.
    if not scfg.is_read_only() and not rcfg.get_attribute("feed", "id"):
        # Random (type 4) UUID.
        rcfg._set_attribute("feed", "id", uuid.uuid4())

    # Drop the cache so configuration changes show up in the feed.
    __clear_cache(scfg)
112
def _append_text_element(doc, parent, tag, text):
    """Appends a '<tag>text</tag>' child element to 'parent' and returns the
    new element.

    The text node is created through the owning document, which (unlike
    replaceWholeText() on a detached Text node) also works when 'text' is
    an empty string.
    """
    elem = doc.createElement(tag)
    elem.appendChild(doc.createTextNode(text))
    parent.appendChild(elem)
    return elem

def set_title(request, rcfg, doc, feed, update_ts):
    """Attaches the feed-level Atom elements to 'feed'.

    'request' is passed to get_res_path() to resolve the icon and logo
    resources, 'rcfg' supplies the feed and repository attributes, 'doc' is
    the xmini document used to create nodes, 'feed' is the element that
    receives the children, and 'update_ts' is the datetime written as the
    feed's "updated" value.

    The elements are added in this order: title, link (rel="self"), id,
    updated, icon, logo and, if a repository maintainer is configured,
    author (name plus optional email).
    """

    _append_text_element(doc, feed, "title",
        rcfg.get_attribute("feed", "name"))

    self_link = doc.createElement("link")
    self_link.setAttribute("href", cherrypy.url())
    self_link.setAttribute("rel", "self")
    feed.appendChild(self_link)

    # Atom requires each feed to have a permanent, universally unique
    # identifier.
    _append_text_element(doc, feed, "id",
        "urn:uuid:%s" % rcfg.get_attribute("feed", "id"))

    # Indicate when the feed was last updated.
    _append_text_element(doc, feed, "updated", dt_to_rfc3339_str(update_ts))

    # Add our icon and logo.
    _append_text_element(doc, feed, "icon",
        get_res_path(request, rcfg.get_attribute("feed", "icon")))
    _append_text_element(doc, feed, "logo",
        get_res_path(request, rcfg.get_attribute("feed", "logo")))

    # The author information isn't required, but can be useful.
    maintainer = rcfg.get_attribute("repository", "maintainer")
    if not maintainer:
        return

    # The maintainer may not parse into any address at all; guard the
    # lookup instead of indexing blindly.
    addresses = rfc822.AddressList(maintainer).addresslist
    if addresses:
        name, email = addresses[0]
    else:
        name, email = None, None

    if not name:
        # Either nothing could be parsed, or we got an email address but
        # no name, in which case the name was likely parsed as a local
        # address.  Assume the whole string is the name.
        name = maintainer
        email = None

    author = doc.createElement("author")

    # A name element is required whenever an author element exists.
    _append_text_element(doc, author, "name", name)

    if email:
        # Optional email element, to provide a point of communication.
        _append_text_element(doc, author, "email", email)

    # Done with the author.
    feed.appendChild(author)
196
# Maps an updatelog operation code to [entry title prefix, entry content
# template]; the template's %s is filled with the package FMRI string.
# add_transaction() substitutes the "U" entry when a "+" operation is for a
# package name that already has an older version in the catalog.
operations = {
    "+": ["Added", "%s was added to the repository."],
    "-": ["Removed", "%s was removed from the repository."],
    "U": ["Updated", "%s, an update to an existing package, was added to "
        "the repository."]
}
203
def add_transaction(request, scfg, rcfg, doc, feed, txn):
    """Appends one Atom entry describing the updatelog transaction 'txn'
    to 'feed'.

    txn["catalog"] is expected to hold "<tag> <fmri>", txn["operation"] an
    operation code (see 'operations') and txn["timestamp"] an updatelog
    timestamp.  Entries carry real content, so no summary element is
    written.
    """

    def text_child(tag_name, value):
        # Builds <tag_name>value</tag_name> as a detached element.
        node = xmini.Text()
        node.replaceWholeText(value)
        elem = doc.createElement(tag_name)
        elem.appendChild(node)
        return elem

    entry = doc.createElement("entry")

    tag, fmri_str = txn["catalog"].split()
    pfmri = fmri.PkgFmri(fmri_str)

    # The entry is uniquely identified by a 'tag' uri derived from the
    # fmri.
    entry.appendChild(text_child("id", fmri_to_taguri(rcfg, pfmri)))

    # Work out which operation was performed; that decides the entry title
    # and content.
    # XXX Better way to reflect an unknown operation? (Aborting will make a
    # non-well-formed document.)
    op = txn["operation"]
    op_title, op_content = operations.get(op,
        ["Unknown Operation", "%s was changed in the repository."])

    if op == "+":
        # Gather every catalog FMRI with exactly this package name.
        same_name = catalog.extract_matching_fmris(
            scfg.updatelog.catalog.fmris(), pfmri.get_name(),
            matcher=fmri.exact_name_match)

        # The oldest fmri is the last match.  When there are several
        # matches and this one isn't the oldest, the "add" is really an
        # update to an existing package.
        if len(same_name) > 1 and pfmri != same_name[-1]:
            op_title, op_content = operations["U"]

    # Entry title.
    entry.appendChild(text_child("title", " ".join((op_title, fmri_str))))

    # When the entry was last updated (here: when the package was added).
    entry.appendChild(text_child("updated",
        ults_to_rfc3339_str(txn["timestamp"])))

    # Link to the info output for this package FMRI.
    link = doc.createElement("link")
    link.setAttribute("rel", "alternate")
    link.setAttribute("href",
        get_rel_path(request, 'info/0/%s' % pfmri.get_url_path()))
    entry.appendChild(link)

    # Content: the operation description with the FMRI filled in, plus a
    # note when the catalog tag marks the version as critical.
    sentences = [op_content % fmri_str]
    if tag == "C":
        sentences.append("This version is tagged as critical.")
    entry.appendChild(text_child("content", " ".join(sentences)))

    feed.appendChild(entry)
286
def update(request, scfg, rcfg, t, cf):
    """Generates a new Atom document for the current updates and writes it
    to the file-like object 'cf'.

    't' is a timestamp (seconds since the epoch); updatelog entries are
    requested starting from 't' minus the configured feed window.  handle()
    passes either an in-memory buffer or the cache file opened at
    scfg.repo_root/CACHE_FILENAME as 'cf'.
    """

    # Our configuration is stored in hours, convert it to seconds.
    window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
    feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)

    d = xmini.Document()

    feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
    feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")

    set_title(request, rcfg, d, feed, scfg.updatelog.last_update)

    d.appendChild(feed)

    # The feed should be presented in reverse chronological order.  A key
    # function parses each entry's timestamp once, rather than re-parsing
    # both operands on every comparison as a cmp function would; the
    # resulting order (including the handling of ties) is the same.
    def entry_time(txn):
        return ults_to_ts(txn["timestamp"])

    updates = sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
        key=entry_time, reverse=True)

    for txn in updates:
        add_transaction(request, scfg, rcfg, d, feed, txn)

    d.writexml(cf)
315
def __get_cache_pathname(scfg):
    """Returns the path of the cached feed file, CACHE_FILENAME, under
    scfg.repo_root.
    """
    cache_dir = scfg.repo_root
    return os.path.join(cache_dir, CACHE_FILENAME)
318
def __clear_cache(scfg):
    """Removes the cached feed file, if there is one.

    Does nothing when the server is read-only.  Raises cherrypy.HTTPError
    (500) if the file exists but cannot be removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        if os.path.exists(pathname):
            os.remove(pathname)
    except EnvironmentError:
        # os.remove() reports failures as OSError, not IOError;
        # EnvironmentError is the common base of both, so the failure is
        # actually turned into the intended HTTP error.
        raise cherrypy.HTTPError(
            httplib.INTERNAL_SERVER_ERROR,
            "Unable to clear feed cache.")
332
def __cache_needs_update(scfg):
    """Checks whether the feed cache file exists and is still valid.

    Returns a (need_update, last) tuple:

      (False, None)  the cache is at least as new as the updatelog's
                     last update.
      (True, last)   the cache is older; 'last' is the timestamp (seconds
                     since the epoch) parsed from the cached feed's
                     "updated" element.
      (True, None)   there is no cache file, or it could not be parsed or
                     lacks a usable "updated" value.  In the latter cases
                     the cache is cleared so that it gets regenerated.
    """
    cfpath = __get_cache_pathname(scfg)
    if not os.path.isfile(cfpath):
        return True, None

    # Attempt to parse the cached copy.  If we can't, for any reason,
    # treat it as invalid.
    try:
        d = xmini.parse(cfpath)
    except Exception:
        d = None

    # Find the text of the feed's first "updated" child element, which
    # records when the feed was last generated.
    updated_str = None
    if d is not None and d.childNodes:
        fe = d.childNodes[0]
        for cnode in fe.childNodes:
            if cnode.nodeName == "updated":
                # An empty <updated/> has no children; leave the value
                # unset rather than indexing blindly.
                if cnode.childNodes:
                    updated_str = cnode.childNodes[0].nodeValue
                break

    last_ts = None
    if updated_str is not None:
        try:
            last_ts = rfc3339_str_to_dt(updated_str)
        except (TypeError, ValueError):
            # Not a timestamp we can read; handled as invalid below.
            last_ts = None

    if last_ts is None:
        # We couldn't determine when the feed was generated, so assume the
        # cache is invalid, clear it, and force regeneration.
        __clear_cache(scfg)
        return True, None

    # Since our feed cache and updatelog might have been created within
    # the same second, we need to ignore small variances when determining
    # whether to update the feed cache.
    update_ts = scfg.updatelog.last_update.replace(microsecond=0)

    if last_ts >= update_ts:
        return False, None

    return True, rfc3339_str_to_ts(updated_str)
386
def handle(scfg, rcfg, request, response):
    """If there have been package updates since we last generated the feed,
    update the feed and send it to the client.  Otherwise, send them the
    cached copy if it is available.

    In read-only mode the feed is generated in memory and returned as a
    string after the Content-type, Last-Modified and Content-length
    headers have been set on 'response'.  Otherwise the feed is written to
    the cache file and the result of serve_file() for that file is
    returned.
    """

    cfpath = __get_cache_pathname(scfg)

    # First check to see if we already have a valid cache of the feed.
    need_update, last = __cache_needs_update(scfg)

    if need_update:
        # update() reports entries starting from the configured feed
        # window before the timestamp it is given.  If we have no
        # timestamp from a previous cache, use "now".
        if last is None:
            last = time.time()

        if scfg.is_read_only():
            # If the server is operating in readonly mode, the feed will
            # have to be generated every time.
            cf = cStringIO.StringIO()
            try:
                update(request, scfg, rcfg, last, cf)
                buf = cf.getvalue()
            finally:
                cf.close()

            # Now that the feed has been generated, set the headers
            # correctly and return it.  Last-Modified must be an HTTP-date
            # (RFC 1123 format, GMT), not an ISO 8601 string.
            response.headers['Content-type'] = MIME_TYPE
            response.headers['Last-Modified'] = rfc822.formatdate()
            response.headers['Content-length'] = len(buf)
            return buf
        else:
            # If the server isn't operating in readonly mode, the feed can
            # be generated and cached under the repository root.
            cf = open(cfpath, "w")
            try:
                update(request, scfg, rcfg, last, cf)
            finally:
                # Always release the handle, even if generation fails.
                cf.close()

    return serve_file(cfpath, MIME_TYPE)
429