Print this page
3166 feed generation needs performance improvement
3306 feed returns invalid last-modified header
Split |
Close |
Expand all |
Collapse all |
--- old/src/modules/server/catalog.py
+++ new/src/modules/server/catalog.py
1 1 #!/usr/bin/python2.4
2 2 #
3 3 # CDDL HEADER START
4 4 #
5 5 # The contents of this file are subject to the terms of the
6 6 # Common Development and Distribution License (the "License").
7 7 # You may not use this file except in compliance with the License.
8 8 #
9 9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 # or http://www.opensolaris.org/os/licensing.
11 11 # See the License for the specific language governing permissions
12 12 # and limitations under the License.
13 13 #
14 14 # When distributing Covered Code, include this CDDL HEADER in each
15 15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 # If applicable, add the following below this CDDL HEADER, with the
17 17 # fields enclosed by brackets "[]" replaced with your own identifying
18 18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 19 #
20 20 # CDDL HEADER END
21 21 #
22 22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
23 23 # Use is subject to license terms.
24 24
25 25 import subprocess
26 26 import threading
27 27 import signal
28 28 import os
29 29 import sys
30 30 import cherrypy
31 31
32 32 import pkg.catalog as catalog
33 +import pkg.fmri as fmri
33 34 import pkg.indexer as indexer
34 35 import pkg.server.query_engine as query_e
35 36
36 37 from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
37 38 from pkg.misc import emsg
38 39
class ServerCatalog(catalog.Catalog):
        """The catalog information which is only needed by the server."""

        def __init__(self, cat_root, authority = None, pkg_root = None,
            read_only = False, index_root = None, repo_root = None,
            rebuild = True):
                """Set up server-side search state, then hand off catalog
                construction to the base class.  Ordering matters here:
                the lock and SIGCHLD handler must exist before the base
                constructor can trigger any indexing activity."""

                self.index_root = index_root
                self.repo_root = repo_root

                # Serializes access to searchdb_update_handle so that the
                # handle can be checked and acted on as one atomic step.
                # This prevents needed refreshes from being dropped, and
                # the check at the top of refresh_index — always done
                # before spinning off an indexing process — relies on it
                # to ensure at most one indexing process runs at a time.
                self.searchdb_update_handle_lock = threading.Lock()

                if self.index_root:
                        self.query_engine = \
                            query_e.ServerQueryEngine(self.index_root)

                if os.name == 'posix':
                        # Reap the indexing subprocess via SIGCHLD; this
                        # only works from the main thread.
                        try:
                                signal.signal(signal.SIGCHLD,
                                    self.child_handler)
                        except ValueError:
                                emsg("Tried to create signal handler in "
                                    "a thread other than the main thread")

                self.searchdb_update_handle = None
                self._search_available = False
                self.deferred_searchdb_updates = []
                self.deferred_searchdb_updates_lock = threading.Lock()

                self.refresh_again = False

                catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
                    read_only, rebuild)

                if not self._search_available:
                        self._check_search()
82 83
83 84 def whence(self, cmd):
84 85 if cmd[0] != '/':
85 86 tmp_cmd = cmd
86 87 cmd = None
87 88 path = os.environ['PATH'].split(':')
88 89 path.append(os.environ['PWD'])
89 90 for p in path:
90 91 if os.path.exists(os.path.join(p, tmp_cmd)):
91 92 cmd = os.path.join(p, tmp_cmd)
92 93 break
93 94 assert cmd
94 95 return cmd
95 96
96 97 def refresh_index(self):
97 98 """ This function refreshes the search indexes if there any new
98 99 packages. It starts a subprocess which results in a call to
99 100 run_update_index (see below) which does the actual update.
100 101 """
101 102
102 103 self.searchdb_update_handle_lock.acquire()
103 104
104 105 if self.searchdb_update_handle:
105 106 self.refresh_again = True
106 107 self.searchdb_update_handle_lock.release()
107 108 return
108 109
109 110 try:
110 111 fmris_to_index = set(self.fmris())
111 112
112 113 indexer.Indexer.check_for_updates(self.index_root,
113 114 fmris_to_index)
114 115
115 116 if fmris_to_index:
116 117 if os.name == 'posix':
117 118 cmd = self.whence(sys.argv[0])
118 119 args = (cmd, "--refresh-index", "-d",
119 120 self.repo_root)
120 121 try:
121 122 self.searchdb_update_handle = \
122 123 subprocess.Popen(args,
123 124 stderr = \
124 125 subprocess.STDOUT)
125 126 except Exception, e:
126 127 emsg("Starting the indexing "
127 128 "process failed")
128 129 raise
129 130 else:
130 131 self.run_update_index()
131 132 else:
132 133 # Since there is nothing to index, setup
133 134 # the index and declare search available.
134 135 # We only log this if this represents
135 136 # a change in status of the server.
136 137 ind = indexer.Indexer(self.index_root,
137 138 SERVER_DEFAULT_MEM_USE_KB)
138 139 ind.setup()
139 140 if not self._search_available:
140 141 cherrypy.log("Search Available",
141 142 "INDEX")
142 143 self._search_available = True
143 144 finally:
144 145 self.searchdb_update_handle_lock.release()
145 146
146 147 def run_update_index(self):
147 148 """ Determines which fmris need to be indexed and passes them
148 149 to the indexer.
149 150
150 151 Note: Only one instance of this method should be running.
151 152 External locking is expected to ensure this behavior. Calling
152 153 refresh index is the preferred method to use to reindex.
153 154 """
154 155 fmris_to_index = set(self.fmris())
155 156
156 157 indexer.Indexer.check_for_updates(self.index_root,
157 158 fmris_to_index)
158 159
159 160 if fmris_to_index:
160 161 self.__update_searchdb_unlocked(fmris_to_index)
161 162 else:
162 163 ind = indexer.Indexer(self.index_root,
163 164 SERVER_DEFAULT_MEM_USE_KB)
164 165 ind.setup()
165 166
166 167 def _check_search(self):
167 168 ind = indexer.Indexer(self.index_root,
168 169 SERVER_DEFAULT_MEM_USE_KB)
169 170 if ind.check_index_existence():
170 171 self._search_available = True
171 172 cherrypy.log("Search Available", "INDEX")
172 173
173 174 def build_catalog(self):
174 175 """ Creates an Indexer instance and after building the
175 176 catalog, refreshes the index.
176 177 """
177 178 self._check_search()
178 179 catalog.Catalog.build_catalog(self)
179 180 # refresh_index doesn't use file modification times
180 181 # to determine which packages need to be indexed, so use
181 182 # it to reindex if it's needed.
182 183 self.refresh_index()
183 184
184 185 def child_handler(self, sig, frame):
185 186 """ Handler method for the SIGCLD signal. Checks to see if the
186 187 search database update child has finished, and enables searching
187 188 if it finished successfully, or logs an error if it didn't.
188 189 """
189 190 try:
190 191 signal.signal(signal.SIGCHLD, self.child_handler)
191 192 except ValueError:
192 193 emsg("Tried to create signal handler in "
193 194 "a thread other than the main thread")
194 195 # If there's no update_handle, then another subprocess was
195 196 # spun off and that was what finished. If the poll() returns
196 197 # None, then while the indexer was running, another process
197 198 # that was spun off finished.
198 199 rc = None
199 200 if not self.searchdb_update_handle:
200 201 return
201 202 rc = self.searchdb_update_handle.poll()
202 203 if rc == None:
203 204 return
204 205
205 206 if rc == 0:
206 207 self._search_available = True
207 208 cherrypy.log("Search indexes updated and available.",
208 209 "INDEX")
209 210 # Need to acquire this lock to prevent the possibility
210 211 # of a race condition with refresh_index where a needed
211 212 # refresh is dropped. It is possible that an extra
212 213 # refresh will be done with this code, but that refresh
213 214 # should be very quick to finish.
214 215 self.searchdb_update_handle_lock.acquire()
215 216 self.searchdb_update_handle = None
216 217 self.searchdb_update_handle_lock.release()
217 218
218 219 if self.refresh_again:
219 220 self.refresh_again = False
220 221 self.refresh_index()
221 222 elif rc > 0:
222 223 # XXX This should be logged instead
223 224 # If the refresh of the index failed, defensively
224 225 # declare that search is unavailable.
225 226 self._search_available = False
226 227 emsg(_("ERROR building search database, rc: %s"))
227 228 emsg(_(self.searchdb_update_handle.stderr.read()))
228 229
229 230 def __update_searchdb_unlocked(self, fmri_list):
230 231 """ Takes a fmri_list and calls the indexer with a list of fmri
231 232 and manifest file path pairs. It assumes that all needed
232 233 locking has already occurred.
233 234 """
234 235 assert self.index_root
235 236 fmri_manifest_list = []
236 237
237 238 # Rather than storing those, simply pass along the
238 239 # file and have the indexer take care of opening and
239 240 # reading the manifest file. Since the indexer
240 241 # processes and discards the manifest structure (and its
241 242 # search dictionary for that matter) this
242 243 # is much more memory efficient.
243 244
244 245 for f in fmri_list:
245 246 mfst_path = os.path.join(self.pkg_root,
246 247 f.get_dir_path())
247 248 fmri_manifest_list.append((f, mfst_path))
248 249
249 250 if fmri_manifest_list:
250 251 index_inst = indexer.Indexer(self.index_root,
251 252 SERVER_DEFAULT_MEM_USE_KB)
252 253 index_inst.server_update_index(fmri_manifest_list)
↓ open down ↓ |
210 lines elided |
↑ open up ↑ |
253 254
254 255 def search(self, token):
255 256 """Search through the search database for 'token'. Return a
256 257 list of token type / fmri pairs."""
257 258 assert self.index_root
258 259 if not self.query_engine:
259 260 self.query_engine = \
260 261 query_e.ServerQueryEngine(self.index_root)
261 262 query = query_e.Query(token, case_sensitive=False)
262 263 return self.query_engine.search(query)
264 +
265 + @staticmethod
266 + def read_catalog(catalog, dir, auth=None):
267 + """Read the catalog file in "dir" and combine it with the
268 + existing data in "catalog"."""
269 +
270 + catf = file(os.path.join(dir, "catalog"))
271 + for line in catf:
272 + if not line.startswith("V pkg") and \
273 + not line.startswith("C pkg"):
274 + continue
275 +
276 + f = fmri.PkgFmri(line[7:])
277 + ServerCatalog.cache_fmri(catalog, f, auth)
278 +
279 + catf.close()
280 +
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX