#!/usr/bin/python2.4
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.

import subprocess
import threading
import signal
import os
import sys
import cherrypy

import pkg.catalog as catalog
import pkg.fmri as fmri
import pkg.indexer as indexer
import pkg.server.query_engine as query_e

from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
from pkg.misc import emsg

class ServerCatalog(catalog.Catalog):
        """The catalog information which is only needed by the server."""

        def __init__(self, cat_root, authority=None, pkg_root=None,
            read_only=False, index_root=None, repo_root=None,
            rebuild=True):
                """Set up the search-index bookkeeping and then initialize
                the underlying catalog.

                'index_root' is the directory handed to the indexer and
                query engine; 'repo_root' is passed to the indexing
                subprocess via its "-d" option.  The remaining arguments
                are forwarded unchanged to catalog.Catalog.__init__.
                """

                self.index_root = index_root
                self.repo_root = repo_root

                # The update_handle lock protects the update_handle variable.
                # This allows update_handle to be checked and acted on in a
                # consistent step, preventing the dropping of needed updates.
                # The check at the top of refresh index should always be done
                # prior to deciding to spin off a process for indexing as it
                # prevents more than one indexing process being run at the same
                # time.
                self.searchdb_update_handle_lock = threading.Lock()

                # All state read by child_handler() must exist before the
                # SIGCHLD handler is installed below; otherwise a signal
                # delivered early would hit a missing attribute.
                self.searchdb_update_handle = None
                self._search_available = False
                self.deferred_searchdb_updates = []
                self.deferred_searchdb_updates_lock = threading.Lock()

                self.refresh_again = False

                # Always define query_engine so that search() can test it
                # even when no index_root was supplied at construction.
                self.query_engine = None
                if self.index_root:
                        self.query_engine = \
                            query_e.ServerQueryEngine(self.index_root)

                if os.name == 'posix':
                        try:
                                signal.signal(signal.SIGCHLD,
                                    self.child_handler)
                        except ValueError:
                                emsg("Tried to create signal handler in "
                                    "a thread other than the main thread")

                catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
                    read_only, rebuild)

                if not self._search_available:
                        self._check_search()

        def whence(self, cmd):
                """Return the full path of 'cmd'.

                A 'cmd' that already starts with '/' is returned untouched.
                Otherwise each directory in $PATH, followed by the current
                working directory, is searched for an existing entry named
                'cmd'.  An AssertionError is raised when nothing is found.
                """
                if cmd[0] != '/':
                        tmp_cmd = cmd
                        cmd = None
                        # PATH and PWD are not guaranteed to be present in
                        # the environment (e.g. when started by a service
                        # manager), so fall back instead of raising KeyError.
                        path = os.environ.get('PATH', '').split(':')
                        path.append(os.environ.get('PWD') or os.getcwd())
                        for p in path:
                                candidate = os.path.join(p, tmp_cmd)
                                if os.path.exists(candidate):
                                        cmd = candidate
                                        break
                assert cmd
                return cmd

        def refresh_index(self):
                """ This function refreshes the search indexes if there any new
                packages.  It starts a subprocess which results in a call to
                run_update_index (see below) which does the actual update.

                If an indexing subprocess is already outstanding, only the
                refresh_again flag is set so that child_handler() can start
                another pass once the current one finishes.
                """

                self.searchdb_update_handle_lock.acquire()
                try:
                        if self.searchdb_update_handle:
                                self.refresh_again = True
                                return

                        fmris_to_index = set(self.fmris())

                        # check_for_updates is expected to prune the set down
                        # to the fmris that still need indexing.
                        indexer.Indexer.check_for_updates(self.index_root,
                            fmris_to_index)

                        if fmris_to_index:
                                if os.name == 'posix':
                                        cmd = self.whence(sys.argv[0])
                                        args = (cmd, "--refresh-index", "-d",
                                            self.repo_root)
                                        try:
                                                self.searchdb_update_handle = \
                                                    subprocess.Popen(args,
                                                        stderr = \
                                                        subprocess.STDOUT)
                                        except Exception:
                                                emsg("Starting the indexing "
                                                    "process failed")
                                                raise
                                else:
                                        self.run_update_index()
                        else:
                                # Since there is nothing to index, setup
                                # the index and declare search available.
                                # We only log this if this represents
                                # a change in status of the server.
                                ind = indexer.Indexer(self.index_root,
                                    SERVER_DEFAULT_MEM_USE_KB)
                                ind.setup()
                                if not self._search_available:
                                        cherrypy.log("Search Available",
                                            "INDEX")
                                self._search_available = True
                finally:
                        self.searchdb_update_handle_lock.release()

        def run_update_index(self):
                """ Determines which fmris need to be indexed and passes them
                to the indexer.

                Note: Only one instance of this method should be running.
                External locking is expected to ensure this behavior. Calling
                refresh index is the preferred method to use to reindex.
                """
                fmris_to_index = set(self.fmris())

                indexer.Indexer.check_for_updates(self.index_root,
                    fmris_to_index)

                if fmris_to_index:
                        self.__update_searchdb_unlocked(fmris_to_index)
                else:
                        # Nothing to index; just make sure the index is
                        # set up.
                        ind = indexer.Indexer(self.index_root,
                            SERVER_DEFAULT_MEM_USE_KB)
                        ind.setup()

        def _check_search(self):
                """Mark search as available, and log that fact, if the
                indexer reports that an index already exists."""
                ind = indexer.Indexer(self.index_root,
                    SERVER_DEFAULT_MEM_USE_KB)
                if ind.check_index_existence():
                        self._search_available = True
                        cherrypy.log("Search Available", "INDEX")

        def build_catalog(self):
                """ Creates an Indexer instance and after building the
                catalog, refreshes the index.
                """
                self._check_search()
                catalog.Catalog.build_catalog(self)
                # refresh_index doesn't use file modification times
                # to determine which packages need to be indexed, so use
                # it to reindex if it's needed.
                self.refresh_index()

        def child_handler(self, sig, frame):
                """ Handler method for the SIGCHLD signal.  Checks to see if the
                search database update child has finished, and enables searching
                if it finished successfully, or logs an error if it didn't.

                In either case the update handle is cleared once the child
                has exited, so that later calls to refresh_index() are able
                to start a new indexing process.
                """
                try:
                        signal.signal(signal.SIGCHLD, self.child_handler)
                except ValueError:
                        emsg("Tried to create signal handler in "
                            "a thread other than the main thread")
                # If there's no update_handle, then another subprocess was
                # spun off and that was what finished.  If the poll() returns
                # None, then while the indexer was running, another process
                # that was spun off finished.
                handle = self.searchdb_update_handle
                if not handle:
                        return
                rc = handle.poll()
                if rc is None:
                        return

                if rc == 0:
                        self._search_available = True
                        cherrypy.log("Search indexes updated and available.",
                            "INDEX")
                        # Need to acquire this lock to prevent the possibility
                        # of a race condition with refresh_index where a needed
                        # refresh is dropped.  It is possible that an extra
                        # refresh will be done with this code, but that refresh
                        # should be very quick to finish.
                        self.searchdb_update_handle_lock.acquire()
                        self.searchdb_update_handle = None
                        self.searchdb_update_handle_lock.release()

                        if self.refresh_again:
                                self.refresh_again = False
                                self.refresh_index()
                else:
                        # XXX This should be logged instead
                        # If the refresh of the index failed, defensively
                        # declare that search is unavailable.  A negative rc
                        # (child terminated by a signal) is treated as a
                        # failure as well.
                        self._search_available = False
                        emsg(_("ERROR building search database, rc: %s") % rc)
                        # The child is started without a stderr pipe, in
                        # which case Popen.stderr is None and there is
                        # nothing to read back here.
                        child_stderr = handle.stderr
                        if child_stderr is not None:
                                emsg(child_stderr.read())
                        # Drop the finished handle; leaving it in place
                        # would make every later refresh_index() call think
                        # an indexer is still running.
                        # NOTE(review): like the success path above, this
                        # takes the lock from within a signal handler.
                        self.searchdb_update_handle_lock.acquire()
                        self.searchdb_update_handle = None
                        self.searchdb_update_handle_lock.release()

        def __update_searchdb_unlocked(self, fmri_list):
                """ Takes a fmri_list and calls the indexer with a list of fmri
                and manifest file path pairs.  It assumes that all needed
                locking has already occurred.
                """
                assert self.index_root
                fmri_manifest_list = []

                # Rather than storing those, simply pass along the
                # file and have the indexer take care of opening and
                # reading the manifest file. Since the indexer
                # processes and discards the manifest structure (and its
                # search dictionary for that matter) this
                # is much more memory efficient.

                for f in fmri_list:
                        mfst_path = os.path.join(self.pkg_root,
                            f.get_dir_path())
                        fmri_manifest_list.append((f, mfst_path))

                if fmri_manifest_list:
                        index_inst = indexer.Indexer(self.index_root,
                            SERVER_DEFAULT_MEM_USE_KB)
                        index_inst.server_update_index(fmri_manifest_list)

        def search(self, token):
                """Search through the search database for 'token'.  Return a
                list of token type / fmri pairs."""
                assert self.index_root
                # The query engine is created lazily if it was not built
                # at construction time.
                if not self.query_engine:
                        self.query_engine = \
                            query_e.ServerQueryEngine(self.index_root)
                query = query_e.Query(token, case_sensitive=False)
                return self.query_engine.search(query)

        @staticmethod
        def read_catalog(catalog, dir, auth=None):
                """Read the catalog file in "dir" and combine it with the
                existing data in "catalog".

                Only lines starting with "V pkg" or "C pkg" are considered;
                the text from the eighth character onward is parsed as an
                fmri and cached into "catalog" for authority "auth".
                """

                catf = open(os.path.join(dir, "catalog"))
                # try/finally guarantees the file is closed even if parsing
                # one of the entries raises.
                try:
                        for line in catf:
                                if not line.startswith("V pkg") and \
                                    not line.startswith("C pkg"):
                                        continue

                                f = fmri.PkgFmri(line[7:])
                                ServerCatalog.cache_fmri(catalog, f, auth)
                finally:
                        catf.close()