#!/usr/bin/python2.4
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.

import subprocess
import threading
import signal
import os
import sys
import cherrypy

import pkg.catalog as catalog
import pkg.indexer as indexer
import pkg.server.query_engine as query_e

from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
from pkg.misc import emsg


class ServerCatalog(catalog.Catalog):
    """The catalog information which is only needed by the server.

    On top of catalog.Catalog this class tracks whether a search index
    is available and keeps that index up to date, either by spawning a
    child process (posix) or by indexing in-process (elsewhere)."""

    def __init__(self, cat_root, authority=None, pkg_root=None,
        read_only=False, index_root=None, repo_root=None,
        rebuild=True):
        """Set up the search bookkeeping and then initialize the
        underlying catalog.

        cat_root, authority, pkg_root, read_only and rebuild are passed
        straight through to catalog.Catalog.__init__.  index_root is the
        directory handed to the indexer and query engine; repo_root is
        passed via "-d" to the indexing child process."""

        self.index_root = index_root
        self.repo_root = repo_root

        # The update_handle lock protects the update_handle variable.
        # This allows update_handle to be checked and acted on in a
        # consistent step, preventing the dropping of needed updates.
        # The check at the top of refresh index should always be done
        # prior to deciding to spin off a process for indexing as it
        # prevents more than one indexing process being run at the same
        # time.
        self.searchdb_update_handle_lock = threading.Lock()

        # Always define the attribute so that search() can test it
        # even when no index_root was supplied at construction time.
        self.query_engine = None
        if self.index_root:
            self.query_engine = \
                query_e.ServerQueryEngine(self.index_root)

        if os.name == 'posix':
            try:
                signal.signal(signal.SIGCHLD,
                    self.child_handler)
            except ValueError:
                emsg("Tried to create signal handler in "
                    "a thread other than the main thread")

        self.searchdb_update_handle = None
        self._search_available = False
        self.deferred_searchdb_updates = []
        self.deferred_searchdb_updates_lock = threading.Lock()

        # Set when a refresh is requested while an indexing child is
        # still running; child_handler then triggers another refresh.
        self.refresh_again = False

        catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
            read_only, rebuild)

        if not self._search_available:
            self._check_search()

    def whence(self, cmd):
        """Return the path of 'cmd'.

        A 'cmd' starting with '/' is returned unchanged.  Otherwise
        each directory in $PATH, followed by the current working
        directory, is probed and the first existing candidate is
        returned.  An AssertionError is raised if nothing is found."""

        if cmd[0] != '/':
            wanted = cmd
            cmd = None

            # Neither PATH nor PWD is guaranteed to be present in the
            # environment of a daemon, so do not index os.environ
            # directly.
            search_dirs = []
            path_env = os.environ.get('PATH')
            if path_env:
                search_dirs = path_env.split(':')
            search_dirs.append(os.environ.get('PWD') or os.getcwd())

            for d in search_dirs:
                candidate = os.path.join(d, wanted)
                if os.path.exists(candidate):
                    cmd = candidate
                    break
            assert cmd, "unable to locate %s" % wanted
        return cmd

    def refresh_index(self):
        """Refresh the search indexes if there are any new packages.

        On posix this starts a subprocess (this program re-run with
        "--refresh-index -d <repo_root>"); elsewhere run_update_index
        is called directly.  If an indexing subprocess is already
        outstanding, the request is recorded in refresh_again and the
        method returns immediately."""

        self.searchdb_update_handle_lock.acquire()
        try:
            if self.searchdb_update_handle:
                # An indexer is already running; ask child_handler to
                # start another pass once it completes.
                self.refresh_again = True
                return

            fmris_to_index = set(self.fmris())

            indexer.Indexer.check_for_updates(self.index_root,
                fmris_to_index)

            if fmris_to_index:
                if os.name == 'posix':
                    cmd = self.whence(sys.argv[0])
                    args = (cmd, "--refresh-index", "-d",
                        self.repo_root)
                    try:
                        self.searchdb_update_handle = \
                            subprocess.Popen(args,
                            stderr=subprocess.STDOUT)
                    except Exception:
                        emsg("Starting the indexing "
                            "process failed")
                        raise
                else:
                    self.run_update_index()
            else:
                # Since there is nothing to index, setup
                # the index and declare search available.
                # We only log this if this represents
                # a change in status of the server.
                ind = indexer.Indexer(self.index_root,
                    SERVER_DEFAULT_MEM_USE_KB)
                ind.setup()
                if not self._search_available:
                    cherrypy.log("Search Available",
                        "INDEX")
                self._search_available = True
        finally:
            self.searchdb_update_handle_lock.release()

    def run_update_index(self):
        """Determine which fmris need to be indexed and pass them to
        the indexer.

        Note: Only one instance of this method should be running.
        External locking is expected to ensure this behavior.  Calling
        refresh_index is the preferred method to use to reindex."""

        fmris_to_index = set(self.fmris())

        indexer.Indexer.check_for_updates(self.index_root,
            fmris_to_index)

        if fmris_to_index:
            self.__update_searchdb_unlocked(fmris_to_index)
        else:
            # Nothing to index; just make sure the index is set up.
            ind = indexer.Indexer(self.index_root,
                SERVER_DEFAULT_MEM_USE_KB)
            ind.setup()

    def _check_search(self):
        """Mark search as available, and log that fact, if the indexer
        reports that an index already exists under index_root."""

        ind = indexer.Indexer(self.index_root,
            SERVER_DEFAULT_MEM_USE_KB)
        if ind.check_index_existence():
            self._search_available = True
            cherrypy.log("Search Available", "INDEX")

    def build_catalog(self):
        """Check for an existing index, build the catalog, and then
        refresh the index."""

        self._check_search()
        catalog.Catalog.build_catalog(self)
        # refresh_index doesn't use file modification times
        # to determine which packages need to be indexed, so use
        # it to reindex if it's needed.
        self.refresh_index()

    def child_handler(self, sig, frame):
        """Handler method for the SIGCLD signal.  Checks to see if the
        search database update child has finished, and enables
        searching if it finished successfully, or logs an error and
        disables searching if it didn't.

        In both cases the update handle is cleared once the child has
        exited so that later refresh_index calls can start a new
        indexing process."""

        try:
            signal.signal(signal.SIGCHLD, self.child_handler)
        except ValueError:
            emsg("Tried to create signal handler in "
                "a thread other than the main thread")

        # If there's no update_handle, then another subprocess was
        # spun off and that was what finished.  If the poll() returns
        # None, then while the indexer was running, another process
        # that was spun off finished.
        handle = self.searchdb_update_handle
        if not handle:
            return
        rc = handle.poll()
        if rc is None:
            return

        # NOTE(review): the lock below is also held by refresh_index
        # while it spawns the child; if this handler can run in that
        # same thread during that window it would block -- confirm.
        if rc == 0:
            self._search_available = True
            cherrypy.log("Search indexes updated and available.",
                "INDEX")
            # Need to acquire this lock to prevent the possibility
            # of a race condition with refresh_index where a needed
            # refresh is dropped.  It is possible that an extra
            # refresh will be done with this code, but that refresh
            # should be very quick to finish.
            self.searchdb_update_handle_lock.acquire()
            self.searchdb_update_handle = None
            self.searchdb_update_handle_lock.release()

            if self.refresh_again:
                self.refresh_again = False
                self.refresh_index()
        else:
            # XXX This should be logged instead
            # If the refresh of the index failed (non-zero exit, or
            # a negative code meaning the child died from a signal),
            # defensively declare that search is unavailable.
            self._search_available = False
            emsg(_("ERROR building search database, rc: %s") % rc)
            # The child is started with stderr redirected to stdout
            # and no pipe, so there is normally no stream to read.
            if handle.stderr is not None:
                emsg(handle.stderr.read())

            # Forget the finished child; otherwise every later call
            # to refresh_index would believe an indexer is still
            # running and never start a new one.
            self.searchdb_update_handle_lock.acquire()
            self.searchdb_update_handle = None
            self.searchdb_update_handle_lock.release()

    def __update_searchdb_unlocked(self, fmri_list):
        """Takes a fmri_list and calls the indexer with a list of fmri
        and manifest file path pairs.  It assumes that all needed
        locking has already occurred."""

        assert self.index_root

        # Rather than storing those, simply pass along the
        # file and have the indexer take care of opening and
        # reading the manifest file.  Since the indexer
        # processes and discards the manifest structure (and its
        # search dictionary for that matter) this
        # is much more memory efficient.
        fmri_manifest_list = [
            (f, os.path.join(self.pkg_root, f.get_dir_path()))
            for f in fmri_list
        ]

        if fmri_manifest_list:
            index_inst = indexer.Indexer(self.index_root,
                SERVER_DEFAULT_MEM_USE_KB)
            index_inst.server_update_index(fmri_manifest_list)

    def search(self, token):
        """Search through the search database for 'token'.  Return
        whatever the query engine's search() yields for a
        case-insensitive query on that token."""

        assert self.index_root
        if not self.query_engine:
            # Created lazily when index_root was not known at
            # construction time.
            self.query_engine = \
                query_e.ServerQueryEngine(self.index_root)
        query = query_e.Query(token, case_sensitive=False)
        return self.query_engine.search(query)