3166 feed generation needs performance improvement
3306 feed returns invalid last-modified header

   1 #!/usr/bin/python2.4
   2 #
   3 # CDDL HEADER START
   4 #
   5 # The contents of this file are subject to the terms of the
   6 # Common Development and Distribution License (the "License").
   7 # You may not use this file except in compliance with the License.
   8 #
   9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10 # or http://www.opensolaris.org/os/licensing.
  11 # See the License for the specific language governing permissions
  12 # and limitations under the License.
  13 #
  14 # When distributing Covered Code, include this CDDL HEADER in each
  15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16 # If applicable, add the following below this CDDL HEADER, with the
  17 # fields enclosed by brackets "[]" replaced with your own identifying
  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23 # Use is subject to license terms.
  24 
  25 import subprocess
  26 import threading
  27 import signal
  28 import os
  29 import sys
  30 import cherrypy
  31 
  32 import pkg.catalog as catalog

  33 import pkg.indexer as indexer
  34 import pkg.server.query_engine as query_e
  35 
  36 from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
  37 from pkg.misc import emsg
  38 
  39 class ServerCatalog(catalog.Catalog):
  40         """The catalog information which is only needed by the server."""
  41 
  42         def __init__(self, cat_root, authority = None, pkg_root = None,
  43             read_only = False, index_root = None, repo_root = None,
  44             rebuild = True):
  45 
  46                 self.index_root = index_root
  47                 self.repo_root = repo_root
  48 
  49                 # The update_handle lock protects the update_handle variable.
  50                 # This allows update_handle to be checked and acted on in a
  51                 # consistent step, preventing the dropping of needed updates.
  52                 # The check at the top of refresh index should always be done
  53                 # prior to deciding to spin off a process for indexing as it
  54                 # prevents more than one indexing process being run at the same
  55                 # time.
  56                 self.searchdb_update_handle_lock = threading.Lock()
  57 
  58                 if self.index_root:
  59                         self.query_engine = \
  60                             query_e.ServerQueryEngine(self.index_root)
  61 
  62                 if os.name == 'posix':
  63                         try:
  64                                 signal.signal(signal.SIGCHLD,
  65                                     self.child_handler)
  66                         except ValueError:
  67                                 emsg("Tried to create signal handler in "
  68                                     "a thread other than the main thread")
  69 
  70                 self.searchdb_update_handle = None
  71                 self._search_available = False
  72                 self.deferred_searchdb_updates = []
  73                 self.deferred_searchdb_updates_lock = threading.Lock()
  74 
  75                 self.refresh_again = False
  76 
  77                 catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
  78                     read_only, rebuild)
  79 
  80                 if not self._search_available:
  81                         self._check_search()
  82 
  83         def whence(self, cmd):
  84                 if cmd[0] != '/':
  85                         tmp_cmd = cmd
  86                         cmd = None
  87                         path = os.environ['PATH'].split(':')
  88                         path.append(os.environ['PWD'])
  89                         for p in path:
  90                                 if os.path.exists(os.path.join(p, tmp_cmd)):
  91                                         cmd = os.path.join(p, tmp_cmd)
  92                                         break
  93                         assert cmd
  94                 return cmd
  95 
  96         def refresh_index(self):
  97                 """ This function refreshes the search indexes if there any new
  98                 packages. It starts a subprocess which results in a call to
  99                 run_update_index (see below) which does the actual update.
 100                 """
 101 
 102                 self.searchdb_update_handle_lock.acquire()
 103 
 104                 if self.searchdb_update_handle:
 105                         self.refresh_again = True
 106                         self.searchdb_update_handle_lock.release()
 107                         return
 108 
 109                 try:
 110                         fmris_to_index = set(self.fmris())
 111 
 112                         indexer.Indexer.check_for_updates(self.index_root,
 113                             fmris_to_index)
 114 
 115                         if fmris_to_index:
 116                                 if os.name == 'posix':
 117                                         cmd = self.whence(sys.argv[0])
 118                                         args = (cmd, "--refresh-index", "-d",
 119                                             self.repo_root)
 120                                         try:
 121                                                 self.searchdb_update_handle = \
 122                                                     subprocess.Popen(args,
 123                                                         stderr = \
 124                                                         subprocess.STDOUT)
 125                                         except Exception, e:
 126                                                 emsg("Starting the indexing "
 127                                                     "process failed")
 128                                                 raise
 129                                 else:
 130                                         self.run_update_index()
 131                         else:
 132                                 # Since there is nothing to index, setup
 133                                 # the index and declare search available.
 134                                 # We only log this if this represents
 135                                 # a change in status of the server.
 136                                 ind = indexer.Indexer(self.index_root,
 137                                     SERVER_DEFAULT_MEM_USE_KB)
 138                                 ind.setup()
 139                                 if not self._search_available:
 140                                         cherrypy.log("Search Available",
 141                                             "INDEX")
 142                                 self._search_available = True
 143                 finally:
 144                         self.searchdb_update_handle_lock.release()
 145 
 146         def run_update_index(self):
 147                 """ Determines which fmris need to be indexed and passes them
 148                 to the indexer.
 149 
 150                 Note: Only one instance of this method should be running.
 151                 External locking is expected to ensure this behavior. Calling
 152                 refresh index is the preferred method to use to reindex.
 153                 """
 154                 fmris_to_index = set(self.fmris())
 155 
 156                 indexer.Indexer.check_for_updates(self.index_root,
 157                     fmris_to_index)
 158 
 159                 if fmris_to_index:
 160                         self.__update_searchdb_unlocked(fmris_to_index)
 161                 else:
 162                         ind = indexer.Indexer(self.index_root,
 163                             SERVER_DEFAULT_MEM_USE_KB)
 164                         ind.setup()
 165 
 166         def _check_search(self):
 167                 ind = indexer.Indexer(self.index_root,
 168                     SERVER_DEFAULT_MEM_USE_KB)
 169                 if ind.check_index_existence():
 170                         self._search_available = True
 171                         cherrypy.log("Search Available", "INDEX")
 172                         
 173         def build_catalog(self):
 174                 """ Creates an Indexer instance and after building the
 175                 catalog, refreshes the index.
 176                 """
 177                 self._check_search()
 178                 catalog.Catalog.build_catalog(self)
 179                 # refresh_index doesn't use file modification times
 180                 # to determine which packages need to be indexed, so use
 181                 # it to reindex if it's needed.
 182                 self.refresh_index()
 183 
 184         def child_handler(self, sig, frame):
 185                 """ Handler method for the SIGCLD signal.  Checks to see if the
 186                 search database update child has finished, and enables searching
 187                 if it finished successfully, or logs an error if it didn't.
 188                 """
 189                 try:
 190                         signal.signal(signal.SIGCHLD, self.child_handler)
 191                 except ValueError:
 192                         emsg("Tried to create signal handler in "
 193                             "a thread other than the main thread")
 194                 # If there's no update_handle, then another subprocess was
 195                 # spun off and that was what finished. If the poll() returns
 196                 # None, then while the indexer was running, another process
 197                 # that was spun off finished.
 198                 rc = None
 199                 if not self.searchdb_update_handle:
 200                         return
 201                 rc = self.searchdb_update_handle.poll()
 202                 if rc == None:
 203                         return
 204 
 205                 if rc == 0:
 206                         self._search_available = True
 207                         cherrypy.log("Search indexes updated and available.",
 208                             "INDEX")
 209                         # Need to acquire this lock to prevent the possibility
 210                         # of a race condition with refresh_index where a needed
 211                         # refresh is dropped. It is possible that an extra
 212                         # refresh will be done with this code, but that refresh
 213                         # should be very quick to finish.
 214                         self.searchdb_update_handle_lock.acquire()
 215                         self.searchdb_update_handle = None
 216                         self.searchdb_update_handle_lock.release()
 217 
 218                         if self.refresh_again:
 219                                 self.refresh_again = False
 220                                 self.refresh_index()
 221                 elif rc > 0:
 222                         # XXX This should be logged instead
 223                         # If the refresh of the index failed, defensively
 224                         # declare that search is unavailable.
 225                         self._search_available = False
 226                         emsg(_("ERROR building search database, rc: %s"))
 227                         emsg(_(self.searchdb_update_handle.stderr.read()))
 228 
 229         def __update_searchdb_unlocked(self, fmri_list):
 230                 """ Takes a fmri_list and calls the indexer with a list of fmri
 231                 and manifest file path pairs. It assumes that all needed
 232                 locking has already occurred.
 233                 """
 234                 assert self.index_root
 235                 fmri_manifest_list = []
 236 
 237                 # Rather than storing those, simply pass along the
 238                 # file and have the indexer take care of opening and
 239                 # reading the manifest file. Since the indexer 
 240                 # processes and discards the manifest structure (and its
 241                 # search dictionary for that matter) this
 242                 # is much more memory efficient.
 243 
 244                 for f in fmri_list:
 245                         mfst_path = os.path.join(self.pkg_root,
 246                                                  f.get_dir_path())
 247                         fmri_manifest_list.append((f, mfst_path))
 248 
 249                 if fmri_manifest_list:
 250                         index_inst = indexer.Indexer(self.index_root,
 251                             SERVER_DEFAULT_MEM_USE_KB)
 252                         index_inst.server_update_index(fmri_manifest_list)
 253 
 254         def search(self, token):
 255                 """Search through the search database for 'token'.  Return a
 256                 list of token type / fmri pairs."""
 257                 assert self.index_root
 258                 if not self.query_engine:
 259                         self.query_engine = \
 260                             query_e.ServerQueryEngine(self.index_root)
 261                 query = query_e.Query(token, case_sensitive=False)
 262                 return self.query_engine.search(query)

















--- EOF ---