3166 feed generation needs performance improvement
3306 feed returns invalid last-modified header

   1 #!/usr/bin/python2.4
   2 #
   3 # CDDL HEADER START
   4 #
   5 # The contents of this file are subject to the terms of the
   6 # Common Development and Distribution License (the "License").
   7 # You may not use this file except in compliance with the License.
   8 #
   9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10 # or http://www.opensolaris.org/os/licensing.
  11 # See the License for the specific language governing permissions
  12 # and limitations under the License.
  13 #
  14 # When distributing Covered Code, include this CDDL HEADER in each
  15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16 # If applicable, add the following below this CDDL HEADER, with the
  17 # fields enclosed by brackets "[]" replaced with your own identifying
  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23 # Use is subject to license terms.
  24 
  25 import subprocess
  26 import threading
  27 import signal
  28 import os
  29 import sys
  30 import cherrypy
  31 
  32 import pkg.catalog as catalog
  33 import pkg.fmri as fmri
  34 import pkg.indexer as indexer
  35 import pkg.server.query_engine as query_e
  36 
  37 from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
  38 from pkg.misc import emsg
  39 
  40 class ServerCatalog(catalog.Catalog):
  41         """The catalog information which is only needed by the server."""
  42 
  43         def __init__(self, cat_root, authority = None, pkg_root = None,
  44             read_only = False, index_root = None, repo_root = None,
  45             rebuild = True):
  46 
  47                 self.index_root = index_root
  48                 self.repo_root = repo_root
  49 
  50                 # The update_handle lock protects the update_handle variable.
  51                 # This allows update_handle to be checked and acted on in a
  52                 # consistent step, preventing the dropping of needed updates.
  53                 # The check at the top of refresh index should always be done
  54                 # prior to deciding to spin off a process for indexing as it
  55                 # prevents more than one indexing process being run at the same
  56                 # time.
  57                 self.searchdb_update_handle_lock = threading.Lock()
  58 
  59                 if self.index_root:
  60                         self.query_engine = \
  61                             query_e.ServerQueryEngine(self.index_root)
  62 
  63                 if os.name == 'posix':
  64                         try:
  65                                 signal.signal(signal.SIGCHLD,
  66                                     self.child_handler)
  67                         except ValueError:
  68                                 emsg("Tried to create signal handler in "
  69                                     "a thread other than the main thread")
  70 
  71                 self.searchdb_update_handle = None
  72                 self._search_available = False
  73                 self.deferred_searchdb_updates = []
  74                 self.deferred_searchdb_updates_lock = threading.Lock()
  75 
  76                 self.refresh_again = False
  77 
  78                 catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
  79                     read_only, rebuild)
  80 
  81                 if not self._search_available:
  82                         self._check_search()
  83 
  84         def whence(self, cmd):
  85                 if cmd[0] != '/':
  86                         tmp_cmd = cmd
  87                         cmd = None
  88                         path = os.environ['PATH'].split(':')
  89                         path.append(os.environ['PWD'])
  90                         for p in path:
  91                                 if os.path.exists(os.path.join(p, tmp_cmd)):
  92                                         cmd = os.path.join(p, tmp_cmd)
  93                                         break
  94                         assert cmd
  95                 return cmd
  96 
  97         def refresh_index(self):
  98                 """ This function refreshes the search indexes if there any new
  99                 packages. It starts a subprocess which results in a call to
 100                 run_update_index (see below) which does the actual update.
 101                 """
 102 
 103                 self.searchdb_update_handle_lock.acquire()
 104 
 105                 if self.searchdb_update_handle:
 106                         self.refresh_again = True
 107                         self.searchdb_update_handle_lock.release()
 108                         return
 109 
 110                 try:
 111                         fmris_to_index = set(self.fmris())
 112 
 113                         indexer.Indexer.check_for_updates(self.index_root,
 114                             fmris_to_index)
 115 
 116                         if fmris_to_index:
 117                                 if os.name == 'posix':
 118                                         cmd = self.whence(sys.argv[0])
 119                                         args = (cmd, "--refresh-index", "-d",
 120                                             self.repo_root)
 121                                         try:
 122                                                 self.searchdb_update_handle = \
 123                                                     subprocess.Popen(args,
 124                                                         stderr = \
 125                                                         subprocess.STDOUT)
 126                                         except Exception, e:
 127                                                 emsg("Starting the indexing "
 128                                                     "process failed")
 129                                                 raise
 130                                 else:
 131                                         self.run_update_index()
 132                         else:
 133                                 # Since there is nothing to index, setup
 134                                 # the index and declare search available.
 135                                 # We only log this if this represents
 136                                 # a change in status of the server.
 137                                 ind = indexer.Indexer(self.index_root,
 138                                     SERVER_DEFAULT_MEM_USE_KB)
 139                                 ind.setup()
 140                                 if not self._search_available:
 141                                         cherrypy.log("Search Available",
 142                                             "INDEX")
 143                                 self._search_available = True
 144                 finally:
 145                         self.searchdb_update_handle_lock.release()
 146 
 147         def run_update_index(self):
 148                 """ Determines which fmris need to be indexed and passes them
 149                 to the indexer.
 150 
 151                 Note: Only one instance of this method should be running.
 152                 External locking is expected to ensure this behavior. Calling
 153                 refresh index is the preferred method to use to reindex.
 154                 """
 155                 fmris_to_index = set(self.fmris())
 156 
 157                 indexer.Indexer.check_for_updates(self.index_root,
 158                     fmris_to_index)
 159 
 160                 if fmris_to_index:
 161                         self.__update_searchdb_unlocked(fmris_to_index)
 162                 else:
 163                         ind = indexer.Indexer(self.index_root,
 164                             SERVER_DEFAULT_MEM_USE_KB)
 165                         ind.setup()
 166 
 167         def _check_search(self):
 168                 ind = indexer.Indexer(self.index_root,
 169                     SERVER_DEFAULT_MEM_USE_KB)
 170                 if ind.check_index_existence():
 171                         self._search_available = True
 172                         cherrypy.log("Search Available", "INDEX")
 173                         
 174         def build_catalog(self):
 175                 """ Creates an Indexer instance and after building the
 176                 catalog, refreshes the index.
 177                 """
 178                 self._check_search()
 179                 catalog.Catalog.build_catalog(self)
 180                 # refresh_index doesn't use file modification times
 181                 # to determine which packages need to be indexed, so use
 182                 # it to reindex if it's needed.
 183                 self.refresh_index()
 184 
 185         def child_handler(self, sig, frame):
 186                 """ Handler method for the SIGCLD signal.  Checks to see if the
 187                 search database update child has finished, and enables searching
 188                 if it finished successfully, or logs an error if it didn't.
 189                 """
 190                 try:
 191                         signal.signal(signal.SIGCHLD, self.child_handler)
 192                 except ValueError:
 193                         emsg("Tried to create signal handler in "
 194                             "a thread other than the main thread")
 195                 # If there's no update_handle, then another subprocess was
 196                 # spun off and that was what finished. If the poll() returns
 197                 # None, then while the indexer was running, another process
 198                 # that was spun off finished.
 199                 rc = None
 200                 if not self.searchdb_update_handle:
 201                         return
 202                 rc = self.searchdb_update_handle.poll()
 203                 if rc == None:
 204                         return
 205 
 206                 if rc == 0:
 207                         self._search_available = True
 208                         cherrypy.log("Search indexes updated and available.",
 209                             "INDEX")
 210                         # Need to acquire this lock to prevent the possibility
 211                         # of a race condition with refresh_index where a needed
 212                         # refresh is dropped. It is possible that an extra
 213                         # refresh will be done with this code, but that refresh
 214                         # should be very quick to finish.
 215                         self.searchdb_update_handle_lock.acquire()
 216                         self.searchdb_update_handle = None
 217                         self.searchdb_update_handle_lock.release()
 218 
 219                         if self.refresh_again:
 220                                 self.refresh_again = False
 221                                 self.refresh_index()
 222                 elif rc > 0:
 223                         # XXX This should be logged instead
 224                         # If the refresh of the index failed, defensively
 225                         # declare that search is unavailable.
 226                         self._search_available = False
 227                         emsg(_("ERROR building search database, rc: %s"))
 228                         emsg(_(self.searchdb_update_handle.stderr.read()))
 229 
 230         def __update_searchdb_unlocked(self, fmri_list):
 231                 """ Takes a fmri_list and calls the indexer with a list of fmri
 232                 and manifest file path pairs. It assumes that all needed
 233                 locking has already occurred.
 234                 """
 235                 assert self.index_root
 236                 fmri_manifest_list = []
 237 
 238                 # Rather than storing those, simply pass along the
 239                 # file and have the indexer take care of opening and
 240                 # reading the manifest file. Since the indexer 
 241                 # processes and discards the manifest structure (and its
 242                 # search dictionary for that matter) this
 243                 # is much more memory efficient.
 244 
 245                 for f in fmri_list:
 246                         mfst_path = os.path.join(self.pkg_root,
 247                                                  f.get_dir_path())
 248                         fmri_manifest_list.append((f, mfst_path))
 249 
 250                 if fmri_manifest_list:
 251                         index_inst = indexer.Indexer(self.index_root,
 252                             SERVER_DEFAULT_MEM_USE_KB)
 253                         index_inst.server_update_index(fmri_manifest_list)
 254 
 255         def search(self, token):
 256                 """Search through the search database for 'token'.  Return a
 257                 list of token type / fmri pairs."""
 258                 assert self.index_root
 259                 if not self.query_engine:
 260                         self.query_engine = \
 261                             query_e.ServerQueryEngine(self.index_root)
 262                 query = query_e.Query(token, case_sensitive=False)
 263                 return self.query_engine.search(query)
 264 
 265         @staticmethod
 266         def read_catalog(catalog, dir, auth=None):
 267                 """Read the catalog file in "dir" and combine it with the
 268                 existing data in "catalog"."""
 269 
 270                 catf = file(os.path.join(dir, "catalog"))
 271                 for line in catf:
 272                         if not line.startswith("V pkg") and \
 273                             not line.startswith("C pkg"):
 274                                 continue
 275 
 276                         f = fmri.PkgFmri(line[7:])
 277                         ServerCatalog.cache_fmri(catalog, f, auth)
 278 
 279                 catf.close()
 280 
--- EOF ---