/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using TermDocs = Lucene.Net.Index.TermDocs;

namespace Lucene.Net.Search
{
    /// <summary>Expert: A Scorer for documents matching a Term.</summary>
    sealed class TermScorer : Scorer
    {
        private Weight weight;
        private TermDocs termDocs;
        private byte[] norms;
        private float weightValue;
        private int doc;

        private int[] docs = new int[32];   // buffered doc numbers
        private int[] freqs = new int[32];  // buffered term freqs
        private int pointer;                // current position within the buffers
        private int pointerMax;             // number of valid entries in the buffers

        private const int SCORE_CACHE_SIZE = 32;
        private float[] scoreCache = new float[SCORE_CACHE_SIZE]; // precomputed scores for small term freqs

        /// <summary>Construct a TermScorer.</summary>
        /// <param name="weight">The weight of the Term in the query.</param>
        /// <param name="td">An iterator over the documents matching the Term.</param>
        /// <param name="similarity">The Similarity implementation to be used for score computations.</param>
        /// <param name="norms">The field norms of the document fields for the Term.</param>
        internal TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) : base(similarity)
        {
            this.weight = weight;
            this.termDocs = td;
            this.norms = norms;
            this.weightValue = weight.GetValue();

            for (int i = 0; i < SCORE_CACHE_SIZE; i++)
                scoreCache[i] = GetSimilarity().Tf(i) * weightValue;
        }

        public override void Score(HitCollector hc)
        {
            Next();
            Score(hc, System.Int32.MaxValue);
        }

        protected internal override bool Score(HitCollector c, int end)
        {
            Similarity similarity = GetSimilarity(); // cache sim in local
            float[] normDecoder = Similarity.GetNormDecoder();
            while (doc < end)
            {
                // for docs in window
                int f = freqs[pointer];
                float score = f < SCORE_CACHE_SIZE ? scoreCache[f] : similarity.Tf(f) * weightValue; // cache miss

                score *= normDecoder[norms[doc] & 0xFF]; // normalize for field

                c.Collect(doc, score); // collect score

                if (++pointer >= pointerMax)
                {
                    pointerMax = termDocs.Read(docs, freqs); // refill buffers
                    if (pointerMax != 0)
                    {
                        pointer = 0;
                    }
                    else
                    {
                        termDocs.Close(); // close stream
                        doc = System.Int32.MaxValue; // set to sentinel value
                        return false;
                    }
                }
                doc = docs[pointer];
            }
            return true;
        }
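
        // Implementation note: postings are consumed in windows of up to 32 entries.
        // The docs/freqs buffers are refilled in bulk via TermDocs.Read and walked
        // with pointer/pointerMax; when no further postings can be read, the
        // underlying TermDocs is closed and doc is set to Int32.MaxValue as an
        // end-of-stream sentinel.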

        /// <summary>Returns the current document number matching the query.
        /// Initially invalid, until <see cref="Next()"/> is called the first time.
        /// </summary>
        public override int Doc()
        {
            return doc;
        }

        /// <summary>Advances to the next document matching the query.
        /// The iterator over the matching documents is buffered using
        /// <see cref="TermDocs.Read(int[], int[])"/>.
        /// </summary>
        /// <returns>true iff there is another document matching the query.</returns>
        public override bool Next()
        {
            pointer++;
            if (pointer >= pointerMax)
            {
                pointerMax = termDocs.Read(docs, freqs); // refill buffer
                if (pointerMax != 0)
                {
                    pointer = 0;
                }
                else
                {
                    termDocs.Close(); // close stream
                    doc = System.Int32.MaxValue; // set to sentinel value
                    return false;
                }
            }
            doc = docs[pointer];
            return true;
        }

        public override float Score()
        {
            int f = freqs[pointer];
            float raw = f < SCORE_CACHE_SIZE ? scoreCache[f] : GetSimilarity().Tf(f) * weightValue; // cache miss

            return raw * Similarity.DecodeNorm(norms[doc]); // normalize for field
        }
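
        // Implementation note: a document's raw score is tf(freq) * weightValue;
        // for term frequencies below SCORE_CACHE_SIZE that product is taken from
        // scoreCache, which the constructor precomputes.  The raw score is then
        // multiplied by the decoded field norm for the document.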

        /// <summary>Skips to the first match beyond the current whose document number is
        /// greater than or equal to a given target.
        /// The implementation uses <see cref="TermDocs.SkipTo(int)"/>.
        /// </summary>
        /// <param name="target">The target document number.</param>
        /// <returns>true iff there is such a match.</returns>
        public override bool SkipTo(int target)
        {
            // first scan in cache
            for (pointer++; pointer < pointerMax; pointer++)
            {
                if (docs[pointer] >= target)
                {
                    doc = docs[pointer];
                    return true;
                }
            }

            // not found in cache, seek underlying stream
            bool result = termDocs.SkipTo(target);
            if (result)
            {
                pointerMax = 1;
                pointer = 0;
                docs[pointer] = doc = termDocs.Doc();
                freqs[pointer] = termDocs.Freq();
            }
            else
            {
                doc = System.Int32.MaxValue;
            }
            return result;
        }
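
        // Implementation note: SkipTo first scans the buffered window; only when the
        // target is not found there does it seek the underlying TermDocs, after which
        // the window is reset to the single posting that was found.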

        /// <summary>Returns an explanation of the score for a document.
        /// When this method is used, the <see cref="Next()"/> method
        /// and the <see cref="Score(HitCollector)"/> method should not be used.
        /// </summary>
        /// <param name="doc">The document number for the explanation.</param>
        /// <remarks>TODO: Modify to make use of <see cref="TermDocs.SkipTo(int)"/>.</remarks>
        public override Explanation Explain(int doc)
        {
            TermQuery query = (TermQuery) weight.GetQuery();
            Explanation tfExplanation = new Explanation();
            int tf = 0;
            while (pointer < pointerMax)
            {
                if (docs[pointer] == doc)
                    tf = freqs[pointer];
                pointer++;
            }
            if (tf == 0)
            {
                while (termDocs.Next())
                {
                    if (termDocs.Doc() == doc)
                    {
                        tf = termDocs.Freq();
                    }
                }
            }
            termDocs.Close();
            tfExplanation.SetValue(GetSimilarity().Tf(tf));
            tfExplanation.SetDescription("tf(termFreq(" + query.GetTerm() + ")=" + tf + ")");

            return tfExplanation;
        }

        public override System.String ToString()
        {
            return "scorer(" + weight + ")";
        }
    }
}