search_index($type, $sid, $langcode, $text)
Updates the full-text search index for a particular item.
Parameters
string $type: The plugin ID or other machine-readable type of this item, which should be less than 64 bytes.
int $sid: An ID number identifying this particular item (e.g., node ID).
string $langcode: Language code for the language of the text being indexed.
string $text: The content of this item. Must be a piece of HTML or plain text.
Related topics
- Search interface
- The Drupal search interface manages a global search mechanism.
File
- core/modules/search/search.module, line 430
- Enables site-wide keyword searching.
Code
/**
 * Updates the full-text search index for a particular item.
 *
 * The text is split into HTML tags and plain text. Words found inside tags
 * listed in the 'index.tag_weights' setting receive a higher score. The
 * cleaned-up text is written to the search_dataset table and the per-word
 * scores are merged into the search_index table.
 *
 * @param string $type
 *   The plugin ID or other machine-readable type of this item, which should be
 *   less than 64 bytes.
 * @param int $sid
 *   An ID number identifying this particular item (e.g., node ID).
 * @param string $langcode
 *   Language code for the language of the text being indexed.
 * @param string $text
 *   The content of this item. Must be a piece of HTML or plain text.
 */
function search_index($type, $sid, $langcode, $text) {
  $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

  // Multipliers for scores of words inside certain HTML tags. The weights are
  // stored in config so that modules can overwrite the default weights.
  // Note: 'a' must be included for link ranking to work.
  $tags = \Drupal::config('search.settings')->get('index.tag_weights');

  // Strip off all ignored tags to speed up processing, but insert space before
  // and after them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<' . implode('><', array_keys($tags)) . '>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  // Odd/even counter. Tag or no tag.
  $tag = FALSE;
  // Starting score per word.
  $score = 1;
  // Accumulator for cleaned up data.
  $accum = ' ';
  // Stack with open tags.
  $tagstack = array();
  // Counter for consecutive words.
  $tagwords = 0;
  // Focus state.
  $focus = 1;
  // Accumulator for words for index.
  $scored_words = array();

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag. The tag name ends at
      // the first whitespace character of any kind: attributes may be
      // separated from the name by a tab or a line break, not only a space.
      $tagname = Unicode::strtolower(substr($value, 0, strcspn($value, " \t\n\r\f")));
      // Closing or opening tag? strpos() is used instead of $tagname[0] so
      // that an empty tag name does not trigger a string offset error.
      if (strpos($tagname, '/') === 0) {
        $tagname = substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect
        // boosting.
        if (!count($tagstack) || $tagstack[0] != $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score.
          $score = max(1, $score - $tags[array_shift($tagstack)]);
        }
      }
      elseif (!isset($tags[$tagname]) || (isset($tagstack[0]) && $tagstack[0] == $tagname)) {
        // Either the tag name has no configured weight (malformed markup such
        // as an empty or self-closed tag name), or the tag is nested directly
        // inside an identical tag. None of the tags we look for make sense
        // when nested identically, so in both cases it's probably broken HTML.
        $tagstack = array();
        $score = 1;
      }
      else {
        // Add to open tag stack and increment score.
        array_unshift($tagstack, $tagname);
        $score += $tags[$tagname];
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty
      // values.
      if ($value != '') {
        $words = search_index_split($value, $langcode);
        foreach ($words as $word) {
          // Add word to accumulator.
          $accum .= $word . ' ';
          // Check word length.
          if (is_numeric($word) || Unicode::strlen($word) >= $minimum_word_size) {
            if (!isset($scored_words[$word])) {
              $scored_words[$word] = 0;
            }
            $scored_words[$word] += $score * $focus;
            // Focus is a decaying value in terms of the amount of unique words
            // up to this point. From 100 words and more, it decays, to e.g. 0.5
            // at 500 words and 0.3 at 1000 words.
            $focus = min(1, .01 + 3.5 / (2 + count($scored_words) * .015));
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was
          // accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Remove the item $sid from the search index, and invalidate the relevant
  // cache tags.
  search_index_clear($type, $sid, $langcode);

  // Insert cleaned up data into dataset.
  db_insert('search_dataset')
    ->fields(array(
      'sid' => $sid,
      'langcode' => $langcode,
      'type' => $type,
      'data' => $accum,
      'reindex' => 0,
    ))
    ->execute();

  // Insert results into search index.
  foreach ($scored_words as $word => $word_score) {
    // If a word already exists in the database, its score gets increased
    // appropriately. If not, we create a new record with the appropriate
    // starting score.
    db_merge('search_index')
      ->keys(array(
        'word' => $word,
        'sid' => $sid,
        'langcode' => $langcode,
        'type' => $type,
      ))
      ->fields(array('score' => $word_score))
      ->expression('score', 'score + :score', array(':score' => $word_score))
      ->execute();
    search_dirty($word);
  }
}
Please log in to continue.