search_index($type, $sid, $langcode, $text)
Updates the full-text search index for a particular item.
Parameters
string $type: The plugin ID or other machine-readable type of this item, which should be less than 64 bytes.
int $sid: An ID number identifying this particular item (e.g., node ID).
string $langcode: Language code for the language of the text being indexed.
string $text: The content of this item. Must be a piece of HTML or plain text.
Related topics
- Search interface
- The Drupal search interface manages a global search mechanism.
File
- core/modules/search/search.module, line 430
- Enables site-wide keyword searching.
Code
/**
 * Updates the full-text search index for a particular item.
 *
 * The text is reduced to the tags listed in the 'index.tag_weights' setting,
 * split into words with search_index_split(), and every word that is numeric
 * or whose Unicode::strlen() is at least 'index.minimum_word_size' receives a
 * score. The item's existing index entries are cleared with
 * search_index_clear() before the new dataset and word scores are written.
 *
 * @param string $type
 *   The plugin ID or other machine-readable type of this item, which should
 *   be less than 64 bytes.
 * @param int $sid
 *   An ID number identifying this particular item (e.g., node ID).
 * @param string $langcode
 *   Language code for the language of the text being indexed.
 * @param string $text
 *   The content of this item. Must be a piece of HTML or plain text.
 */
function search_index($type, $sid, $langcode, $text) {
  $minimum_word_size = \Drupal::config('search.settings')->get('index.minimum_word_size');

  // Multipliers for scores of words inside certain HTML tags. The weights are
  // stored in config so that modules can overwrite the default weights.
  // Note: 'a' must be included for link ranking to work.
  $tags = \Drupal::config('search.settings')->get('index.tag_weights');

  // Strip off all ignored tags to speed up processing, but insert space before
  // and after them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<' . implode('><', array_keys($tags)) . '>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  // Odd/even counter. Tag or no tag.
  $tag = FALSE;
  // Starting score per word.
  $score = 1;
  // Accumulator for cleaned up data.
  $accum = ' ';
  // Stack with open tags.
  $tagstack = array();
  // Counter for consecutive words.
  $tagwords = 0;
  // Focus state.
  $focus = 1;
  // Accumulator for words for index.
  $scored_words = array();

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag. The tag name ends
      // at the first whitespace character of any kind: attributes are often
      // separated from the name by a newline or tab rather than a space, and
      // splitting on ' ' alone would fail to recognize such tags.
      $tagname = substr($value, 0, strcspn($value, " \t\r\n\f"));
      $tagname = Unicode::strtolower($tagname);
      // Closing or opening tag? The name can be empty (e.g. for '< b>'), so
      // check that the first character exists before reading it.
      if (isset($tagname[0]) && $tagname[0] == '/') {
        $tagname = substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
        if (!count($tagstack) || $tagstack[0] != $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score. Names without a
          // configured weight count as 0.
          $closed = array_shift($tagstack);
          $score = max(1, $score - (isset($tags[$closed]) ? $tags[$closed] : 0));
        }
      }
      else {
        if (isset($tagstack[0]) && $tagstack[0] == $tagname) {
          // None of the tags we look for make sense when nested identically.
          // If they are, it's probably broken HTML.
          $tagstack = array();
          $score = 1;
        }
        else {
          // Add to open tag stack and increment score. Names without a
          // configured weight (text that merely looks like a tag) add 0.
          array_unshift($tagstack, $tagname);
          $score += isset($tags[$tagname]) ? $tags[$tagname] : 0;
        }
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        $words = search_index_split($value, $langcode);
        foreach ($words as $word) {
          // Add word to accumulator
          $accum .= $word . ' ';
          // Check wordlength
          if (is_numeric($word) || Unicode::strlen($word) >= $minimum_word_size) {
            if (!isset($scored_words[$word])) {
              $scored_words[$word] = 0;
            }
            $scored_words[$word] += $score * $focus;
            // Focus is a decaying value in terms of the amount of unique words up to this point.
            // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
            $focus = min(1, .01 + 3.5 / (2 + count($scored_words) * .015));
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Remove the item $sid from the search index, and invalidate the relevant
  // cache tags.
  search_index_clear($type, $sid, $langcode);

  // Insert cleaned up data into dataset
  db_insert('search_dataset')
    ->fields(array(
      'sid' => $sid,
      'langcode' => $langcode,
      'type' => $type,
      'data' => $accum,
      'reindex' => 0,
    ))
    ->execute();

  // Insert results into search index
  foreach ($scored_words as $word => $word_score) {
    // If a word already exists in the database, its score gets increased
    // appropriately. If not, we create a new record with the appropriate
    // starting score.
    db_merge('search_index')
      ->keys(array(
        'word' => $word,
        'sid' => $sid,
        'langcode' => $langcode,
        'type' => $type,
      ))
      ->fields(array('score' => $word_score))
      ->expression('score', 'score + :score', array(':score' => $word_score))
      ->execute();
    search_dirty($word);
  }
}
Please log in to continue.