search_excerpt($keys, $text, $langcode = NULL)
Returns snippets from a piece of text, with search keywords highlighted.
Used for formatting search results. All HTML tags will be stripped from $text.
Parameters
string $keys: A string containing a search query.
string $text: The text to extract fragments from.
string|null $langcode: Language code for the language of $text, if known.
Return value
array A render array containing HTML for the excerpt.
Related topics
- Search interface
- The Drupal search interface manages a global search mechanism.
File
- core/modules/search/search.module, line 642
- Enables site-wide keyword searching.
Code
/**
 * Returns snippets from a piece of text, with search keywords highlighted.
 *
 * Used for formatting search results. All HTML tags are stripped from $text
 * and HTML entities are decoded before the fragments are extracted.
 *
 * @param string $keys
 *   A string containing a search query.
 * @param string $text
 *   The text to extract fragments from.
 * @param string|null $langcode
 *   Language code for the language of $text, if known. It is passed through
 *   to _search_find_match_with_simplify().
 *
 * @return array
 *   A render array. If no keyword was located in the text it holds a
 *   '#plain_text' element with the truncated start of the text; otherwise it
 *   holds a '#markup' element in which matched keywords are wrapped in
 *   <strong> tags, with '#allowed_tags' limited to 'strong'.
 */
function search_excerpt($keys, $text, $langcode = NULL) {
  // We highlight around non-indexable or CJK characters.
  $boundary_character = '[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . ']';
  $preceded_by_boundary = '(?<=' . $boundary_character . ')';
  $followed_by_boundary = '(?=' . $boundary_character . ')';

  // Extract positive keywords and phrases.
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text by stripping HTML tags and decoding HTML entities.
  $text = strip_tags(str_replace(['<', '>'], [' <', '> '], $text));
  $text = Html::decodeEntities($text);
  $text_length = strlen($text);

  // Make a list of unique keywords that are actually found in the text,
  // which could be items in $keys or replacements that are equivalent through
  // search_simplify().
  $temp_keys = [];
  foreach ($keys as $key) {
    $key = _search_find_match_with_simplify($key, $text, $boundary_character, $langcode);
    if (isset($key)) {
      // Quote slashes so they can be used in regular expressions.
      $temp_keys[] = preg_quote($key, '/');
    }
  }
  // Several keywords could have simplified down to the same thing, so pick
  // out the unique ones.
  $keys = array_unique($temp_keys);

  // Since we are requiring a match on a word boundary, the searched text must
  // start and end with a space. Build the padded copy once; it does not
  // change inside the loops below.
  $padded_text = ' ' . $text . ' ';

  // Extract fragments of about 60 characters around keywords, bounded by word
  // boundary characters. Try to reach 256 characters, using second occurrences
  // if necessary.
  $ranges = [];
  $length = 0;
  $look_start = [];
  $remaining_keys = $keys;

  while ($length < 256 && !empty($remaining_keys)) {
    $found_keys = [];
    foreach ($remaining_keys as $key) {
      if ($length >= 256) {
        break;
      }

      // Remember where we last found $key, in case we are coming through a
      // second time.
      if (!isset($look_start[$key])) {
        $look_start[$key] = 0;
      }

      // See if we can find $key after where we found it the last time.
      $matches = [];
      if (preg_match('/' . $preceded_by_boundary . $key . $followed_by_boundary . '/iu', $padded_text, $matches, PREG_OFFSET_CAPTURE, $look_start[$key])) {
        $found_position = $matches[0][1];

        // Resume the next search one whole character past the start of this
        // match. Offsets are in bytes, and in UTF-8 mode the start offset has
        // to sit on a character boundary; adding a fixed 1 would land inside
        // a multibyte first character and make the next preg_match() fail.
        $first_character = [];
        $step = preg_match('/^./us', $matches[0][0], $first_character) ? strlen($first_character[0]) : 1;
        $look_start[$key] = $found_position + $step;

        // Keep track of which keys we found this time, in case we need to
        // pass through again to find more text.
        $found_keys[] = $key;

        // Locate a space before and after this match, leaving about 60
        // characters of context on each end.
        $before = strpos(' ' . $text, ' ', max(0, $found_position - 61));
        if ($before !== FALSE && $before <= $found_position) {
          if ($text_length > $found_position + 60) {
            $after = strrpos(substr($text, 0, $found_position + 60), ' ', $found_position);
          }
          else {
            $after = $text_length;
          }
          if ($after !== FALSE && $after > $found_position) {
            // Account for the spaces we added.
            $before = max($before - 1, 0);
            if ($before < $after) {
              // Save this range.
              $ranges[$before] = $after;
              $length += $after - $before;
            }
          }
        }
      }
    }
    // Next time through this loop, only look for keys we found this time,
    // if any.
    $remaining_keys = $found_keys;
  }

  if (empty($ranges)) {
    // We didn't find any keyword matches, so just return the first part of the
    // text. It is returned as '#plain_text' rather than as markup.
    return [
      '#plain_text' => Unicode::truncate($text, 256, TRUE, TRUE),
    ];
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Collapse overlapping text ranges into one. The sorting makes it O(n).
  $new_ranges = [];
  $max_end = 0;
  $working_from = NULL;
  $working_to = NULL;
  foreach ($ranges as $this_from => $this_to) {
    $max_end = max($max_end, $this_to);
    if (!isset($working_from)) {
      // This is the first time through this loop: initialize.
      $working_from = $this_from;
      $working_to = $this_to;
      continue;
    }
    if ($this_from <= $working_to) {
      // The ranges overlap: combine them.
      $working_to = max($working_to, $this_to);
    }
    else {
      // The ranges do not overlap: save the working range and start a new one.
      $new_ranges[$working_from] = $working_to;
      $working_from = $this_from;
      $working_to = $this_to;
    }
  }
  // Save the remaining working range.
  $new_ranges[$working_from] = $working_to;

  // Fetch text within the combined ranges we found.
  $out = [];
  foreach ($new_ranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }

  // Combine the text chunks with "…" separators. The "…" needs to be
  // translated. Let translators have the … separator text as one chunk.
  $ellipses = explode('@excerpt', t('… @excerpt … @excerpt …'));
  $text = (isset($new_ranges[0]) ? '' : $ellipses[0]) . implode($ellipses[1], $out) . (($max_end < strlen($text) - 1) ? $ellipses[2] : '');
  $text = Html::escape($text);

  // Highlight keywords. Must be done at once to prevent conflicts ('strong'
  // and '<strong>').
  $highlighted = preg_replace('/' . $preceded_by_boundary . '(?:' . implode('|', $keys) . ')' . $followed_by_boundary . '/iu', '<strong>\0</strong>', ' ' . $text . ' ');
  // preg_replace() returns NULL when the regular expression engine reports an
  // error. Keep the escaped, unhighlighted excerpt in that case instead of
  // collapsing the whole result to an empty string.
  if ($highlighted !== NULL) {
    $text = $highlighted;
  }
  $text = trim($text);

  return [
    '#markup' => $text,
    '#allowed_tags' => ['strong'],
  ];
}
Please log in to continue.