search_excerpt($keys, $text, $langcode = NULL)
Returns snippets from a piece of text, with search keywords highlighted.
Used for formatting search results. All HTML tags will be stripped from $text.
Parameters
string $keys: A string containing a search query.
string $text: The text to extract fragments from.
string|null $langcode: Language code for the language of $text, if known.
Return value
array A render array containing HTML for the excerpt.
Related topics
- Search interface
- The Drupal search interface manages a global search mechanism.
File
- core/modules/search/search.module, line 642
- Enables site-wide keyword searching.
Code
/**
 * Returns snippets from a piece of text, with search keywords highlighted.
 *
 * Used for formatting search results. All HTML tags are stripped from $text
 * and HTML entities are decoded before fragments are extracted.
 *
 * @param string $keys
 *   A string containing a search query.
 * @param string $text
 *   The text to extract fragments from.
 * @param string|null $langcode
 *   Language code for the language of $text, if known.
 *
 * @return array
 *   A render array for the excerpt. When no keyword is found in the text it
 *   holds a '#plain_text' element with the truncated start of the text;
 *   otherwise it holds '#markup' with the matched keywords wrapped in
 *   <strong> tags and '#allowed_tags' restricted to 'strong'.
 */
function search_excerpt($keys, $text, $langcode = NULL) {
  // We highlight around non-indexable or CJK characters.
  $boundary_character = '[' . Unicode::PREG_CLASS_WORD_BOUNDARY . PREG_CLASS_CJK . ']';
  $preceded_by_boundary = '(?<=' . $boundary_character . ')';
  $followed_by_boundary = '(?=' . $boundary_character . ')';

  // Extract positive keywords and phrases.
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' ' . $keys, $matches);
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text by stripping HTML tags and decoding HTML entities.
  $text = strip_tags(str_replace(['<', '>'], [' <', '> '], $text));
  $text = Html::decodeEntities($text);
  $text_length = strlen($text);

  // Since we are requiring a match on a word boundary, the text that is
  // searched must start and end with a space. This is loop-invariant, so
  // build it once.
  $padded_text = ' ' . $text . ' ';

  // Make a list of unique keywords that are actually found in the text,
  // which could be items in $keys or replacements that are equivalent through
  // search_simplify().
  $temp_keys = [];
  foreach ($keys as $key) {
    $key = _search_find_match_with_simplify($key, $text, $boundary_character, $langcode);
    if (isset($key)) {
      // Quote slashes so they can be used in regular expressions.
      $temp_keys[] = preg_quote($key, '/');
    }
  }
  // Several keywords could have simplified down to the same thing, so pick
  // out the unique ones.
  $keys = array_unique($temp_keys);

  // Extract fragments of about 60 characters around keywords, bounded by word
  // boundary characters. Try to reach 256 characters, using second occurrences
  // if necessary.
  $ranges = [];
  $length = 0;
  $look_start = [];
  $remaining_keys = $keys;

  while ($length < 256 && !empty($remaining_keys)) {
    $found_keys = [];
    foreach ($remaining_keys as $key) {
      if ($length >= 256) {
        break;
      }

      // Remember where we last found $key, in case we are coming through a
      // second time.
      if (!isset($look_start[$key])) {
        $look_start[$key] = 0;
      }

      // See if we can find $key after where we found it the last time.
      $matches = [];
      if (preg_match('/' . $preceded_by_boundary . $key . $followed_by_boundary . '/iu', $padded_text, $matches, PREG_OFFSET_CAPTURE, $look_start[$key])) {
        $found_position = $matches[0][1];

        // Resume the next search one character after the start of this match.
        // Offsets are in bytes, and with the 'u' modifier preg_match() fails
        // when the offset points into the middle of a UTF-8 sequence, so skip
        // any continuation bytes (10xxxxxx) to land on a character boundary.
        // Otherwise keywords starting with a multi-byte character would never
        // be found a second time.
        $next_start = $found_position + 1;
        while (isset($padded_text[$next_start]) && (ord($padded_text[$next_start]) & 0xC0) === 0x80) {
          $next_start++;
        }
        $look_start[$key] = $next_start;

        // Keep track of which keys we found this time, in case we need to
        // pass through again to find more text.
        $found_keys[] = $key;

        // Locate a space before and after this match, leaving about 60
        // characters of context on each end.
        $before = strpos(' ' . $text, ' ', max(0, $found_position - 61));
        if ($before !== FALSE && $before <= $found_position) {
          if ($text_length > $found_position + 60) {
            $after = strrpos(substr($text, 0, $found_position + 60), ' ', $found_position);
          }
          else {
            $after = $text_length;
          }
          if ($after !== FALSE && $after > $found_position) {
            // Account for the spaces we added.
            $before = max($before - 1, 0);
            if ($before < $after) {
              // Save this range.
              $ranges[$before] = $after;
              $length += $after - $before;
            }
          }
        }
      }
    }
    // Next time through this loop, only look for keys we found this time,
    // if any.
    $remaining_keys = $found_keys;
  }

  if (empty($ranges)) {
    // We didn't find any keyword matches, so just return the first part of the
    // text. We also need to re-encode any HTML special characters that we
    // entity-decoded above.
    return [
      '#plain_text' => Unicode::truncate($text, 256, TRUE, TRUE),
    ];
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Collapse overlapping text ranges into one. The sorting makes it O(n).
  $new_ranges = [];
  $max_end = 0;
  foreach ($ranges as $this_from => $this_to) {
    $max_end = max($max_end, $this_to);
    if (!isset($working_from)) {
      // This is the first time through this loop: initialize.
      $working_from = $this_from;
      $working_to = $this_to;
      continue;
    }
    if ($this_from <= $working_to) {
      // The ranges overlap: combine them.
      $working_to = max($working_to, $this_to);
    }
    else {
      // The ranges do not overlap: save the working range and start a new one.
      $new_ranges[$working_from] = $working_to;
      $working_from = $this_from;
      $working_to = $this_to;
    }
  }
  // Save the remaining working range.
  $new_ranges[$working_from] = $working_to;

  // Fetch text within the combined ranges we found.
  $out = [];
  foreach ($new_ranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }

  // Combine the text chunks with "…" separators. The "…" needs to be
  // translated. Let translators have the … separator text as one chunk.
  $ellipses = explode('@excerpt', t('… @excerpt … @excerpt …'));
  // Only add a leading separator when the excerpt does not begin at the start
  // of the text, and a trailing one when it stops short of the end.
  $text = (isset($new_ranges[0]) ? '' : $ellipses[0]) . implode($ellipses[1], $out) . (($max_end < strlen($text) - 1) ? $ellipses[2] : '');
  $text = Html::escape($text);

  // Highlight keywords. Must be done at once to prevent conflicts ('strong'
  // and '<strong>').
  $text = trim(preg_replace('/' . $preceded_by_boundary . '(?:' . implode('|', $keys) . ')' . $followed_by_boundary . '/iu', '<strong>\0</strong>', ' ' . $text . ' '));

  return [
    '#markup' => $text,
    '#allowed_tags' => ['strong'],
  ];
}
Please login to continue.