_search_find_match_with_simplify($key, $text, $boundary, $langcode = NULL)
Finds an appropriate keyword in text.
Parameters
string $key: The keyword to find.
string $text: The text to search for the keyword.
string $boundary: Regular expression for the boundary character class (characters that indicate spaces between words).
string|null $langcode: Language code for the language of $text, if known.
Return value
string|null A segment of $text that is between word boundary characters that either matches $key directly, or matches $key when both this text segment and $key are processed by search_simplify(). If a matching text segment is not located, NULL is returned.
File
- core/modules/search/search.module, line 812
- Enables site-wide keyword searching.
Code
/**
 * Finds an appropriate keyword in text.
 *
 * Three strategies are tried in order, from cheapest to most expensive:
 * - a case-insensitive literal match of the trimmed keyword;
 * - a match after lower-casing and removing diacritics from both strings;
 * - a match after running both strings through search_simplify(), narrowed
 *   down to the smallest run of words that still matches by two binary
 *   searches over the word boundaries.
 *
 * @param string $key
 *   The keyword to find.
 * @param string $text
 *   The text to search for the keyword.
 * @param string $boundary
 *   Regular expression for the boundary character class (characters that
 *   indicate spaces between words).
 * @param string|null $langcode
 *   Language code for the language of $text, if known.
 *
 * @return string|null
 *   A segment of $text that is between word boundary characters that either
 *   matches $key directly, or matches $key when both this text segment and
 *   $key are processed by search_simplify(). When the keyword matches
 *   directly (ignoring case), the trimmed keyword itself is returned. If a
 *   matching text segment is not located, NULL is returned.
 */
function _search_find_match_with_simplify($key, $text, $boundary, $langcode = NULL) {
  $preceded_by_boundary = '(?<=' . $boundary . ')';
  $followed_by_boundary = '(?=' . $boundary . ')';

  // See if $key appears as-is. When testing, make sure $text starts/ends with
  // a space, because we require $key to be surrounded by word boundary
  // characters.
  $temp = trim($key);
  if ($temp === '') {
    return NULL;
  }
  if (preg_match('/' . $preceded_by_boundary . preg_quote($temp, '/') . $followed_by_boundary . '/iu', ' ' . $text . ' ')) {
    return $temp;
  }

  // See if there is a match after lower-casing and removing diacritics in
  // both, which should preserve the string length (in characters).
  $transliteration = \Drupal::service('transliteration');
  $new_text = $transliteration->removeDiacritics(Unicode::strtolower($text));
  $new_key = $transliteration->removeDiacritics(Unicode::strtolower($temp));
  $pattern = '/' . $preceded_by_boundary . preg_quote($new_key, '/') . $followed_by_boundary . '/u';
  if (preg_match($pattern, ' ' . $new_text . ' ', $matches, PREG_OFFSET_CAPTURE)) {
    // Take the position from the boundary-aware regular expression match
    // rather than from a plain substring search: the first raw occurrence of
    // the keyword may sit inside a longer word (e.g. "cat" in "concatenate"),
    // which is not the segment that matched. The captured offset is a byte
    // offset into the padded string, so subtract the one-byte leading space
    // and convert it to a character position before slicing $text.
    $byte_offset = $matches[0][1] - 1;
    $position = Unicode::strlen(substr($new_text, 0, $byte_offset));
    return Unicode::substr($text, $position, Unicode::strlen($new_key));
  }

  // Run both text and key through search_simplify.
  $simplified_key = trim(search_simplify($key, $langcode));
  $simplified_text = trim(search_simplify($text, $langcode));
  if ($simplified_key == '' || $simplified_text == '' || strpos($simplified_text, $simplified_key) === FALSE) {
    // The simplified keyword and text do not match at all, or are empty.
    return NULL;
  }

  // Split $text into words, keeping track of where the word boundaries are.
  // The limit is given as -1 ("no limit"); passing NULL is deprecated as of
  // PHP 8.1.
  $words = preg_split('/' . $boundary . '+/u', $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
  if ($words === FALSE) {
    // The split failed (for instance, $text is not valid UTF-8), so no
    // segment can be located.
    return NULL;
  }
  // Add an entry pointing to the end of the string, for the loop below.
  $words[] = array('', strlen($text));

  // Using a binary search, find the earliest possible ending position in
  // $text where it will still match the keyword after applying
  // search_simplify().
  $start_index = 0;
  $start_pos = $words[$start_index][1];
  $min_end_index = 1;
  $max_end_index = count($words) - 1;
  while ($max_end_index > $min_end_index) {
    // Check the index half way between min and max. See if we ended there,
    // if we would still have a match. Cast to int so the value is a proper
    // array index rather than a float.
    $proposed_end_index = (int) floor(($max_end_index + $min_end_index) / 2);
    $proposed_end_pos = $words[$proposed_end_index][1];
    // Since the split was done with preg_split(), the positions are byte counts
    // not character counts, so use substr() not Unicode::substr() here.
    $trial_text = trim(search_simplify(substr($text, $start_pos, $proposed_end_pos - $start_pos), $langcode));
    if (strpos($trial_text, $simplified_key) !== FALSE) {
      // The proposed endpoint is fine, text still matches.
      $max_end_index = $proposed_end_index;
    }
    else {
      // The proposed endpoint index is too early, so the earliest possible
      // OK ending point would be the next index.
      $min_end_index = $proposed_end_index + 1;
    }
  }

  // Now do the same for the starting position: using a binary search, find the
  // latest possible starting position in $text where it will still match the
  // keyword after applying search_simplify().
  $end_index = $min_end_index;
  $end_pos = $words[$end_index][1];
  $min_start_index = 0;
  $max_start_index = $end_index - 1;
  while ($max_start_index > $min_start_index) {
    // Check the index half way between min and max. See if we started there,
    // if we would still have a match. Rounding up guarantees progress when
    // the two bounds are adjacent.
    $proposed_start_index = (int) ceil(($max_start_index + $min_start_index) / 2);
    $proposed_start_pos = $words[$proposed_start_index][1];
    // Since the split was done with preg_split(), the positions are byte counts
    // not character counts, so use substr() not Unicode::substr() here.
    $trial_text = trim(search_simplify(substr($text, $proposed_start_pos, $end_pos - $proposed_start_pos), $langcode));
    if (strpos($trial_text, $simplified_key) !== FALSE) {
      // The proposed start point is fine, text still matches.
      $min_start_index = $proposed_start_index;
    }
    else {
      // The proposed start point index is too late, so the latest possible
      // OK starting point would be the previous index.
      $max_start_index = $proposed_start_index - 1;
    }
  }
  $start_index = $max_start_index;

  // Return the matching text. We need to use substr() here and not the
  // Unicode::substr() function, because the indices in $words came from
  // preg_split(), so they are Unicode-safe byte positions, not character
  // positions.
  return trim(substr($text, $words[$start_index][1], $words[$end_index][1] - $words[$start_index][1]));
}
Please log in to continue.