_filter_url($text, $filter)
Converts text into hyperlinks automatically.
This filter identifies and makes clickable three types of "links".
- URLs like http://example.com.
- Email addresses like name@example.com.
- Web addresses without the "http://" protocol defined, like www.example.com.
Each type must be processed separately, as there is no one regular expression that could possibly match all of the cases in one pass.
Related topics
- Standard filters
- Filters implemented by the Filter module.
File
- core/modules/filter/filter.module, line 461
- Framework for handling the filtering of content.
Code
/**
 * Converts text into hyperlinks automatically.
 *
 * Three kinds of link candidates are handled, each with its own pattern and
 * its own replacement callback:
 * - absolute URLs such as http://example.com,
 * - e-mail addresses such as name@example.com,
 * - web addresses without a protocol, such as www.example.com.
 *
 * Each kind is processed in a separate pass over the text. Within a pass the
 * text is split at HTML tags so that only text between tags is touched, and
 * nothing inside the tags listed in $ignore_tags is processed.
 *
 * Every PCRE call is checked for failure: when a call reports an error the
 * previous text is kept instead of being overwritten with the error value.
 *
 * @param string $text
 *   The text to scan for link candidates.
 * @param object $filter
 *   The filter object. Only $filter->settings['filter_url_length'] is read
 *   here; it is handed to _filter_url_trim().
 *
 * @return string
 *   The text after all link passes have run.
 */
function _filter_url($text, $filter) {
  // Tags to skip and not recurse into.
  $ignore_tags = 'a|script|style|code|pre';

  // Pass length to regexp callback.
  _filter_url_trim(NULL, $filter->settings['filter_url_length']);

  // Create an array which contains the regexps for each type of link.
  // The key to the regexp is the name of a function that is used as
  // callback function to process matches of the regexp. The callback function
  // is to return the replacement for the match. The array is used and
  // matching/replacement done below inside some loops.
  $tasks = array();

  // Prepare protocols pattern for absolute URLs.
  // \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() will replace
  // any bad protocols with HTTP, so we need to support the identical list.
  // While '//' is technically optional for MAILTO only, we cannot cleanly
  // differ between protocols here without hard-coding MAILTO, so '//' is
  // optional for all protocols.
  // @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols()
  $protocols = \Drupal::getContainer()->getParameter('filter_protocols');
  $protocols = implode(':(?://)?|', $protocols) . ':(?://)?';

  $valid_url_path_characters = "[\p{L}\p{M}\p{N}!\*\';:=\+,\.\$\/%#\[\]\-_~@&]";

  // Allow URL paths to contain balanced parens
  // 1. Used in Wikipedia URLs like /Primer_(film)
  // 2. Used in IIS sessions like /S(dfd346)/
  $valid_url_balanced_parens = '\(' . $valid_url_path_characters . '+\)';

  // Valid end-of-path characters (so /foo. does not gobble the period).
  // 1. Allow =&# for empty URL parameters and other URL-join artifacts
  $valid_url_ending_characters = '[\p{L}\p{M}\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')';

  $valid_url_query_chars = '[a-zA-Z0-9!?\*\'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]';
  $valid_url_query_ending_chars = '[a-zA-Z0-9_&=#\/]';

  // Full path. Also allow @ in a URL, but only in the middle, to catch things
  // like http://example.com/@user/
  $valid_url_path = '(?:(?:' . $valid_url_path_characters . '*(?:' . $valid_url_balanced_parens . $valid_url_path_characters . '*)*' . $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\/))';

  // Prepare domain name pattern.
  // The ICANN seems to be on track towards accepting more diverse top level
  // domains, so this pattern has been "future-proofed" to allow for TLDs
  // of length 2-64.
  $domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)?[\p{L}\p{M}]{2,64}\b';
  $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}';
  $auth = '[\p{L}\p{M}\p{N}:%_+*~#?&=.,/;-]+@';
  $trail = '(' . $valid_url_path . '*)?(\\?' . $valid_url_query_chars . '*' . $valid_url_query_ending_chars . ')?';

  // Match absolute URLs.
  $url_pattern = "(?:$auth)?(?:$domain|$ip)/?(?:$trail)?";
  $pattern = "`((?:$protocols)(?:$url_pattern))`u";
  $tasks['_filter_url_parse_full_links'] = $pattern;

  // Match email addresses.
  $url_pattern = "[\p{L}\p{M}\p{N}._+-]{1,254}@(?:$domain)";
  $pattern = "`($url_pattern)`u";
  $tasks['_filter_url_parse_email_links'] = $pattern;

  // Match www domains.
  $url_pattern = "www\.(?:$domain)/?(?:$trail)?";
  $pattern = "`($url_pattern)`u";
  $tasks['_filter_url_parse_partial_links'] = $pattern;

  // Each type of URL needs to be processed separately. The text is joined and
  // re-split after each task, since all injected HTML tags must be correctly
  // protected before the next task.
  foreach ($tasks as $task => $pattern) {
    // HTML comments need to be handled separately, as they may contain HTML
    // markup, especially a '>'. Therefore, remove all comment contents and add
    // them back later.
    _filter_url_escape_comments('', TRUE);
    $escaped = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);
    if ($escaped === NULL) {
      // preg_replace_callback() returns NULL on a PCRE error. Without escaped
      // comments this pass cannot run safely, so leave the text untouched and
      // move on to the next task instead of discarding the text.
      continue;
    }
    $text = $escaped;

    // Split at all tags; ensures that no tags or attributes are processed.
    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns FALSE on failure; in that case no replacement is
    // attempted for this task and $text keeps its current value.
    if ($chunks !== FALSE) {
      // PHP ensures that the array consists of alternating delimiters and
      // literals, and begins and ends with a literal (inserting NULL as
      // required). Therefore, the first chunk is always text:
      $chunk_type = 'text';
      // If a tag of $ignore_tags is found, it is stored in $open_tag and only
      // removed when the closing tag is found. Until the closing tag is found,
      // no replacements are made.
      $open_tag = '';

      // The number of chunks does not change inside the loop, so count once.
      $chunk_count = count($chunks);
      for ($i = 0; $i < $chunk_count; $i++) {
        if ($chunk_type === 'text') {
          // Only process this text if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            // If there is a match, inject a link into this chunk via the
            // callback function contained in $task.
            $replaced = preg_replace_callback($pattern, $task, $chunks[$i]);
            // A NULL result signals a PCRE error (for example a subject that
            // is not valid UTF-8 while the pattern uses the 'u' modifier).
            // Keep the original chunk rather than blanking it.
            if ($replaced !== NULL) {
              $chunks[$i] = $replaced;
            }
          }
          // Text chunk is done, so next chunk must be a tag.
          $chunk_type = 'tag';
        }
        else {
          // Only process this tag if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            // Check whether this tag is contained in $ignore_tags.
            if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunks[$i], $matches)) {
              $open_tag = $matches[1];
            }
          }
          // Otherwise, check whether this is the closing tag for $open_tag.
          else {
            if (preg_match("`<\/$open_tag>`i", $chunks[$i], $matches)) {
              $open_tag = '';
            }
          }
          // Tag chunk is done, so next chunk must be text.
          $chunk_type = 'text';
        }
      }

      $text = implode($chunks);
    }

    // Revert to the original comment contents.
    _filter_url_escape_comments('', FALSE);
    $restored = preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text);
    // On a PCRE error keep the current text instead of replacing it with NULL.
    if ($restored !== NULL) {
      $text = $restored;
    }
  }

  return $text;
}
Please log in to continue.