MailFormatHelper::htmlToText

public static MailFormatHelper::htmlToText($string, $allowed_tags = NULL)

Transforms an HTML string into plain text, preserving its structure.

The output will be suitable for use as 'format=flowed; delsp=yes' text (RFC 3676) and can be passed directly to MailManagerInterface::mail() for sending.

We deliberately use LF rather than CRLF, see MailManagerInterface::mail().

This function provides suitable alternatives for the following tags: <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt> <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>

Parameters

string $string: The string to be transformed.

array $allowed_tags: (optional) If supplied, a list of tags that will be transformed. If omitted, all supported tags are transformed.

Return value

string The transformed string.

File

core/lib/Drupal/Core/Mail/MailFormatHelper.php, line 104

Class

MailFormatHelper
Defines a class containing utility methods for formatting mail messages.

Namespace

Drupal\Core\Mail

Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/**
 * Transforms an HTML string into plain text, preserving its structure.
 *
 * Alternatives are provided for the following tags: <a> <em> <i> <strong>
 * <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt> <dd> <h1> <h2> <h3>
 * <h4> <h5> <h6> <hr>. Any other markup is filtered out before conversion.
 *
 * @param string $string
 *   The string to be transformed.
 * @param array $allowed_tags
 *   (optional) If supplied, a list of tags that will be transformed. Only
 *   entries that are also supported tags are honoured. If omitted, all
 *   supported tags are transformed.
 *
 * @return string
 *   The transformed string, followed by a numbered list of link URLs when
 *   the input contained <a href="..."> links.
 */
public static function htmlToText($string, $allowed_tags = NULL) {
  // Cache list of supported tags.
  if (empty(static::$supportedTags)) {
    static::$supportedTags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p',
      'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3',
      'h4', 'h5', 'h6', 'hr');
  }

  // Make sure only supported tags are kept.
  $allowed_tags = isset($allowed_tags) ? array_intersect(static::$supportedTags, $allowed_tags) : static::$supportedTags;

  // Make sure tags, entities and attributes are well-formed and properly
  // nested.
  $string = Html::normalize(Xss::filter($string, $allowed_tags));

  // Apply inline styles: emphasis becomes /text/, strong becomes *text*.
  $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
  $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);

  // Replace inline <a> tags with the text of link and a footnote.
  // 'See <a href="https://www.drupal.org">the Drupal site</a>' becomes
  // 'See the Drupal site [1]' with the URL included as a footnote.
  // NOTE(review): the (NULL, TRUE) call presumably resets the URL list
  // collected by a previous conversion - confirm against htmlToMailUrls().
  static::htmlToMailUrls(NULL, TRUE);
  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
  // Use an array callable: the 'static::method' string form is a partially
  // supported callable and is deprecated as of PHP 8.2.
  $string = preg_replace_callback($pattern, array(static::class, 'htmlToMailUrls'), $string);
  $urls = static::htmlToMailUrls();
  $footnotes = '';
  if (count($urls)) {
    $footnotes .= "\n";
    for ($i = 0, $max = count($urls); $i < $max; $i++) {
      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
    }
  }

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and
  // literals and begins and ends with a literal (inserting empty strings as
  // required).
  // Odd/even counter (tag or no tag).
  $tag = FALSE;
  // Case conversion function.
  $casing = NULL;
  $output = '';
  // All current indentation string chunks.
  $indent = array();
  // Array of counters for opened lists.
  $lists = array();
  // The line ending appended after each formatted chunk. It does not change
  // while converting, so it is looked up once instead of once per chunk.
  $line_endings = Settings::get('mail_line_endings', PHP_EOL);
  foreach ($split as $value) {
    // Holds a string ready to be formatted and output.
    $chunk = NULL;

    // Process HTML tags (but don't output any literally).
    if ($tag) {
      // Drop any attributes: only the (lower-cased) tag name is relevant.
      list($tagname) = explode(' ', strtolower($value), 2);
      switch ($tagname) {
        // List counters.
        case 'ul':
          array_unshift($lists, '*');
          break;

        case 'ol':
          array_unshift($lists, 1);
          break;

        case '/ul':
        case '/ol':
          array_shift($lists);
          // Ensure blank new-line.
          $chunk = '';
          break;

          // Quotation/list markers, non-fancy headers.
        case 'blockquote':
          // Format=flowed indentation cannot be mixed with lists.
          $indent[] = count($lists) ? ' "' : '>';
          break;

        case 'li':
          // Numbered marker for the innermost ordered list (incrementing its
          // counter), bullet marker otherwise.
          $indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
          break;

        case 'dd':
          $indent[] = '    ';
          break;

        case 'h3':
          $indent[] = '.... ';
          break;

        case 'h4':
          $indent[] = '.. ';
          break;

        case '/blockquote':
          if (count($lists)) {
            // Append closing quote for inline quotes (immediately).
            $output = rtrim($output, "> \n") . "\"\n";
            // Ensure blank new-line.
            $chunk = '';
          }
          // Intentional fall-through to the processing for '/li' and '/dd'.
        case '/li':
        case '/dd':
          array_pop($indent);
          break;

        case '/h3':
        case '/h4':
          array_pop($indent);
          // Intentional fall-through to the processing for '/h5' and '/h6'.
        case '/h5':
        case '/h6':
          // Ensure blank new-line.
          $chunk = '';
          break;

          // Fancy headers.
        case 'h1':
          $indent[] = '======== ';
          $casing = '\Drupal\Component\Utility\Unicode::strtoupper';
          break;

        case 'h2':
          $indent[] = '-------- ';
          $casing = '\Drupal\Component\Utility\Unicode::strtoupper';
          break;

        case '/h1':
        case '/h2':
          $casing = NULL;
          // Pad the line with dashes.
          $output = static::htmlToTextPad($output, ($tagname == '/h1') ? '=' : '-', ' ');
          array_pop($indent);
          // Ensure blank new-line.
          $chunk = '';
          break;

          // Horizontal rulers.
        case 'hr':
          // Insert immediately.
          $output .= static::wrapMail('', implode('', $indent)) . "\n";
          $output = static::htmlToTextPad($output, '-');
          break;

          // Paragraphs and definition lists.
        case '/p':
        case '/dl':
          // Ensure blank new-line.
          $chunk = '';
          break;
      }
    }
    // Process blocks of text.
    else {
      // Convert inline HTML text to plain text; not removing line-breaks or
      // white-space, since that breaks newlines when sanitizing plain-text.
      $value = trim(Html::decodeEntities($value));
      if (Unicode::strlen($value)) {
        $chunk = $value;
      }
    }

    // See if there is something waiting to be output. An empty-string chunk
    // (set by closing block tags) still counts and yields a blank line.
    if (isset($chunk)) {
      // Apply any necessary case conversion.
      if (isset($casing)) {
        $chunk = call_user_func($casing, $chunk);
      }
      // Format it and apply the current indentation.
      $output .= static::wrapMail($chunk, implode('', $indent)) . $line_endings;
      // Remove non-quotation markers from indentation.
      $indent = array_map('\Drupal\Core\Mail\MailFormatHelper::htmlToTextClean', $indent);
    }

    $tag = !$tag;
  }

  return $output . $footnotes;
}
doc_Drupal
2025-01-10 15:47:30
Comments
Leave a Comment

Please login to continue.