3 * The Mail_mimeDecode class is used to decode mail/mime messages
5 * This class will parse a raw mime email and return
6 * the structure. Returned structure is similar to
7 * that returned by imap_fetchstructure().
9 * +----------------------------- IMPORTANT ------------------------------+
10 * | Usage of this class compared to native php extensions such as |
11 * | mailparse or imap, is slow and may be feature deficient. If available|
12 * | you are STRONGLY recommended to use the php extensions. |
13 * +----------------------------------------------------------------------+
15 * Compatible with PHP versions 4 and 5
17 * LICENSE: This LICENSE is in the BSD license style.
18 * Copyright (c) 2002-2003, Richard Heyes <richard@phpguru.org>
19 * Copyright (c) 2003-2006, PEAR <pear-group@php.net>
20 * All rights reserved.
22 * Redistribution and use in source and binary forms, with or
23 * without modification, are permitted provided that the following
26 * - Redistributions of source code must retain the above copyright
27 * notice, this list of conditions and the following disclaimer.
28 * - Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * - Neither the name of the authors, nor the names of its contributors
32 * may be used to endorse or promote products derived from this
33 * software without specific prior written permission.
35 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
36 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
39 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
42 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
43 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
44 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
45 * THE POSSIBILITY OF SUCH DAMAGE.
49 * @author Richard Heyes <richard@phpguru.org>
50 * @author George Schlossnagle <george@omniti.com>
51 * @author Cipriano Groenendal <cipri@php.net>
52 * @author Sean Coates <sean@php.net>
53 * @copyright 2003-2006 PEAR <pear-group@php.net>
54 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
55 * @version CVS: $Id: mimeDecode.php,v 1.48 2006/12/03 13:43:33 cipri Exp $
56 * @link http://pear.php.net/package/Mail_mime
63 * This package depends on PEAR to raise errors.
65 require_once 'PEAR.php';
69 * The Mail_mimeDecode class is used to decode mail/mime messages
71 * This class will parse a raw mime email and return the structure.
72 * Returned structure is similar to that returned by imap_fetchstructure().
74 * +----------------------------- IMPORTANT ------------------------------+
75 * | Usage of this class compared to native php extensions such as |
76 * | mailparse or imap, is slow and may be feature deficient. If available|
77 * | you are STRONGLY recommended to use the php extensions. |
78 * +----------------------------------------------------------------------+
82 * @author Richard Heyes <richard@phpguru.org>
83 * @author George Schlossnagle <george@omniti.com>
84 * @author Cipriano Groenendal <cipri@php.net>
85 * @author Sean Coates <sean@php.net>
86 * @copyright 2003-2006 PEAR <pear-group@php.net>
87 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
88 * @version Release: @package_version@
89 * @link http://pear.php.net/package/Mail_mime
91 class Mail_mimeDecode extends PEAR
/**
 * The raw email to decode
 *
 * @var    string
 * @access private
 */
var $_input;

/**
 * The header part of the input
 *
 * @var    string
 * @access private
 */
var $_header;

/**
 * The body part of the input
 *
 * @var    string
 * @access private
 */
var $_body;

/**
 * If an error occurs, this is used to store the message
 *
 * @var    string
 * @access private
 */
var $_error;

/**
 * Flag to determine whether to include bodies in the
 * returned object.
 *
 * @var    boolean
 * @access private
 */
var $_include_bodies;

/**
 * Flag to determine whether to decode bodies
 *
 * @var    boolean
 * @access private
 */
var $_decode_bodies;

/**
 * Flag to determine whether to decode headers
 *
 * @var    boolean
 * @access private
 */
var $_decode_headers;
/**
 * Constructor.
 *
 * Sets up the object, initialises the variables, and splits and
 * stores the header and body of the input. Bodies are included but
 * not decoded until decode() is given other settings.
 *
 * Declared as __construct() so that `new Mail_mimeDecode(...)` still
 * runs it on PHP versions that no longer treat a method named after
 * the class as the constructor.
 *
 * @param string $input The input to decode
 * @access public
 */
function __construct($input)
{
    // _splitBodyHeader() returns false when it cannot split the input;
    // in that case both $header and $body end up null.
    list($header, $body)   = $this->_splitBodyHeader($input);

    $this->_input          = $input;
    $this->_header         = $header;
    $this->_body           = $body;
    $this->_decode_bodies  = false;
    $this->_include_bodies = true;
}

/**
 * PHP 4 style constructor, kept so that PHP 4 and code calling
 * this method by name keep working. Forwards to __construct().
 *
 * @param string $input The input to decode
 * @access public
 */
function Mail_mimeDecode($input)
{
    $this->__construct($input);
}
/**
 * Begins the decoding process. If called statically
 * it will create an object and call the decode() method
 * of it.
 *
 * @param array $params An array of various parameters that determine
 *              how the message is decoded:
 *              include_bodies - Whether to include the body in the returned
 *                               object (false when not given).
 *              decode_bodies  - Whether to decode the bodies
 *                               of the parts. (Transfer encoding)
 *                               (false when not given).
 *              decode_headers - Whether to decode headers
 *                               (false when not given).
 *              input          - If called statically, this will be treated
 *                               as the input to decode.
 * @return object Decoded results, or a PEAR error object when decoding
 *                fails or a static call supplies no input
 * @access public
 */
function decode($params = null)
{
    // Determine if this method has been called statically.
    // is_a() is used instead of comparing get_class() with __CLASS__ so
    // that instances of subclasses are recognised as object calls too.
    $isStatic = !(isset($this) && is_a($this, __CLASS__));

    // Have we been called statically?
    // If so, create an object and pass details to that.
    if ($isStatic AND isset($params['input'])) {

        $obj = new Mail_mimeDecode($params['input']);
        $structure = $obj->decode($params);

    // Called statically but no input
    } elseif ($isStatic) {
        return PEAR::raiseError('Called statically and no input given');

    // Called via an object
    } else {
        $this->_include_bodies = isset($params['include_bodies']) ?
                                 $params['include_bodies'] : false;
        $this->_decode_bodies  = isset($params['decode_bodies']) ?
                                 $params['decode_bodies']  : false;
        $this->_decode_headers = isset($params['decode_headers']) ?
                                 $params['decode_headers'] : false;

        $structure = $this->_decode($this->_header, $this->_body);
        if ($structure === false) {
            // _decode() leaves the reason in $this->_error
            $structure = $this->raiseError($this->_error);
        }
    }

    return $structure;
}
/**
 * Performs the decoding. Decodes the body string passed to it.
 * If it finds certain content-types it will call itself in a
 * recursive fashion (multipart/*), or decode the body as a complete
 * nested message (message/rfc822).
 *
 * @param string $headers       Header section
 * @param string $body          Body section
 * @param string $default_ctype Content type assumed when the headers
 *                              carry no Content-Type
 * @return object|false Results of decoding process, or false (with
 *                      $this->_error set) when a multipart type has
 *                      no boundary parameter
 * @access private
 */
function _decode($headers, $body, $default_ctype = 'text/plain')
{
    $return = new stdClass;
    $return->headers = array();
    $headers = $this->_parseHeaders($headers);

    // Index header values by lower-cased name; a name that occurs more
    // than once is stored as a list of its values.
    foreach ($headers as $value) {
        $name = strtolower($value['name']);

        if (!isset($return->headers[$name])) {
            $return->headers[$name] = $value['value'];
        } elseif (is_array($return->headers[$name])) {
            $return->headers[$name][] = $value['value'];
        } else {
            $return->headers[$name] = array($return->headers[$name], $value['value']);
        }
    }

    // Pick out the headers that drive the decoding. foreach replaces the
    // former each() loops; each() no longer exists in PHP 8.
    foreach ($headers as $header) {
        switch (strtolower($header['name'])) {
            case 'content-type':
                $content_type = $this->_parseHeaderValue($header['value']);

                if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                    $return->ctype_primary   = $regs[1];
                    $return->ctype_secondary = $regs[2];
                }

                if (isset($content_type['other'])) {
                    foreach ($content_type['other'] as $p_name => $p_value) {
                        $return->ctype_parameters[$p_name] = $p_value;
                    }
                }
                break;

            case 'content-disposition':
                $content_disposition = $this->_parseHeaderValue($header['value']);
                $return->disposition = $content_disposition['value'];
                if (isset($content_disposition['other'])) {
                    foreach ($content_disposition['other'] as $p_name => $p_value) {
                        $return->d_parameters[$p_name] = $p_value;
                    }
                }
                break;

            case 'content-transfer-encoding':
                $content_transfer_encoding = $this->_parseHeaderValue($header['value']);
                break;
        }
    }

    if (isset($content_type)) {
        switch (strtolower($content_type['value'])) {
            case 'multipart/parallel':
            case 'multipart/appledouble': // Appledouble mail
            case 'multipart/report': // RFC1892
            case 'multipart/signed': // PGP
            case 'multipart/digest':
            case 'multipart/alternative':
            case 'multipart/related':
            case 'multipart/mixed':
                if (!isset($content_type['other']['boundary'])) {
                    $this->_error = 'No boundary found for ' . $content_type['value'] . ' part';
                    return false;
                }

                // Parts of a digest default to message/rfc822, all others to text/plain
                $default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' : 'text/plain';

                $parts = $this->_boundarySplit($body, $content_type['other']['boundary']);
                for ($i = 0; $i < count($parts); $i++) {
                    list($part_header, $part_body) = $this->_splitBodyHeader($parts[$i]);
                    $part = $this->_decode($part_header, $part_body, $default_ctype);
                    if ($part === false) {
                        $part = $this->raiseError($this->_error);
                    }
                    $return->parts[] = $part;
                }
                break;

            case 'message/rfc822':
                // Plain "new": assigning "new" by reference is a parse error in PHP 7+
                $obj = new Mail_mimeDecode($body);
                $return->parts[] = $obj->decode(array('include_bodies' => $this->_include_bodies,
                                                      'decode_bodies'  => $this->_decode_bodies,
                                                      'decode_headers' => $this->_decode_headers));
                unset($obj);
                break;

            // text/plain and text/html are handled exactly like any other
            // leaf type: the transfer encoding defaults to 7bit.
            case 'text/plain':
            case 'text/html':
            default:
                $encoding = isset($content_transfer_encoding['value']) ? $content_transfer_encoding['value'] : '7bit';
                if ($this->_include_bodies) {
                    $return->body = $this->_decode_bodies ? $this->_decodeBody($body, $encoding) : $body;
                }
                break;
        }

    } else {
        // No Content-Type header: fall back to the supplied default type.
        // The body is decoded with _decodeBody()'s default encoding.
        $ctype = explode('/', $default_ctype);
        $return->ctype_primary   = $ctype[0];
        $return->ctype_secondary = $ctype[1];
        if ($this->_include_bodies) {
            $return->body = $this->_decode_bodies ? $this->_decodeBody($body) : $body;
        }
    }

    return $return;
}
/**
 * Given the output of the above function, this will return an
 * array of references to the parts, indexed by mime number.
 * Every visited part also gets its number stored in ->mime_id.
 *
 * @param  object  $structure   The structure to go through
 * @param  boolean $no_refs     When true, leaf parts and entries gathered
 *                              from sub-parts are stored as '' instead of
 *                              references
 * @param  string  $mime_number Internal use only.
 * @param  string  $prepend     Internal use only.
 * @return array                Mime numbers
 */
function &getMimeNumbers(&$structure, $no_refs = false, $mime_number = '', $prepend = '')
{
    $return = array();

    // Leaf part: number it (a lone top-level part is "1") and stop.
    if (empty($structure->parts)) {
        if ($mime_number == '') {
            $mime_number = '1';
        }

        $id = $prepend . $mime_number;
        $structure->mime_id = $id;
        if ($no_refs) {
            $return[$id] = '';
        } else {
            $return[$id] = &$structure;
        }

        return $return;
    }

    // Container part: the top-level container itself gets no number.
    if ($mime_number != '') {
        $structure->mime_id = $prepend . $mime_number;
        $return[$prepend . $mime_number] = &$structure;
    }

    for ($idx = 0; $idx < count($structure->parts); $idx++) {

        $is_message = !empty($structure->headers['content-type'])
                      && substr(strtolower($structure->headers['content-type']), 0, 8) == 'message/';

        if ($is_message) {
            // Children of a message/* part restart numbering below its number
            $prepend      = $prepend . $mime_number . '.';
            $child_number = '';
        } else {
            $child_number = ($mime_number == '' ? $idx + 1 : sprintf('%s.%s', $mime_number, $idx + 1));
        }

        $children = &Mail_mimeDecode::getMimeNumbers($structure->parts[$idx], $no_refs, $child_number, $prepend);
        foreach ($children as $child_id => $unused) {
            if ($no_refs) {
                $return[$child_id] = '';
            } else {
                $return[$child_id] = &$children[$child_id];
            }
        }
    }

    return $return;
}
/**
 * Given a string containing a header and body
 * section, this function will split them (at the first
 * blank line) and return them.
 *
 * Input with no blank line at all is treated as a message that
 * consists of headers only and is returned with an empty body.
 *
 * @param string $input Input to split apart
 * @return array|false  Contains header and body section, or false
 *                      (with $this->_error set) when the input is
 *                      empty or could not be examined
 * @access private
 */
function _splitBodyHeader($input)
{
    $matched = preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match);
    if ($matched) {
        return array($match[1], $match[2]);
    }

    // Only a clean "no match" (0) means there is no blank line; a false
    // result is a PCRE failure and must not be mistaken for it.
    if ($matched === 0 && is_string($input) && trim($input) !== '') {
        return array($input, '');
    }

    $this->_error = 'Could not split header and body';
    return false;
}
/**
 * Parse headers given in $input and return
 * them as an array of name/value pairs, in input order.
 *
 * Values are passed through _decodeHeader() when header decoding
 * is switched on ($this->_decode_headers).
 *
 * @param string $input Headers to parse
 * @return array Contains parsed headers, each an array with the
 *               keys 'name' and 'value'
 * @access private
 */
function _parseHeaders($input)
{
    $return = array();

    if ($input !== '') {
        // Normalise line endings to CRLF, then unfold continuation lines
        $input   = preg_replace("/\r?\n/", "\r\n", $input);
        $input   = preg_replace("/\r\n(\t| )+/", ' ', $input);
        $headers = explode("\r\n", trim($input));

        foreach ($headers as $value) {
            $pos = strpos($value, ':');

            if ($pos === false) {
                // Malformed line without a colon: keep the whole text as
                // the value instead of losing its first character.
                $hdr_name  = '';
                $hdr_value = $value;
            } else {
                $hdr_name  = substr($value, 0, $pos);
                // Cast: substr() yields false for an empty value on PHP < 8
                $hdr_value = (string) substr($value, $pos + 1);
            }

            // Drop the single space that conventionally follows the colon;
            // the emptiness check avoids reading offset 0 of ''.
            if ($hdr_value !== '' && $hdr_value[0] == ' ') {
                $hdr_value = substr($hdr_value, 1);
            }

            $return[] = array(
                              'name'  => $hdr_name,
                              'value' => $this->_decode_headers ? $this->_decodeHeader($hdr_value) : $hdr_value
                             );
        }
    }

    return $return;
}
/**
 * Function to parse a header value,
 * extract first part, and any secondary
 * parts (after ;) This function is not as
 * robust as it could be. Eg. header comments
 * in the wrong place will probably break it.
 *
 * @param string $input Header value to parse
 * @return array Contains parsed result: 'value' holds the first part,
 *               'other' (only when parameters exist) maps each parameter
 *               name, as written and lower-cased, to its value
 * @access private
 */
function _parseHeaderValue($input)
{
    $return = array();

    $pos = strpos($input, ';');
    if ($pos === false) {
        $return['value'] = trim($input);
        return $return;
    }

    $return['value'] = trim(substr($input, 0, $pos));
    $input = trim(substr($input, $pos + 1));

    if (strlen($input) == 0) {
        return $return;
    }

    // This splits on a semi-colon, if there's no preceding backslash
    // Now works with quoted values; had to glue the \; breaks in PHP
    // the regex is already bordering on incomprehensible
    $splitRegex = '/([^;\'"]*[\'"]([^\'"]*([^\'"]*)*)[\'"][^;\'"]*|([^;]+))(;|$)/';
    preg_match_all($splitRegex, $input, $matches);

    $chunks     = $matches[0];
    $total      = count($chunks);
    $parameters = array();
    for ($i = 0; $i < $total; $i++) {
        $param = $chunks[$i];
        // Re-join pieces that were split at an escaped semi-colon. The
        // bounds check stops at the last piece; without it a value ending
        // in "\;" would loop forever.
        while (substr($param, -2) == '\;' && $i + 1 < $total) {
            $param .= $chunks[++$i];
        }
        $parameters[] = $param;
    }

    foreach ($parameters as $parameter) {
        $pos         = strpos($parameter, '=');
        $param_name  = trim(substr($parameter, 0, $pos), "'\";\t\\ ");
        // trim() already strips surrounding quotes, so no further
        // unquoting is needed (and an empty value is left alone).
        $param_value = trim(str_replace('\;', ';', substr($parameter, $pos + 1)), "'\";\t\\ ");

        $return['other'][$param_name] = $param_value;
        $return['other'][strtolower($param_name)] = $param_value;
    }

    return $return;
}
/**
 * This function splits the input based
 * on the given boundary
 *
 * The text before the first delimiter and the text after the closing
 * delimiter are discarded. When the closing delimiter is missing
 * (e.g. a truncated message) the trailing part is still returned.
 *
 * @param string $input    Input to parse
 * @param string $boundary Boundary taken from the Content-Type header
 * @return array Contains array of resulting mime parts
 * @access private
 */
function _boundarySplit($input, $boundary)
{
    $parts = array();

    // Strip a \"...\" wrapper if the boundary arrived with one
    $bs_possible = substr($boundary, 2, -2);
    $bs_check = '\"' . $bs_possible . '\"';

    if ($boundary == $bs_check) {
        $boundary = $bs_possible;
    }

    $tmp  = explode('--' . $boundary, $input);
    $last = count($tmp) - 1;

    // Element 0 is the preamble; the last element is handled below
    for ($i = 1; $i < $last; $i++) {
        $parts[] = $tmp[$i];
    }

    // After a proper closing delimiter the last element starts with "--".
    // Anything else that is not blank is an unterminated final part.
    if ($last >= 1 && strncmp($tmp[$last], '--', 2) !== 0 && trim($tmp[$last]) !== '') {
        $parts[] = $tmp[$last];
    }

    return $parts;
}
/**
 * Given a header, this function will decode it
 * according to RFC2047. Probably not *exactly*
 * conformant, but it does pass all the given
 * examples (in RFC2047).
 *
 * The charset of an encoded-word is not acted upon: the decoded
 * bytes are returned as they are.
 *
 * @param string $input Input header value to decode
 * @return string Decoded header value
 * @access private
 */
function _decodeHeader($input)
{
    // Remove white space between encoded-words. The lookahead leaves the
    // following "=?" unconsumed so runs of three or more words collapse too.
    $input = preg_replace('/(=\?[^?]+\?(q|b)\?[^?]*\?=)\s+(?==\?)/i', '\1', $input);

    $output = '';
    $offset = 0;

    // Walk the encoded-words left to right in a single pass, so decoded
    // text is never scanned (and decoded) a second time.
    while (preg_match('/=\?([^?]+)\?(q|b)\?([^?]*)\?=/i', $input, $matches, PREG_OFFSET_CAPTURE, $offset)) {

        $encoded  = $matches[0][0];
        $start    = $matches[0][1];
        $encoding = $matches[2][0];
        $text     = $matches[3][0];

        // Literal text in front of this encoded-word
        $output .= substr($input, $offset, $start - $offset);

        switch (strtolower($encoding)) {
            case 'b':
                $text = base64_decode($text);
                break;

            case 'q':
                $text = str_replace('_', ' ', $text);
                if (preg_match_all('/=([a-f0-9]{2})/i', $text, $hex)) {
                    // Translate every =XX in one strtr() pass; replacing
                    // them one after another could re-interpret bytes
                    // produced by an earlier replacement.
                    $map = array();
                    foreach ($hex[1] as $k => $value) {
                        $map[$hex[0][$k]] = chr(hexdec($value));
                    }
                    $text = strtr($text, $map);
                }
                break;
        }

        $output .= $text;
        $offset  = $start + strlen($encoded);
    }

    // Whatever follows the last encoded-word
    return $output . substr($input, $offset);
}
/**
 * Given a body string and an encoding type,
 * this function will decode and return it.
 *
 * The encoding name is matched case-insensitively. Only
 * quoted-printable and base64 change the data; 7bit and any
 * other name return the input as it is.
 *
 * @param  string $input    Input body to decode
 * @param  string $encoding Encoding type to use.
 * @return string Decoded body
 * @access private
 */
function _decodeBody($input, $encoding = '7bit')
{
    $type = strtolower($encoding);

    if ($type === 'quoted-printable') {
        return $this->_quotedPrintableDecode($input);
    }

    if ($type === 'base64') {
        return base64_decode($input);
    }

    // 7bit, and everything not recognised above
    return $input;
}
/**
 * Given a quoted-printable string, this
 * function will decode and return it.
 *
 * @param  string $input Input body to decode
 * @return string Decoded body
 * @access private
 */
function _quotedPrintableDecode($input)
{
    // Translation table for every "=XX" escape, in any letter case.
    // It replaces the former preg_replace() with the /e modifier, which
    // no longer exists in PHP 7 and later.
    static $map = null;
    if ($map === null) {
        $map    = array();
        $digits = '0123456789abcdefABCDEF';
        $count  = strlen($digits);
        for ($hi = 0; $hi < $count; $hi++) {
            for ($lo = 0; $lo < $count; $lo++) {
                $pair = $digits[$hi] . $digits[$lo];
                $map['=' . $pair] = chr(hexdec($pair));
            }
        }
    }

    // Remove soft line breaks
    $input = preg_replace("/=\r?\n/", '', $input);

    // Replace encoded characters in one left-to-right pass; bytes
    // produced by a replacement are not examined again.
    return strtr($input, $map);
}
/**
 * Checks the input for uuencoded files and returns
 * an array of them. Can be called statically, eg:
 *
 * $files =& Mail_mimeDecode::uudecode($some_text);
 *
 * It will check for the begin 666 ... end syntax
 * however and won't just blindly decode whatever you
 * pass it.
 *
 * @param  string $input Input body to look for attachments in
 * @return array  Decoded bodies, filenames and permissions: one entry
 *                per file with the keys 'filename', 'fileperm' and
 *                'filedata'; empty when nothing was found
 * @access public
 */
function &uudecode($input)
{
    $files = array();

    // Find all uuencoded sections
    preg_match_all("/begin ([0-7]{3}) (.+)\r?\n(.+)\r?\nend/Us", $input, $matches);

    for ($j = 0; $j < count($matches[3]); $j++) {

        $str      = $matches[3][$j];
        $filename = $matches[2][$j];
        $fileperm = $matches[1][$j];

        $file   = '';
        $str    = preg_split("/\r?\n/", trim($str));
        $strlen = count($str);

        for ($i = 0; $i < $strlen; $i++) {
            $pos = 1;
            $d   = 0;
            // First character of a line encodes the number of data bytes.
            // (The former "- ' '" terms subtracted a non-numeric string,
            // i.e. zero, and fail on PHP 8; they are dropped.)
            $len = (ord(substr($str[$i], 0, 1)) - 32) & 077;

            // Full groups: four characters carry three bytes
            while (($d + 3 <= $len) AND ($pos + 4 <= strlen($str[$i]))) {
                $c0 = (ord(substr($str[$i], $pos, 1)) ^ 0x20) & 077;
                $c1 = (ord(substr($str[$i], $pos + 1, 1)) ^ 0x20) & 077;
                $c2 = (ord(substr($str[$i], $pos + 2, 1)) ^ 0x20) & 077;
                $c3 = (ord(substr($str[$i], $pos + 3, 1)) ^ 0x20) & 077;

                // "& 0xFF" keeps the low byte, which is what chr() uses
                $file .= chr((($c0 << 2) | ($c1 >> 4)) & 0xFF);
                $file .= chr((($c1 << 4) | ($c2 >> 2)) & 0xFF);
                $file .= chr((($c2 << 6) | $c3) & 0xFF);

                $pos += 4;
                $d   += 3;
            }

            // Two bytes left over
            if (($d + 2 <= $len) && ($pos + 3 <= strlen($str[$i]))) {
                $c0 = (ord(substr($str[$i], $pos, 1)) ^ 0x20) & 077;
                $c1 = (ord(substr($str[$i], $pos + 1, 1)) ^ 0x20) & 077;
                $c2 = (ord(substr($str[$i], $pos + 2, 1)) ^ 0x20) & 077;

                $file .= chr((($c0 << 2) | ($c1 >> 4)) & 0xFF);
                $file .= chr((($c1 << 4) | ($c2 >> 2)) & 0xFF);

                $pos += 3;
                $d   += 2;
            }

            // One byte left over
            if (($d + 1 <= $len) && ($pos + 2 <= strlen($str[$i]))) {
                $c0 = (ord(substr($str[$i], $pos, 1)) ^ 0x20) & 077;
                $c1 = (ord(substr($str[$i], $pos + 1, 1)) ^ 0x20) & 077;

                $file .= chr((($c0 << 2) | ($c1 >> 4)) & 0xFF);
            }
        }
        $files[] = array('filename' => $filename, 'fileperm' => $fileperm, 'filedata' => $file);
    }

    return $files;
}
/**
 * getSendArray() returns the arguments required for Mail::send()
 * used to build the arguments for a mail::send() call
 *
 * Usage:
 * $mailtext = Full email (for example generated by a template)
 * $decoder = new Mail_mimeDecode($mailtext);
 * $parts =  $decoder->getSendArray();
 * if (!PEAR::isError($parts)) {
 *     list($recipients,$headers,$body) = $parts;
 *     $mail = Mail::factory('smtp');
 *     $mail->send($recipients,$headers,$body);
 * } else {
 *     echo $parts->message;
 * }
 *
 * Note: this switches header decoding off on the object.
 *
 * @return mixed   array of recipients (comma separated values of all
 *                 To, Cc and Bcc headers), headers, body; or Pear_Error
 * @access public
 * @author Alan Knowles <alan@akbkhome.com>
 */
function getSendArray()
{
    // prevent warning if this is not set
    $this->_decode_headers = FALSE;
    $headerlist = $this->_parseHeaders($this->_header);
    $to     = "";
    $header = array();

    if (!$headerlist) {
        return $this->raiseError("Message did not contain headers");
    }

    foreach ($headerlist as $item) {
        $header[$item['name']] = $item['value'];
        switch (strtolower($item['name'])) {
            case "to":
            case "cc":
            case "bcc":
                // Append: every recipient header contributes, not only
                // the last one encountered.
                $to .= "," . $item['value'];
                break;
        }
    }

    if ($to == "") {
        return $this->raiseError("Message did not contain any recipents");
    }

    // Drop the leading comma
    $to = substr($to, 1);
    return array($to, $header, $this->_body);
}
/**
 * Returns a xml copy of the output of
 * Mail_mimeDecode::decode. Pass the output in as the
 * argument. This function can be called statically. Eg:
 *
 * $output = $obj->decode();
 * $xml    = Mail_mimeDecode::getXML($output);
 *
 * The DTD used for this should have been in the package. Or
 * alternatively you can get it from cvs, or here:
 * http://www.phpguru.org/xmail/xmail.dtd.
 *
 * @param  object $input Input to convert to xml. This should be the
 *                output of the Mail_mimeDecode::decode function
 * @return string XML version of input
 * @access public
 */
function getXML($input)
{
    $crlf = "\r\n";

    // Prolog and opening root element, one per line
    $prolog = array(
        '<?xml version=\'1.0\'?>',
        '<!DOCTYPE email SYSTEM "http://www.phpguru.org/xmail/xmail.dtd">',
        '<email>'
    );

    return implode($crlf, $prolog) . $crlf .
           Mail_mimeDecode::_getXML($input) .
           '</email>';
}
/**
 * Function that does the actual conversion to xml. Does a single
 * mimepart at a time.
 *
 * @param  object  $input  Input to convert to xml. This is a mimepart object.
 *                         It may or may not contain subparts.
 * @param  integer $indent Number of tabs to indent
 * @return string  XML version of input
 * @access private
 */
function _getXML($input, $indent = 1)
{
    $htab    = "\t";
    $crlf    = "\r\n";
    $output  = '';
    // A part without headers simply contributes no <header> elements
    $headers = isset($input->headers) ? (array) $input->headers : array();

    foreach ($headers as $hdr_name => $hdr_value) {

        // Multiple headers with this name
        if (is_array($hdr_value)) {
            foreach ($hdr_value as $single_value) {
                $output .= Mail_mimeDecode::_getXML_helper($hdr_name, $single_value, $indent);
            }

        // Only one header of this sort
        } else {
            $output .= Mail_mimeDecode::_getXML_helper($hdr_name, $hdr_value, $indent);
        }
    }

    if (!empty($input->parts)) {
        for ($i = 0; $i < count($input->parts); $i++) {
            $output .= $crlf . str_repeat($htab, $indent) . '<mimepart>' . $crlf .
                       Mail_mimeDecode::_getXML($input->parts[$i], $indent + 1) .
                       str_repeat($htab, $indent) . '</mimepart>' . $crlf;
        }
    } elseif (isset($input->body)) {
        // A literal "]]>" would end the CDATA section early; split it
        // across two sections so the document stays well-formed.
        $body = str_replace(']]>', ']]]]><![CDATA[>', $input->body);

        $output .= $crlf . str_repeat($htab, $indent) . '<body><![CDATA[' .
                   $body . ']]></body>' . $crlf;
    }

    return $output;
}
/**
 * Helper function to _getXML(). Returns xml of a header.
 *
 * The header name is rewritten with a capital letter after every
 * hyphen. Every header except 'received' has its value split into
 * a main value and parameters.
 *
 * @param  string  $hdr_name  Name of header
 * @param  string  $hdr_value Value of header
 * @param  integer $indent    Number of tabs to indent
 * @return string  XML version of input
 * @access private
 */
function _getXML_helper($hdr_name, $hdr_value, $indent)
{
    $htab = "\t";
    $crlf = "\r\n";
    $pad  = str_repeat($htab, $indent);

    if ($hdr_name != 'received') {
        $new_hdr_value = Mail_mimeDecode::_parseHeaderValue($hdr_value);
    } else {
        $new_hdr_value = array('value' => $hdr_value);
    }
    $new_hdr_name = str_replace(' ', '-', ucwords(str_replace('-', ' ', $hdr_name)));

    // Sort out any parameters
    $params = '';
    if (!empty($new_hdr_value['other'])) {
        foreach ($new_hdr_value['other'] as $paramname => $paramvalue) {
            $params .= $pad . $htab . '<parameter>' . $crlf .
                       $pad . $htab . $htab . '<paramname>' . htmlspecialchars($paramname) . '</paramname>' . $crlf .
                       $pad . $htab . $htab . '<paramvalue>' . htmlspecialchars($paramvalue) . '</paramvalue>' . $crlf .
                       $pad . $htab . '</parameter>' . $crlf;
        }
    }

    return $pad . '<header>' . $crlf .
           $pad . $htab . '<headername>' . htmlspecialchars($new_hdr_name) . '</headername>' . $crlf .
           $pad . $htab . '<headervalue>' . htmlspecialchars($new_hdr_value['value']) . '</headervalue>' . $crlf .
           $params .
           $pad . '</header>' . $crlf;
}