3 * RFC 822 Email address list validation Utility
9 * Copyright (c) 2001-2010, Richard Heyes
10 * All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
16 * o Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * o Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * o The names of the authors may not be used to endorse or promote
22 * products derived from this software without specific prior written
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * @author Richard Heyes <richard@phpguru.org>
40 * @author Chuck Hagenbuch <chuck@horde.org>
41 * @copyright 2001-2010 Richard Heyes
42 * @license http://opensource.org/licenses/bsd-license.php New BSD License
43 * @version CVS: $Id: RFC822.php 294749 2010-02-08 08:22:25Z clockwerx $
44 * @link http://pear.php.net/package/Mail/
48 * RFC 822 Email address list validation Utility
52 * This class will take an address string, and parse it into its constituent
53 * parts, be that either addresses, groups, or combinations. Nested groups
54 * are not supported. The structure it returns is pretty straight forward,
55 * and is similar to that provided by the imap_rfc822_parse_adrlist(). Use
56 * print_r() to view the structure.
60 * $address_string = 'My Group: "Richard" <richard@localhost> (A comment), ted@example.com (Ted Bloggs), Barney;';
61 * $structure = Mail_RFC822::parseAddressList($address_string, 'example.com', true);
62 * print_r($structure);
64 * @author Richard Heyes <richard@phpguru.org>
65 * @author Chuck Hagenbuch <chuck@horde.org>
66 * @version $Revision: 294749 $
/**
 * The address being parsed by the RFC822 object.
 * @var string $address
 */
var $address = '';

/**
 * The default domain to use for unqualified addresses.
 * @var string $default_domain
 */
var $default_domain = 'localhost';

/**
 * Should we return a nested array showing groups, or flatten everything?
 * @var boolean $nestGroups
 */
var $nestGroups = true;

/**
 * Whether or not to validate atoms for non-ascii characters.
 * NOTE(review): default restored as true (the constructor docs speak of
 * turning validation "off") - confirm.
 * @var boolean $validate
 */
var $validate = true;

/**
 * The array of raw addresses built up as we parse.
 * @var array $addresses
 */
var $addresses = array();

/**
 * The final array of parsed address information that we build up.
 * @var array $structure
 */
var $structure = array();

/**
 * The current error message, if any. It has to be null while no error
 * has occurred because the parser tests it with isset().
 * @var string $error
 */
var $error = null;

/**
 * An internal counter/pointer, read by the loops that consume the
 * pieces joined by _splitCheck().
 * @var integer $index
 */
var $index = null;

/**
 * The number of groups that have been found in the address list.
 * @var integer $num_groups
 */
var $num_groups = 0;

/**
 * A variable so that we can tell whether or not we're inside a
 * Mail_RFC822 object.
 * @var boolean $mailRFC822
 */
var $mailRFC822 = true;

/**
 * A limit after which processing stops. An empty value (null, 0)
 * disables the limit.
 * @var integer $limit
 */
var $limit = null;
141 * Sets up the object. The address must either be set here or when
142 * calling parseAddressList(). One or the other.
145 * @param string $address The address(es) to validate.
146 * @param string $default_domain Default domain/host etc. If not supplied, will be set to localhost.
147 * @param boolean $nest_groups Whether to return the structure with groups nested for easier viewing.
148 * @param boolean $validate Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
150 * @return object Mail_RFC822 A new Mail_RFC822 object.
/**
 * Sets up the object. The address must either be set here or when
 * calling parseAddressList(). One or the other.
 *
 * Only arguments that are not null overwrite the corresponding property.
 *
 * @param string  $address        The address(es) to validate.
 * @param string  $default_domain Default domain/host etc. If not supplied, will be set to localhost.
 * @param boolean $nest_groups    Whether to return the structure with groups nested for easier viewing.
 * @param boolean $validate       Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
 * @param integer $limit          A limit after which processing stops.
 */
function __construct($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    if (isset($address)) {
        $this->address = $address;
    }
    if (isset($default_domain)) {
        $this->default_domain = $default_domain;
    }
    if (isset($nest_groups)) {
        $this->nestGroups = $nest_groups;
    }
    if (isset($validate)) {
        $this->validate = $validate;
    }
    if (isset($limit)) {
        $this->limit = $limit;
    }
}

/**
 * Class-named (PHP 4 style) constructor.
 *
 * PHP 8 no longer treats a method named after its class as the
 * constructor, so the real work lives in __construct(); this name is
 * kept so that PHP 4 and any explicit callers continue to work.
 *
 * @param string  $address        See __construct().
 * @param string  $default_domain See __construct().
 * @param boolean $nest_groups    See __construct().
 * @param boolean $validate       See __construct().
 * @param integer $limit          See __construct().
 */
function Mail_RFC822($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    $this->__construct($address, $default_domain, $nest_groups, $validate, $limit);
}
162 * Starts the whole process. The address must either be set here
163 * or when creating the object. One or the other.
166 * @param string $address The address(es) to validate.
167 * @param string $default_domain Default domain/host etc.
168 * @param boolean $nest_groups Whether to return the structure with groups nested for easier viewing.
169 * @param boolean $validate Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
171 * @return array A structured array of addresses.
/**
 * Starts the whole process. The address must either be set here
 * or when creating the object. One or the other.
 *
 * @param string  $address        The address(es) to validate.
 * @param string  $default_domain Default domain/host etc.
 * @param boolean $nest_groups    Whether to return the structure with groups nested for easier viewing.
 * @param boolean $validate       Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
 * @param integer $limit          A limit after which processing stops.
 *
 * @return mixed A structured array of addresses, or the result of
 *               PEAR::raiseError() when splitting or validation fails.
 */
function parseAddressList($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    // No usable object context: build an instance and delegate to it.
    if (!isset($this) || !isset($this->mailRFC822)) {
        $obj = new Mail_RFC822($address, $default_domain, $nest_groups, $validate, $limit);
        return $obj->parseAddressList();
    }

    if (isset($address)) {
        $this->address = $address;
    }
    if (isset($default_domain)) {
        $this->default_domain = $default_domain;
    }
    if (isset($nest_groups)) {
        $this->nestGroups = $nest_groups;
    }
    if (isset($validate)) {
        $this->validate = $validate;
    }
    if (isset($limit)) {
        $this->limit = $limit;
    }

    // Start every run from a clean slate. Without clearing the error a
    // reused object would keep failing after its first invalid input.
    $this->structure = array();
    $this->addresses = array();
    $this->error     = null;
    $this->index     = null;

    // Unfold any long lines in $this->address.
    $this->address = preg_replace('/\r?\n/', "\r\n", $this->address);
    $this->address = preg_replace('/\r\n(\t| )+/', ' ', $this->address);

    // Peel one address/group off the front per pass; _splitAddresses()
    // returns the remainder, '' when finished and false on failure.
    do {
        $this->address = $this->_splitAddresses($this->address);
    } while ($this->address);

    if ($this->address === false || isset($this->error)) {
        require_once 'PEAR.php';
        return PEAR::raiseError($this->error);
    }

    // Validate each address individually. If we encounter an invalid
    // address, stop iterating and return an error immediately.
    foreach ($this->addresses as $address) {
        $valid = $this->_validateAddress($address);

        if ($valid === false || isset($this->error)) {
            require_once 'PEAR.php';
            return PEAR::raiseError($this->error);
        }

        if (!$this->nestGroups) {
            $this->structure = array_merge($this->structure, $valid);
        } else {
            $this->structure[] = $valid;
        }
    }

    return $this->structure;
}
223 * Splits an address into separate addresses.
226 * @param string $address The addresses to split.
227 * @return boolean Success or failure.
/**
 * Splits an address into separate addresses.
 *
 * Takes the first address (or group) off the front of $address, records
 * it in $this->addresses and hands back what is left.
 *
 * NOTE(review): the split characters (';' for a group, ',' otherwise),
 * the 'group' array key and the num_groups increment were restored from
 * context (see _validateAddress() and the $num_groups docs) - confirm.
 *
 * @param string $address The addresses to split.
 * @return mixed The remaining unparsed string ('' when nothing is left
 *               or the limit was reached), or false on failure.
 */
function _splitAddresses($address)
{
    if (!empty($this->limit) && count($this->addresses) == $this->limit) {
        return '';
    }

    $is_group = $this->_isGroup($address);
    if (isset($this->error)) {
        return false;
    }
    $split_char = $is_group ? ';' : ',';

    // Take everything up to the first separator that is not quoted,
    // bracketed or escaped.
    $parts  = explode($split_char, $address);
    $string = $this->_splitCheck($parts, $split_char);
    if ($string === false) {
        // Unbalanced quotes/brackets; _splitCheck() recorded the error.
        return false;
    }

    if ($is_group) {
        // If $string does not contain a colon outside of
        // brackets/quotes etc then something's fubar.

        // First check there's a colon at all:
        if (strpos($string, ':') === false) {
            $this->error = 'Invalid address: ' . $string;
            return false;
        }

        // Now check it's outside of brackets/quotes. Compared with
        // false explicitly so that a group named "0" is not rejected.
        if ($this->_splitCheck(explode(':', $string), ':') === false) {
            return false;
        }

        // We must have a group at this point, so increase the counter:
        $this->num_groups++;
    }

    // $string now contains the first full address/group.
    // Add to the addresses array.
    $this->addresses[] = array(
        'address' => trim($string),
        'group'   => $is_group
    );

    // Remove the now stored address from the initial line, the +1
    // is to account for the explode character.
    $address = trim(substr($address, strlen($string) + 1));

    // A group may be followed by a comma before the next address.
    if ($is_group && substr($address, 0, 1) == ',') {
        $address = trim(substr($address, 1));
    }

    // Either the rest of the list or '' when it has been consumed.
    return $address;
}
299 * Checks for a group at the start of the string.
302 * @param string $address The address to check.
303 * @return boolean Whether or not there is a group at the start of the string.
/**
 * Checks for a group at the start of the string.
 *
 * @param string $address The address to check.
 * @return boolean Whether or not there is a group at the start of the string.
 */
function _isGroup($address)
{
    // First comma not in quotes, angles or escaped:
    $parts  = explode(',', $address);
    $string = $this->_splitCheck($parts, ',');

    // Now we have the first address, we can reliably check for a
    // group by searching for a colon that's not escaped or in
    // quotes or angle brackets.
    $parts = explode(':', $string);
    if (count($parts) <= 1) {
        return false;
    }

    // If the part before the first "free" colon is shorter than the
    // whole first address, that colon terminates a group name.
    $string2 = $this->_splitCheck($parts, ':');
    return ($string2 !== $string);
}
323 * A common function that will check an exploded string.
326 * @param array $parts The exploded string.
327 * @param string $char The char that was exploded on.
328 * @return mixed False if the string contains unclosed quotes/brackets, or the string on success.
/**
 * A common function that will check an exploded string.
 *
 * Re-joins pieces of $parts with $char until the accumulated text has
 * no unclosed quotes or brackets and does not end in a backslash.
 *
 * NOTE(review): storing the position of the last consumed piece in
 * $this->index was restored from the callers that shift index + 1
 * pieces off their arrays - confirm.
 *
 * @param array  $parts The exploded string.
 * @param string $char  The char that was exploded on.
 * @return mixed False if the string contains unclosed quotes/brackets, or the string on success.
 */
function _splitCheck($parts, $char)
{
    $string = $parts[0];
    $total  = count($parts);

    for ($i = 0; $i < $total; $i++) {
        if ($this->_hasUnclosedQuotes($string)
            || $this->_hasUnclosedBrackets($string, '<>')
            || $this->_hasUnclosedBrackets($string, '[]')
            || $this->_hasUnclosedBrackets($string, '()')
            || substr($string, -1) == '\\') {
            if (isset($parts[$i + 1])) {
                // The separator was quoted/bracketed/escaped: glue the
                // next piece back on and look again.
                $string = $string . $char . $parts[$i + 1];
            } else {
                $this->error = 'Invalid address spec. Unclosed bracket or quotes';
                return false;
            }
        } else {
            $this->index = $i;
            break;
        }
    }

    return $string;
}
356 * Checks if a string has unclosed quotes or not.
359 * @param string $string The string to check.
360 * @return boolean True if there are unclosed quotes inside the string,
/**
 * Checks if a string has unclosed quotes or not.
 *
 * A double quote preceded by an odd number of consecutive backslashes
 * is treated as escaped and does not open or close a quoted section.
 *
 * NOTE(review): the backslash/quote case labels were restored from the
 * surviving "$slashes % 2" test and fall-through comment - confirm.
 *
 * @param string $string The string to check.
 * @return boolean True if there are unclosed quotes inside the string,
 *                 false otherwise.
 */
function _hasUnclosedQuotes($string)
{
    $string   = trim($string);
    $iMax     = strlen($string);
    $in_quote = false;
    $slashes  = 0;

    for ($i = 0; $i < $iMax; ++$i) {
        $current = $string[$i];

        if ($current === '\\') {
            ++$slashes;
            continue;
        }

        if ($current === '"' && $slashes % 2 == 0) {
            $in_quote = !$in_quote;
        }

        // Any character other than a backslash ends the escape run.
        $slashes = 0;
    }

    return $in_quote;
}
392 * Checks if a string has unclosed brackets or not. IMPORTANT:
393 * This function handles both angle brackets and square brackets;
396 * @param string $string The string to check.
397 * @param string $chars The characters to check for.
398 * @return boolean True if there are unclosed brackets inside the string, false otherwise.
/**
 * Checks if a string has unclosed brackets or not.
 *
 * Works for any opening/closing pair passed in $chars; it is called
 * with '<>', '[]' and '()'. Sets $this->error when there are more
 * closing than opening characters.
 *
 * @param string $string The string to check.
 * @param string $chars  The characters to check for (opening char first).
 * @return boolean True if there are unclosed brackets inside the string, false otherwise.
 */
function _hasUnclosedBrackets($string, $chars)
{
    $num_open  = substr_count($string, $chars[0]);
    $num_close = substr_count($string, $chars[1]);

    // Discount occurrences that are escaped or sit inside quotes
    // (both counters are adjusted by reference).
    $this->_hasUnclosedBracketsSub($string, $num_open, $chars[0]);
    $this->_hasUnclosedBracketsSub($string, $num_close, $chars[1]);

    if ($num_open < $num_close) {
        $this->error = 'Invalid address spec. Unmatched quote or bracket (' . $chars . ')';
        return false;
    }

    return ($num_open > $num_close);
}
417 * Sub function that is used only by hasUnclosedBrackets().
420 * @param string $string The string to check.
421 * @param integer &$num The number of occurrences.
422 * @param string $char The character to count.
423 * @return integer The number of occurrences of $char in $string, adjusted for backslashes.
/**
 * Sub function that is used only by hasUnclosedBrackets().
 *
 * NOTE(review): the decrement of $num and the final return were
 * restored from the documented contract ("adjusted for backslashes",
 * integer return) - confirm.
 *
 * @param string  $string The string to check.
 * @param integer &$num   The number of occurrences; adjusted in place.
 * @param string  $char   The character to count.
 * @return integer The number of occurrences of $char in $string, adjusted for backslashes.
 */
function _hasUnclosedBracketsSub($string, &$num, $char)
{
    $parts = explode($char, $string);
    $total = count($parts);

    for ($i = 0; $i < $total; $i++) {
        // Text ending in a backslash or inside an open quote means the
        // following $char is escaped/quoted and must not be counted.
        if (substr($parts[$i], -1) == '\\' || $this->_hasUnclosedQuotes($parts[$i])) {
            $num--;
        }

        // Carry the text forward so that the quote check on the next
        // piece sees everything from the start of the string.
        if (isset($parts[$i + 1])) {
            $parts[$i + 1] = $parts[$i] . $char . $parts[$i + 1];
        }
    }

    return $num;
}
439 * Function to begin checking the address.
442 * @param string $address The address to validate.
443 * @return mixed False on failure, or a structured array of address information on success.
/**
 * Function to begin checking the address.
 *
 * @param array $address One entry of $this->addresses: the raw text
 *                       under 'address' and a group flag under 'group'.
 * @return mixed False on failure, or a structured array of address information on success.
 */
function _validateAddress($address)
{
    $is_group  = false;
    $addresses = array();

    if ($address['group']) {
        $is_group = true;

        // Get the group part of the name
        $parts     = explode(':', $address['address']);
        $groupname = $this->_splitCheck($parts, ':');
        $structure = array();

        // And validate the group part of the name.
        if (!$this->_validatePhrase($groupname)) {
            $this->error = 'Group name did not validate.';
            return false;
        }

        // Don't include groups if we are not nesting
        // them. This avoids returning invalid addresses.
        if ($this->nestGroups) {
            $structure = new stdClass;
            $structure->groupname = $groupname;
        }

        $address['address'] = ltrim(substr($address['address'], strlen($groupname . ':')));
    }

    // If a group then split on comma and put into an array.
    // Otherwise, Just put the whole address in an array.
    if ($is_group) {
        while (strlen($address['address']) > 0) {
            $parts       = explode(',', $address['address']);
            $addresses[] = $this->_splitCheck($parts, ',');
            $address['address'] = trim(substr($address['address'], strlen(end($addresses) . ',')));
        }
    } else {
        $addresses[] = $address['address'];
    }

    // A group without members ("Groupname:;") leaves nothing to check.
    if (!count($addresses)) {
        $this->error = 'Empty group.';
        return false;
    }

    // Trim the whitespace from all of the address strings. array_map()
    // returns a new array, so the result has to be assigned back.
    $addresses = array_map('trim', $addresses);

    // Validate each mailbox; validateMailbox() replaces each string
    // with its parsed object (the argument is taken by reference).
    // Format could be one of: name <geezer@domain.com>
    // ... or any other format valid by RFC 822.
    $total = count($addresses);
    for ($i = 0; $i < $total; $i++) {
        if (!$this->validateMailbox($addresses[$i])) {
            if (empty($this->error)) {
                $this->error = 'Validation failed for: ' . $addresses[$i];
            }
            return false;
        }
    }

    if ($this->nestGroups) {
        // Nested format
        if ($is_group) {
            $structure->addresses = $addresses;
        } else {
            $structure = $addresses[0];
        }
    } else {
        // Flat format
        if ($is_group) {
            $structure = array_merge($structure, $addresses);
        } else {
            $structure = $addresses;
        }
    }

    return $structure;
}
532 * Function to validate a phrase.
535 * @param string $phrase The phrase to check.
536 * @return boolean Success or failure.
/**
 * Function to validate a phrase.
 *
 * The phrase is split on whitespace, quoted strings containing
 * whitespace are re-joined, and every resulting word must be either a
 * valid quoted string or a valid atom.
 *
 * @param string $phrase The phrase to check.
 * @return boolean Success or failure.
 */
function _validatePhrase($phrase)
{
    // Splits on one or more Tab or space.
    $parts = preg_split('/[ \\x09]+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);

    $phrase_parts = array();
    while (count($parts) > 0) {
        $phrase_parts[] = $this->_splitCheck($parts, ' ');
        // Drop the pieces _splitCheck() just consumed.
        for ($i = 0; $i < $this->index + 1; $i++) {
            array_shift($parts);
        }
    }

    foreach ($phrase_parts as $part) {
        if (substr($part, 0, 1) == '"') {
            // If quoted string:
            if (!$this->_validateQuotedString($part)) {
                return false;
            }
        } elseif (!$this->_validateAtom($part)) {
            // Otherwise it's an atom:
            return false;
        }
    }

    return true;
}
567 * Function to validate an atom which from rfc822 is:
568 * atom = 1*<any CHAR except specials, SPACE and CTLs>
570 * If validation ($this->validate) has been turned off, then
571 * validateAtom() doesn't actually check anything. This is so that you
572 * can split a list of addresses up before encoding personal names
573 * (umlauts, etc.), for example.
576 * @param string $atom The string to check.
577 * @return boolean Success or failure.
/**
 * Function to validate an atom which from rfc822 is:
 * atom = 1*<any CHAR except specials, SPACE and CTLs>
 *
 * If validation ($this->validate) has been turned off, then
 * validateAtom() doesn't actually check anything. This is so that you
 * can split a list of addresses up before encoding personal names
 * (umlauts, etc.), for example.
 *
 * @param string $atom The string to check.
 * @return boolean Success or failure.
 */
function _validateAtom($atom)
{
    if (!$this->validate) {
        // Validation has been turned off; assume the atom is okay.
        return true;
    }

    // Must be non-empty and made only of ASCII 0 - 126; DEL (127) and
    // all 8-bit characters are rejected here.
    if (!preg_match('/^[\\x00-\\x7E]+$/i', $atom)) {
        return false;
    }

    // Check for specials:
    // NOTE(review): after PHP and PCRE unescaping, "\\:" in this pattern
    // is just an escaped colon, so a literal backslash is NOT rejected
    // although RFC 822 lists it as a special. Left as is to avoid
    // changing which addresses are accepted - confirm intent.
    if (preg_match('/[][()<>@,;\\:". ]/', $atom)) {
        return false;
    }

    // Check for control characters (ASCII 0-31):
    if (preg_match('/[\\x00-\\x1F]+/', $atom)) {
        return false;
    }

    return true;
}
605 * Function to validate quoted string, which is:
606 * quoted-string = <"> *(qtext/quoted-pair) <">
609 * @param string $qstring The string to check
610 * @return boolean Success or failure.
/**
 * Function to validate quoted string, which is:
 * quoted-string = <"> *(qtext/quoted-pair) <">
 *
 * @param string $qstring The string to check, including its surrounding
 *                        double quotes.
 * @return boolean Success or failure.
 */
function _validateQuotedString($qstring)
{
    // Leading and trailing "
    $inner = substr($qstring, 1, -1);

    // Remove quoted pairs (backslash + any character) first; whatever
    // remains must not contain a CR, a backslash or a double quote.
    $unescaped = preg_replace('/\\\\./', '', $inner);

    return !preg_match('/[\x0D\\\\"]/', $unescaped);
}
622 * Function to validate a mailbox, which is:
623 * mailbox = addr-spec ; simple address
624 * / phrase route-addr ; name and route-addr
627 * @param string &$mailbox The string to check.
628 * @return boolean Success or failure.
/**
 * Function to validate a mailbox, which is:
 * mailbox = addr-spec ; simple address
 *         / phrase route-addr ; name and route-addr
 *
 * On success $mailbox is replaced by a stdClass object carrying
 * personal, comment, mailbox, host and (when a route was given) adl.
 *
 * NOTE(review): the initial defaults, the loop "break" and the final
 * "$mailbox = $mbox; return true;" were restored from context (by-ref
 * parameter, boolean return, caller usage) - confirm.
 *
 * @param string &$mailbox The string to check.
 * @return boolean Success or failure.
 */
function validateMailbox(&$mailbox)
{
    // A couple of defaults.
    $phrase   = '';
    $comment  = '';
    $comments = array();

    // Catch any RFC822 comments and store them separately.
    $_mailbox = $mailbox;
    while (strlen(trim($_mailbox)) > 0) {
        $parts = explode('(', $_mailbox);
        $before_comment = $this->_splitCheck($parts, '(');
        if ($before_comment == $_mailbox) {
            // No (further) comment in what is left.
            break;
        }

        // First char should be a (.
        $comment    = substr(str_replace($before_comment, '', $_mailbox), 1);
        $parts      = explode(')', $comment);
        $comment    = $this->_splitCheck($parts, ')');
        $comments[] = $comment;

        // +2 is for the brackets
        $_mailbox = substr($_mailbox, strpos($_mailbox, '('.$comment)+strlen($comment)+2);
    }

    foreach ($comments as $comment) {
        $mailbox = str_replace("($comment)", '', $mailbox);
    }

    $mailbox = trim($mailbox);

    if (substr($mailbox, -1) == '>' && substr($mailbox, 0, 1) != '<') {
        // Check for name + route-addr
        $parts = explode('<', $mailbox);
        $name  = $this->_splitCheck($parts, '<');

        $phrase     = trim($name);
        $route_addr = trim(substr($mailbox, strlen($name.'<'), -1));

        if ($this->_validatePhrase($phrase) === false || ($route_addr = $this->_validateRouteAddr($route_addr)) === false) {
            return false;
        }
    } else {
        // Only got addr-spec

        // First snip angle brackets if present.
        if (substr($mailbox, 0, 1) == '<' && substr($mailbox, -1) == '>') {
            $addr_spec = substr($mailbox, 1, -1);
        } else {
            $addr_spec = $mailbox;
        }

        if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) {
            return false;
        }
    }

    // Construct the object that will be returned.
    $mbox = new stdClass();

    // Add the phrase (even if empty) and comments
    $mbox->personal = $phrase;
    $mbox->comment  = $comments;

    if (isset($route_addr)) {
        $mbox->mailbox = $route_addr['local_part'];
        $mbox->host    = $route_addr['domain'];
        // adl is only exposed when a route was actually present.
        if ($route_addr['adl'] !== '') {
            $mbox->adl = $route_addr['adl'];
        }
    } else {
        $mbox->mailbox = $addr_spec['local_part'];
        $mbox->host    = $addr_spec['domain'];
    }

    $mailbox = $mbox;
    return true;
}
709 * This function validates a route-addr which is:
710 * route-addr = "<" [route] addr-spec ">"
712 * Angle brackets have already been removed at the point of
713 * getting to this function.
716 * @param string $route_addr The string to check.
717 * @return mixed False on failure, or an array containing validated address/route information on success.
/**
 * This function validates a route-addr which is:
 * route-addr = "<" [route] addr-spec ">"
 *
 * Angle brackets have already been removed at the point of
 * getting to this function.
 *
 * @param string $route_addr The string to check.
 * @return mixed False on failure, or an array containing validated address/route information on success.
 */
function _validateRouteAddr($route_addr)
{
    // Check for colon.
    if (strpos($route_addr, ':') !== false) {
        $parts = explode(':', $route_addr);
        $route = $this->_splitCheck($parts, ':');
    } else {
        $route = $route_addr;
    }

    if ($route === $route_addr) {
        // The colon was in quotes or brackets or, of course, non
        // existent: there is no route, only an addr-spec.
        $adl       = '';
        $addr_spec = $route_addr;
    } else {
        // Validate route part.
        if (($route = $this->_validateRoute($route)) === false) {
            return false;
        }

        $adl       = $route;
        $addr_spec = substr($route_addr, strlen($route . ':'));
    }

    // Validate addr-spec part.
    if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) {
        return false;
    }

    // 'adl' holds the route ('' when absent), followed by the
    // 'local_part' and 'domain' keys produced by _validateAddrSpec().
    return array_merge(array('adl' => $adl), $addr_spec);
}
762 * Function to validate a route, which is:
763 * route = 1#("@" domain) ":"
766 * @param string $route The string to check.
767 * @return mixed False on failure, or the validated $route on success.
/**
 * Function to validate a route, which is:
 * route = 1#("@" domain) ":"
 *
 * @param string $route The string to check.
 * @return mixed False on failure, or the validated $route on success.
 */
function _validateRoute($route)
{
    // Split on comma.
    $domains = explode(',', trim($route));

    foreach ($domains as $domain) {
        $domain = str_replace('@', '', trim($domain));
        // _validateDomain() returns the domain string on success, so
        // test for false explicitly: a domain of "0" is falsy in PHP.
        if ($this->_validateDomain($domain) === false) {
            return false;
        }
    }

    return $route;
}
783 * Function to validate a domain, though this is not quite what
784 * you expect of a strict internet domain.
786 * domain = sub-domain *("." sub-domain)
789 * @param string $domain The string to check.
790 * @return mixed False on failure, or the validated domain on success.
/**
 * Function to validate a domain, though this is not quite what
 * you expect of a strict internet domain.
 *
 * domain = sub-domain *("." sub-domain)
 *
 * @param string $domain The string to check.
 * @return mixed False on failure, or the validated domain on success.
 */
function _validateDomain($domain)
{
    // Note the different use of $subdomains and $sub_domains:
    // $subdomains holds the raw pieces, $sub_domains the re-joined ones.
    $subdomains  = explode('.', $domain);
    $sub_domains = array();

    while (count($subdomains) > 0) {
        $sub_domains[] = $this->_splitCheck($subdomains, '.');
        // Drop the pieces _splitCheck() just consumed.
        for ($i = 0; $i < $this->index + 1; $i++) {
            array_shift($subdomains);
        }
    }

    foreach ($sub_domains as $sub_domain) {
        if (!$this->_validateSubdomain(trim($sub_domain))) {
            return false;
        }
    }

    // Managed to get here, so return input.
    return $domain;
}
813 * Function to validate a subdomain:
814 * subdomain = domain-ref / domain-literal
817 * @param string $subdomain The string to check.
818 * @return boolean Success or failure.
/**
 * Function to validate a subdomain:
 * subdomain = domain-ref / domain-literal
 *
 * @param string $subdomain The string to check.
 * @return boolean Success or failure.
 */
function _validateSubdomain($subdomain)
{
    // Text wrapped in square brackets is a domain literal; anything
    // else has to be a plain atom.
    if (preg_match('|^\[(.*)]$|', $subdomain, $arr)) {
        return (bool) $this->_validateDliteral($arr[1]);
    }

    return (bool) $this->_validateAtom($subdomain);
}
833 * Function to validate a domain literal:
834 * domain-literal = "[" *(dtext / quoted-pair) "]"
837 * @param string $dliteral The string to check.
838 * @return boolean Success or failure.
/**
 * Function to validate a domain literal:
 * domain-literal = "[" *(dtext / quoted-pair) "]"
 *
 * The literal is rejected when any character is followed by "[", "]",
 * a CR or a backslash. The previous expression additionally read
 * $matches[1] only when the pattern had NOT matched, which could never
 * change the result but raised an undefined-index warning for every
 * valid literal; that dead operand has been removed.
 *
 * NOTE(review): as before, backslash-escaped characters (quoted-pairs)
 * are therefore rejected too - confirm whether that is intended.
 *
 * @param string $dliteral The string to check (without the enclosing brackets).
 * @return boolean Success or failure.
 */
function _validateDliteral($dliteral)
{
    return !preg_match('/(.)[][\x0D\\\\]/', $dliteral);
}
846 * Function to validate an addr-spec.
848 * addr-spec = local-part "@" domain
851 * @param string $addr_spec The string to check.
852 * @return mixed False on failure, or the validated addr-spec on success.
/**
 * Function to validate an addr-spec.
 *
 * addr-spec = local-part "@" domain
 *
 * @param string $addr_spec The string to check.
 * @return mixed False on failure, or an array with the validated
 *               'local_part' and 'domain' on success.
 */
function _validateAddrSpec($addr_spec)
{
    $addr_spec = trim($addr_spec);

    if (strpos($addr_spec, '@') !== false) {
        // Split on the first @ sign that is not quoted or escaped.
        $parts      = explode('@', $addr_spec);
        $local_part = $this->_splitCheck($parts, '@');
        $domain     = substr($addr_spec, strlen($local_part . '@'));
    } else {
        // No @ sign so assume the default domain.
        $local_part = $addr_spec;
        $domain     = $this->default_domain;
    }

    $local_part = $this->_validateLocalPart($local_part);
    if ($local_part === false) {
        return false;
    }

    $domain = $this->_validateDomain($domain);
    if ($domain === false) {
        return false;
    }

    // Got here so return successful.
    return array('local_part' => $local_part, 'domain' => $domain);
}
878 * Function to validate the local part of an address:
879 * local-part = word *("." word)
882 * @param string $local_part
883 * @return mixed False on failure, or the validated local part on success.
/**
 * Function to validate the local part of an address:
 * local-part = word *("." word)
 *
 * @param string $local_part
 * @return mixed False on failure, or the validated local part on success.
 */
function _validateLocalPart($local_part)
{
    $parts = explode('.', $local_part);
    $words = array();

    // Split the local_part into words.
    while (count($parts) > 0) {
        $words[] = $this->_splitCheck($parts, '.');
        // Drop the pieces _splitCheck() just consumed.
        for ($i = 0; $i < $this->index + 1; $i++) {
            array_shift($parts);
        }
    }

    // Validate each word.
    foreach ($words as $word) {
        // If this word contains an unquoted space, it is invalid. (6.2.4)
        // NOTE(review): strpos() yields 0 for a leading space, which is
        // falsy, so a word that only starts with a space passes this
        // test and is trimmed below. Kept as is because tightening it
        // would change which addresses are accepted.
        if (strpos($word, ' ') && $word[0] !== '"') {
            return false;
        }

        if ($this->_validatePhrase(trim($word)) === false) {
            return false;
        }
    }

    // Managed to get here, so return the input.
    return $local_part;
}
914 * Returns an approximate count of how many addresses are in the
915 * given string. This is APPROXIMATE as it only splits based on a
916 * comma which has no preceding backslash. Could be useful as
917 * large amounts of addresses will end up producing *large*
918 * structures when used with parseAddressList().
920 * @param string $data Addresses to count
921 * @return int Approximate count
/**
 * Returns an approximate count of how many addresses are in the
 * given string. This is APPROXIMATE as it only splits based on a
 * comma which has no preceding backslash. Could be useful as
 * large amounts of addresses will end up producing *large*
 * structures when used with parseAddressList().
 *
 * @param string $data Addresses to count
 * @return int Approximate count (an empty string counts as 1)
 */
function approximateCount($data)
{
    // Negative look-behind: split on commas not preceded by a backslash.
    $pieces = preg_split('/(?<!\\\\),/', $data);

    return count($pieces);
}
929 * This is an email validating function separate to the rest of the
930 * class. It simply validates whether an email is of the common
931 * internet form: <user>@<domain>. This can be sufficient for most
932 * people. Optional stricter mode can be utilised which restricts
933 * mailbox characters allowed to alphanumeric, full stop, hyphen
936 * @param string $data Address to check
937 * @param boolean $strict Optional stricter mode
938 * @return mixed False if it fails, an indexed array
939 * username/domain if it matches
/**
 * This is an email validating function separate to the rest of the
 * class. It simply validates whether an email is of the common
 * internet form: <user>@<domain>. This can be sufficient for most
 * people. The optional stricter mode narrows the characters allowed in
 * the mailbox part to those in the strict pattern below (letters,
 * digits, full stop, underscore, plus and hyphen).
 *
 * @param string  $data   Address to check
 * @param boolean $strict Optional stricter mode
 * @return mixed False if it fails, an indexed array
 *               username/domain if it matches
 */
function isValidInetAddress($data, $strict = false)
{
    if ($strict) {
        $regex = '/^([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i';
    } else {
        $regex = '/^([*+!.&#$|\'\\%\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i';
    }

    if (!preg_match($regex, trim($data), $matches)) {
        return false;
    }

    // [0] => mailbox (user) part, [1] => domain part.
    return array($matches[1], $matches[2]);
}