Logo Search packages:      
Sourcecode: mailavenger version File versions  Download package

addrparse.C

/* $Id$ */

/*
 *
 * Copyright (C) 2004 David Mazieres (dm@uun.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

/*
 * RFC 821 address parser.  The grammar in RFC 821 unfortunately
 * doesn't match reality--it permits things that are never used, and
 * prohibits commonly used email addresses (e.g., anything in which a
 * DNS label has fewer than three characters).  We try to be as strict
 * as possible while remaining reasonable.
 */

#include "asmtpd.h"

/*
 * Parse one decimal octet of a dotted quad: between one and three
 * digits whose value does not exceed 255.  At most three digits are
 * consumed; any further digit is left for the caller to reject.
 *
 * On success *dpp is advanced past the digits and 0 is returned.  On
 * failure -1 is returned and *dpp is left untouched.
 */
static int
gobble_0_255 (const char **dpp)
{
  const char *dp = *dpp;
  int ndigits = 0;
  int n = 0;

  /* The <ctype.h> classifiers require a value representable as
   * unsigned char (or EOF).  Plain char may be signed, so a byte with
   * the high bit set must be converted before it is classified. */
  while (ndigits < 3 && isdigit ((unsigned char) *dp)) {
    n = 10 * n + (*dp++ - '0');
    ndigits++;
  }
  if (ndigits == 0 || n > 255)
    return -1;
  *dpp = dp;
  return 0;
}

/*
 * Parse a bracketed dotted quad such as "[18.26.0.1]": an opening
 * '[', four octets accepted by gobble_0_255 separated by '.', and a
 * closing ']'.
 *
 * Returns 0 and advances *dpp past the ']' on success; returns -1 and
 * leaves *dpp alone otherwise.
 */
static inline int
gobble_dquad (const char **dpp)
{
  const char *cur = *dpp;
  int octet;

  if (*cur++ != '[')
    return -1;
  for (octet = 0; octet < 4; octet++) {
    if (gobble_0_255 (&cur))
      return -1;
    /* The first three octets are followed by '.', the last by ']'. */
    if (*cur++ != (octet < 3 ? '.' : ']'))
      return -1;
  }
  *dpp = cur;
  return 0;
}

/*
 * Parse a single domain label.
 *
 * This deliberately departs from RFC 821:
 *   - RFC 821 requires at least three characters in a name, whereas
 *     the real world often uses one or two, so one is enough here.
 *   - RFC 821 requires the first character to be a letter; a digit is
 *     accepted as well so that names like 3com.com parse.
 *   - "#" numbers and dotnums are not handled here at all; they are
 *     left to gobble_domain.
 * The label still may not end in a hyphen: trailing hyphens are
 * simply not consumed.  When uok is true, '_' is additionally allowed
 * anywhere in the label.
 *
 * Returns 0 and advances *dpp past the label on success; returns -1
 * and leaves *dpp untouched if no label starts at *dpp.
 */
static int
gobble_name (const char **dpp, bool uok)
{
  const char *dp = *dpp;

  /* The <ctype.h> classifiers require a value representable as
   * unsigned char (or EOF); convert explicitly because plain char may
   * be signed and the input may contain bytes >= 0x80. */
  if (!isalnum ((unsigned char) *dp) && !(uok && *dp == '_'))
    return -1;
  dp++;
  while (isalnum ((unsigned char) *dp) || *dp == '-' || (uok && *dp == '_'))
    dp++;
  /* Back off over trailing hyphens.  This terminates because the
   * first character of the label is never a hyphen. */
  while (dp[-1] == '-')
    dp--;
  *dpp = dp;
  return 0;
}

/*
 * Parse a domain name: one label accepted by gobble_name, followed by
 * any number of ".label" continuations.  A single label is enough; a
 * second one is not required.
 *
 * A '.' that is not followed by a valid label is not consumed, so on
 * return *dpp points at that '.'.  Returns -1, with *dpp unchanged,
 * only when the very first label is missing.
 */
static inline int
gobble_dname (const char **dpp, bool uok)
{
  const char *cur = *dpp;

  if (gobble_name (&cur, uok))
    return -1;
  for (;;) {
    if (*cur != '.')
      break;
    /* Only commit to the dot once a label is known to follow it. */
    const char *next = cur + 1;
    if (gobble_name (&next, uok))
      break;
    cur = next;
  }
  *dpp = cur;
  return 0;
}

/*
 * Parse a domain: either one bracketed dotted quad or one
 * syntactically valid internet domain name.
 *
 * RFC 821 would also accept mixtures such as
 * "harvard.edu.[18.26.0.1].#34", but there is no sensible way to
 * route mail to an address like that, so such forms are rejected.
 *
 * uok is passed through to gobble_dname and controls whether '_' is
 * permitted in labels.  Returns 0 on success, -1 on failure.
 */
static int
gobble_domain (const char **dpp, bool uok = false)
{
  return **dpp == '[' ? gobble_dquad (dpp) : gobble_dname (dpp, uok);
}

/*
 * Parse "@domain", one hop of a source route.  The domain is parsed
 * with gobble_domain's default setting (no underscores).
 *
 * Returns 0 and advances *dpp past the domain on success; returns -1
 * and leaves *dpp unchanged otherwise.
 */
static int
gobble_atdomain (const char **dpp)
{
  const char *cur = *dpp;

  if (*cur != '@')
    return -1;
  cur++;
  if (gobble_domain (&cur))
    return -1;
  *dpp = cur;
  return 0;
}

/*
 * Parse a source-route list: "@domain" entries separated by ',' and
 * terminated by ':' (for example "@a.example,@b.example:").
 *
 * Returns 0 and advances *dpp past the ':' on success; returns -1 and
 * leaves *dpp unchanged otherwise.
 */
static int
gobble_adl (const char **dpp)
{
  const char *cur = *dpp;

  for (;;) {
    if (gobble_atdomain (&cur))
      return -1;
    char sep = *cur++;
    if (sep == ',')
      continue;			/* another hop follows */
    if (sep != ':')
      return -1;
    break;
  }
  *dpp = cur;
  return 0;
}

/*
 * Return 1 if c may not appear unquoted in the local part of an
 * address, 0 otherwise.  The rejected set is: every byte with the
 * high bit set, the control characters and space (0x00-0x20), DEL
 * (0x7f), and the punctuation listed in the switch below.
 */
static inline int
myisspecial (char c)
{
  unsigned char uc = (unsigned char) c;

  if (uc >= 0x80 || uc <= 0x20 || uc == 0x7f)
    return 1;

  switch (uc) {
  case '"':
  case '(':
  case ')':
  case ',':
  case '.':
  case ':':
  case ';':
  case '<':
  case '>':
  case '@':
  case '[':
  case '\\':
  case ']':
    return 1;
  default:
    return 0;
  }
}

/*
 * Parse one character of an unquoted local part: either a single
 * non-special character (see myisspecial) or a backslash followed by
 * the character it escapes.
 *
 * More cheating: any quoted character should be allowed in the local
 * part of an address, but an escaped '\0' or '\n' is refused (input
 * is read one line at a time), as is an escaped byte above 127.
 * Sendmail seems to do the same thing, and anyone who puts '\0' or a
 * newline in an email address is looking for trouble.
 *
 * Returns 0 and advances *dpp by one or two bytes on success; returns
 * -1 and leaves *dpp unchanged otherwise.
 */
static inline int
gobble_char (const char **dpp)
{
  const char *dp = *dpp;

  if (*dp == '\\') {
    /* Each condition must reject on its own.  In particular a
     * backslash at the very end of the string must fail, otherwise
     * the cursor would step over the terminating NUL and later reads
     * would run off the end of the buffer. */
    if (dp[1] == '\0' || dp[1] == '\n' || (unsigned char) dp[1] > 127)
      return -1;
    *dpp = dp + 2;
    return 0;
  }
  if (myisspecial (*dp))
    return -1;
  *dpp = dp + 1;
  return 0;
}

/*
 * Parse a run of one or more characters accepted by gobble_char.
 *
 * Returns 0 and advances *dpp past the run on success; returns -1 and
 * leaves *dpp unchanged if not even one character could be consumed.
 */
static int
gobble_string (const char **dpp)
{
  const char *cur = *dpp;

  while (gobble_char (&cur) == 0)
    continue;
  /* gobble_char always advances on success, so an unmoved cursor
   * means the run was empty. */
  if (cur == *dpp)
    return -1;
  *dpp = cur;
  return 0;
}

/*
 * Parse a dot-string: strings accepted by gobble_string joined by
 * single '.' characters.  Every '.' must be followed by another
 * string, so a leading, trailing, or doubled dot makes the parse
 * fail.
 *
 * Returns 0 and advances *dpp past the dot-string on success; returns
 * -1 and leaves *dpp unchanged otherwise.
 */
static int
gobble_dot_string (const char **dpp)
{
  const char *cur = *dpp;

  if (gobble_string (&cur))
    return -1;
  while (*cur == '.') {
    cur++;
    if (gobble_string (&cur))
      return -1;
  }
  *dpp = cur;
  return 0;
}

/*
 * Parse one element of the inside of a quoted string: either a
 * backslash escape or any single byte other than NUL, CR, LF, or the
 * closing double quote.
 *
 * As elsewhere, an escaped '\0' or '\n' is refused, and so is an
 * escaped byte above 127.
 *
 * Returns 0 and advances *dpp by one or two bytes on success; returns
 * -1 and leaves *dpp unchanged otherwise.
 */
static inline int
gobble_qqq (const char **dpp)
{
  const char *cur = *dpp;
  int step;

  switch (*cur) {
  case '\0':
  case '\r':
  case '\n':
  case '"':
    return -1;

  case '\\':
    {
      unsigned char escaped = (unsigned char) cur[1];
      if (escaped == '\0' || escaped == '\n' || escaped > 127)
	return -1;
      step = 2;
    }
    break;

  default:
    step = 1;
    break;
  }

  *dpp = cur + step;
  return 0;
}

/*
 * Parse a double-quoted string: '"', one or more elements accepted by
 * gobble_qqq, then a closing '"'.  An empty quoted string ("") is
 * rejected.
 *
 * Returns 0 and advances *dpp past the closing quote on success;
 * returns -1 and leaves *dpp unchanged otherwise.
 */
static int
gobble_quoted_string (const char **dpp)
{
  const char *cur = *dpp;

  if (*cur != '"')
    return -1;
  const char *body = ++cur;

  while (gobble_qqq (&cur) == 0)
    continue;
  /* gobble_qqq always advances on success, so cur == body means the
   * quotes enclosed nothing. */
  if (cur == body || *cur != '"')
    return -1;

  *dpp = cur + 1;
  return 0;
}

/*
 * Parse the local part of an address: a quoted string if it begins
 * with '"', otherwise a dot-string.  Returns 0 on success, -1 on
 * failure.
 */
static int
gobble_local_part (const char **dpp)
{
  return **dpp == '"' ? gobble_quoted_string (dpp) : gobble_dot_string (dpp);
}

/*
 * Extract the address from text of the form
 *     prefix [spaces] "<" [source-route] local-part "@" domain ">"
 * where prefix is matched case-insensitively.
 *
 * Returns the text between the angle brackets (including any source
 * route), the empty string for "<>", or NULL if the input does not
 * parse or the bracketed text is longer than MAX_ADDR_LEN.
 *
 * When an address is returned, *dpp is advanced past the '>' and any
 * spaces that follow it.  On failure *dpp is unchanged.
 *
 * NOTE(review): the "<>" case returns without advancing *dpp, unlike
 * the ordinary success path -- confirm that callers expect this.
 */
str
extract_addr (const char **dpp, const char *prefix)
{
  const char *cur = *dpp;
  int prefix_len = strlen (prefix);

  if (strncasecmp (cur, prefix, prefix_len) != 0)
    return NULL;
  cur += prefix_len;

  while (*cur == ' ')
    cur++;
  if (*cur != '<')
    return NULL;
  cur++;

  const char *start = cur;
  if (*cur == '>')
    return "";

  /* An optional source route comes first and is kept in the result. */
  if (*cur == '@' && gobble_adl (&cur))
    return NULL;
  if (gobble_local_part (&cur))
    return NULL;
  if (*cur != '@')
    return NULL;
  cur++;
  if (gobble_domain (&cur))
    return NULL;

  int addr_len = cur - start;
  if (addr_len > MAX_ADDR_LEN)
    return NULL;
  if (*cur != '>')
    return NULL;
  cur++;

  while (*cur == ' ')
    cur++;
  *dpp = cur;

  return str (start, addr_len);
}

/*
 * Convenience overload: run the pointer-based extract_addr on the
 * contents of `in' and discard the final cursor position.
 */
str
extract_addr (const str &in, const char *prefix)
{
  const char *cursor = in;
  return extract_addr (&cursor, prefix);
}

/*
 * Return, in lower case, the domain of the first hop for addr.
 *
 * If addr starts with '@' (a source route), that is the first
 * "@domain", which must be followed by ',' or ':'.  Otherwise addr
 * must be exactly local-part "@" domain, and the domain is returned.
 * Returns NULL if addr does not parse.
 */
str
extract_relay (const char *addr)
{
  const char *cur = addr;
  const char *host;

  if (*cur != '@') {
    if (gobble_local_part (&cur) || *cur++ != '@')
      return NULL;
    host = cur;
    /* Nothing may follow the domain of a plain address. */
    if (gobble_domain (&cur) || *cur)
      return NULL;
  }
  else {
    if (gobble_atdomain (&cur))
      return NULL;
    if (*cur != ',' && *cur != ':')
      return NULL;
    host = addr + 1;		/* skip the leading '@' */
  }

  int host_len = cur - host;
  mstr lowered (host_len);
  for (int k = 0; k < host_len; k++)
    lowered[k] = tolower (host[k]);
  str res (lowered);
  return res;
}

/*
 * Return true iff addr consists entirely of one domain accepted by
 * gobble_domain.  uok selects whether '_' is allowed in labels.
 */
bool
validate_domain (const char *addr, bool uok)
{
  const char *end = addr;

  if (gobble_domain (&end, uok))
    return false;
  return *end == '\0';
}

/*
 * Return everything after the '@' that follows the local part of
 * addr.  addr may be a plain "local@domain" or may carry a leading
 * source route ("@a,@b:local@domain").  The returned text is not
 * itself checked for domain syntax.  Returns NULL if no local part
 * followed by '@' can be found.
 */
str
extract_domain (const char *addr)
{
  const char *cur = addr;

  /* First try addr as a plain address. */
  if (gobble_local_part (&cur) == 0 && *cur == '@')
    return cur + 1;

  /* Otherwise it must start with a source route. */
  cur = addr;
  if (gobble_adl (&cur))
    return NULL;
  if (gobble_local_part (&cur) == 0 && *cur == '@')
    return cur + 1;
  return NULL;
}

/*
 * Return a copy of addr in which the domain portions are folded to
 * lower case while the local part is left as written.
 *
 * Two shapes are handled:
 *   - local@rest: everything from the '@' to the end is lower-cased.
 *   - @a,@b:local[@rest]: the source route and, when present,
 *     everything after the '@' following the local part are
 *     lower-cased.
 * Returns NULL if addr fits neither shape.
 *
 * The text after the final '@' is not validated as a domain, so it
 * may contain arbitrary bytes.  tolower() is only defined for values
 * representable as unsigned char (or EOF), hence the explicit
 * conversions below.
 */
str
domain_tolower (const char *addr)
{
  /* NOTE(review): strcpy writes strlen (addr) + 1 bytes; presumably
   * mstr (n) leaves room for the terminator -- confirm. */
  mstr m (strlen (addr));
  strcpy (m, addr);
  const char *dp = implicit_cast<char *> (m);
  if (!gobble_local_part (&dp) && *dp == '@') {
    for (char *cp = const_cast<char *> (dp); *cp; cp++)
      *cp = tolower ((unsigned char) *cp);
    return m;
  }

  /* Not a plain address; retry from the start as a source route. */
  dp = implicit_cast<char *> (m);
  if (gobble_adl (&dp))
    return NULL;
  for (char *cp = m; cp < dp; cp++)
    *cp = tolower ((unsigned char) *cp);
  if (gobble_local_part (&dp))
    return NULL;
  if (!*dp)
    return m;
  if (*dp++ != '@')
    return NULL;
  for (char *cp = const_cast<char *> (dp); *cp; cp++)
    *cp = tolower ((unsigned char) *cp);
  return m;
}

/*
 * Return the local part of addr, which must be exactly
 * local-part "@" domain with nothing following the domain.  Returns
 * NULL if addr does not have that form.
 */
str
extract_local (const char *addr)
{
  const char *at = addr;

  if (gobble_local_part (&at))
    return NULL;
  if (*at != '@')
    return NULL;

  /* The domain is only checked; it is not part of the result. */
  const char *rest = at + 1;
  if (gobble_domain (&rest))
    return NULL;
  if (*rest != '\0')
    return NULL;

  return str (addr, at - addr);
}

/*
 * Return true iff addr is, in its entirety, a valid local part.  The
 * parse must stop at a NUL, and that NUL must sit exactly
 * addr.len () bytes into the string, so a NUL embedded earlier in
 * addr causes rejection.
 */
bool
validate_local (str addr)
{
  const char *end = addr;
  bool parsed = !gobble_local_part (&end) && *end == '\0';

  return parsed && end == addr.cstr () + addr.len ();
}

Generated by  Doxygen 1.6.0   Back to index