Logo Search packages:      
Sourcecode: mailavenger version File versions  Download package

netpath.C

/* $Id$ */

/*
 *
 * Copyright (C) 2003 David Mazieres (dm@uun.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include "async.h"
#include "qhash.h"
#include "aios.h"
#include "rawnet.h"

#define NETPATH_VERBOSE 0

/* Create a client record that pairs callback c with address a on
 * socket s, and enter the record into that socket's callback table
 * (cbtab) so that icmpsock::rcb() can find it. */
inline
icmpsock::icmpclnt::icmpclnt (ref<icmpsock> s, in_addr a, icmpsock::cb_t c)
  : cb (c), is (s), addr (a)
{
  is->cbtab.insert (this);
}

/* Take this client record back out of the owning socket's callback
 * table. */
inline
icmpsock::icmpclnt::~icmpclnt ()
{
  is->cbtab.remove (this);
}

void
icmpsock::rcb ()
{
  for (;;) {
    sockaddr_in sin;
    socklen_t sinlen = sizeof (sin);
    bzero (&sin, sizeof (sin));
    sin.sin_family = AF_INET;

    inpkt pkt;
    bzero (&pkt, sizeof (pkt));

    int n = recvfrom (icmpfd, &pkt, sizeof (pkt), 0,
                  (sockaddr *) &sin, &sinlen);
    if (n <= 0) {
      if (n < 0 && errno != EAGAIN)
      warn ("recvfrom ICMP socket: %m\n");
      return;
    }
    icmp_info ii;
    if (!icmp_parse (&ii, &pkt, n) || !ii.udphp)
      continue;

    for (icmpclnt *cp = cbtab[ii.iphp->ip_dst], *ncp; cp; cp = ncp) {
      ncp = cbtab.nextkeq (cp);
      (*cp->cb) (&ii);
    }
  }
}

/* Close whichever of the three sockets are open and mark them closed
 * (-1).  The ICMP socket is the only one with a read callback, which is
 * cleared before the descriptor is closed. */
void
icmpsock::closefds ()
{
  if (icmpfd >= 0)
    fdcb (icmpfd, selread, NULL);

  /* Same closing order as always: ICMP, UDP, raw IP. */
  int *const fds[] = { &icmpfd, &udpfd, &ipfd };
  for (size_t i = 0; i < sizeof (fds) / sizeof (fds[0]); i++)
    if (*fds[i] >= 0) {
      close (*fds[i]);
      *fds[i] = -1;
    }
}

/* Parse a packet read from the ICMP socket.
 *
 *   infop  result; zeroed first, then filled in as far as the packet
 *          allows
 *   inp    the received packet (outer IP header first)
 *   size   number of bytes actually received
 *
 * Returns false if the packet is not a usable ICMP message.  Otherwise
 * returns true with pkthdrp, icmpp, type and code set.  iphp is set
 * only if the message is long enough to quote an IP header plus 8
 * bytes, and udphp only if that quoted datagram is UDP with a sane
 * header length; callers must check those pointers for NULL. */
bool
icmpsock::icmp_parse (icmpsock::icmp_info *infop, inpkt *inp, int size)
{
  bzero (infop, sizeof (*infop));

  if (size < int (sizeof (inp->iph) + 8)
      /* || size < int (ntohs (inp->iph.ip_len)) XXX - kernel byte-swaps? */
      || inp->iph.ip_p != IPPROTO_ICMP)
    return false;
  const int hlen = inp->iph.ip_hl << 2;
  /* Reject a header length shorter than a minimal IP header, just as is
   * done for the quoted header below; otherwise icmpp would point into
   * the outer IP header itself. */
  if (hlen < int (sizeof (struct ip))
      || size < int (hlen + sizeof (icmp)))
    return false;

  infop->pkthdrp = &inp->iph;
  infop->icmpp = reinterpret_cast<icmp *> (&inp->data[hlen]);
  infop->type = infop->icmpp->icmp_type;
  infop->code = infop->icmpp->icmp_code;
  /* Too short to quote an IP header plus 8 bytes: still a valid ICMP
   * message, just without iphp/udphp. */
  if (size < int (hlen + 8 + sizeof (struct ip) + 8))
    return true;

  infop->iphp = &infop->icmpp->icmp_ip;
  const int dhlen = infop->iphp->ip_hl << 2;
  if (dhlen < int (sizeof (struct ip))
      || size < hlen + 8 + dhlen + 8
      || infop->iphp->ip_p != IPPROTO_UDP)
    return true;
  /* The quoted UDP header follows the 8-byte ICMP header and the quoted
   * IP header. */
  infop->udphp = reinterpret_cast<udphdr *> (&inp->data[hlen + 8 + dhlen]);

  return true;
}

void
icmpsock::portalloc ()
{
  assert (udpfd == -1);
  udpfd = inetsocket (SOCK_DGRAM, ntohs (fromaddr.sin_port),
                  ntohl (fromaddr.sin_addr.s_addr));
  if (udpfd < 0) {
    if (errno != EADDRINUSE)
      warn ("socket: %m\n");
  }
  else {
    close_on_exec (udpfd);
    socklen_t sinlen = sizeof (fromaddr);
    getsockname (udpfd, (sockaddr *) &fromaddr, &sinlen);
  }
  if (fromaddr.sin_addr.s_addr == htonl (INADDR_ANY)) {
    vec<in_addr> av;
    myipaddrs (&av);
    while (!av.empty () && av[0].s_addr == htonl (INADDR_LOOPBACK))
      av.pop_front ();
    if (!av.empty ())
      fromaddr.sin_addr = av[0];
    else     // What the hell, allows testing on disconnected machines
      fromaddr.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
  }
}

/* (Re)initialize the socket bundle using ordinary raw sockets.
 *
 * Closes any sockets left from an earlier call, adopts *fa as the
 * source address, allocates the UDP port, then opens
 *   - ipfd:   raw IP socket with IP_HDRINCL, used by sendpkt()
 *   - icmpfd: raw ICMP socket, read by rcb()
 * both non-blocking and close-on-exec.
 *
 * Returns true on success.  On failure a warning is printed and false
 * is returned; sockets opened so far are left as they are. */
bool
icmpsock::init (const sockaddr_in *fa)
{
  closefds ();
  fromaddr = *fa;
  portalloc ();

  if ((ipfd = socket (AF_INET, SOCK_RAW, IPPROTO_RAW)) < 0) {
    warn ("RAW IP socket: %m\n");
    return false;
  }
  close_on_exec (ipfd);

  int optval = 576;
#if 0
  /* Disabled: shrink the send buffer. */
  if (setsockopt (ipfd, SOL_SOCKET, SO_SNDBUF,
                  (char *) &optval, sizeof (optval)) < 0) {
    warn("SO_SNDBUF: %m\n");
    return false;
  }
#endif
  /* We supply the IP header ourselves in sendpkt(). */
  optval = 1;
  const int hdrincl = setsockopt (ipfd, IPPROTO_IP, IP_HDRINCL,
                                  (char *) &optval, sizeof (optval));
  if (hdrincl < 0) {
    warn("IP_HDRINCL: %m\n");
    return false;
  }
  make_async (ipfd);

  if ((icmpfd = socket (AF_INET, SOCK_RAW, IPPROTO_ICMP)) < 0) {
    warn ("ICMP socket: %m\n");
    return false;
  }
  close_on_exec (icmpfd);
  make_async (icmpfd);
  fdcb (icmpfd, selread, wrap (this, &icmpsock::rcb));

  return true;
}

/* Protocol numbers for socket().  init_plab() passes IPPROTO_ICMP_UDP
 * where init() passes IPPROTO_ICMP.  NOTE(review): presumably these
 * select PlanetLab's restricted raw sockets -- confirm against the
 * PlanetLab documentation.  IPPROTO_ICMP_TCP is not referenced in the
 * code visible here. */
#define IPPROTO_ICMP_UDP 117
#define IPPROTO_ICMP_TCP 106

#if PLANET_LAB
/* Variant of init() for PLANET_LAB builds.
 *
 * Closes existing sockets, adopts *fa, and uses portalloc() only to
 * pick the source address and port: the UDP socket is closed again
 * straight away so that the raw sockets can be bound to that same
 * address.  Then opens
 *   - ipfd:   raw socket for IPPROTO_UDP, bound to fromaddr, IP_HDRINCL
 *   - icmpfd: raw socket for IPPROTO_ICMP_UDP, bound to fromaddr,
 *             IP_HDRINCL, read by rcb()
 *
 * Returns true on success; on failure prints a warning and returns
 * false. */
bool
icmpsock::init_plab (const sockaddr_in *fa)
{
  int one = 1;

  closefds ();
  fromaddr = *fa;
  portalloc ();
  if (udpfd >= 0) {
    close (udpfd);
    udpfd = -1;
  }

  ipfd = socket (AF_INET, SOCK_RAW, IPPROTO_UDP);
  if (ipfd < 0) {
    warn ("RAW IP socket: %m\n");
    return false;
  }
  close_on_exec (ipfd);
  if (bind (ipfd, (sockaddr *) &fromaddr, sizeof (fromaddr)) < 0) {
    warn ("bind of RAW/UDP socket: %m\n");
    return false;
  }
  if (setsockopt (ipfd, IPPROTO_IP, IP_HDRINCL,
                  (char *) &one, sizeof(one)) < 0) {
    warn("IP_HDRINCL (RAW UDP): %m\n");
    return false;
  }
  make_async (ipfd);

  icmpfd = socket (AF_INET, SOCK_RAW, IPPROTO_ICMP_UDP);
  if (icmpfd < 0) {
    warn ("ICMP socket: %m\n");
    return false;
  }
  close_on_exec (icmpfd);
  if (bind (icmpfd, (sockaddr *) &fromaddr, sizeof (fromaddr)) < 0) {
    warn ("bind of ICMP/UDP socket: %m\n");
    return false;
  }
  /* XXX - this is done in planet lab traceroute code -- is it needed?
   * The option belongs on the ICMP/UDP socket, as the warning text
   * says; it used to be set on ipfd a second time instead. */
  if (setsockopt (icmpfd, IPPROTO_IP, IP_HDRINCL,
                  (char *) &one, sizeof(one)) < 0) {
    warn("IP_HDRINCL (ICMP/UDP): %m\n");
    return false;
  }
  make_async (icmpfd);

  fdcb (icmpfd, selread, wrap (this, &icmpsock::rcb));

  return true;
}
#endif /* PLANET_LAB */

/* Build one UDP-in-IP probe by hand and transmit it on the raw socket
 * ipfd.
 *
 *   to        destination address and UDP port
 *   ttl       value stored in the IP header's TTL field
 *   datasize  number of UDP payload bytes; the payload is all zeros
 *             except for the two compensation bytes described below
 *   id        IP identification field, host byte order
 *   sum       if non-zero, the value to place in the UDP checksum field
 *             (host byte order); requires datasize >= 2
 *   fromp     optional source override; a wildcard address or zero port
 *             in *fromp falls back to the matching part of fromaddr
 *
 * Errors are reported only through warn(). */
void
icmpsock::sendpkt (const sockaddr_in *to, u_int ttl,
               u_int16_t datasize, u_int16_t id, u_int16_t sum,
               const sockaddr_in *fromp)
{
  /* Bytes actually used: everything in outpkt up to, but not including,
   * payload[datasize].  Only this prefix is cleared and sent. */
  int pktlen = xoffsetof (outpkt, payload[datasize]);
  outpkt pkt;
  bzero (&pkt, pktlen);

  /* IP header. */
  if (fromp && fromp->sin_addr.s_addr != htonl (INADDR_ANY))
    pkt.iph.ip_src = fromp->sin_addr;
  else
    pkt.iph.ip_src = fromaddr.sin_addr;
  pkt.iph.ip_dst = to->sin_addr;
  pkt.iph.ip_off = htons (0);
  pkt.iph.ip_hl = sizeof (pkt.iph) >> 2;
  pkt.iph.ip_p = IPPROTO_UDP;
  pkt.iph.ip_v = 4;

  pkt.iph.ip_ttl = ttl;
  pkt.iph.ip_len = htons (pktlen);
  pkt.iph.ip_id = htons (id);

  /* UDP header. */
  if (fromp && fromp->sin_port != htons (0))
    pkt.udph.uh_sport = fromp->sin_port;
  else
    pkt.udph.uh_sport = fromaddr.sin_port;
  pkt.udph.uh_dport = to->sin_port;
  pkt.udph.uh_ulen = htons (pktlen - sizeof (pkt.iph));

  /* Both branches checksum the same data: a seed made of the protocol
   * and UDP length, the 8 bytes starting at ip_src (NOTE(review): relies
   * on ip_dst directly following ip_src in struct ip), and the UDP
   * header plus payload. */
  if (sum) {
    /* The checksum field is pinned to the caller's value.  The
     * complemented cksum() result is then stored in the two bytes right
     * after the UDP header, i.e. the start of the payload -- presumably
     * so that the datagram still verifies with the pinned checksum;
     * TODO confirm against cksum()'s definition. */
    assert (datasize >= 2);
    assert (pkt.iph.ip_src.s_addr != htonl (INADDR_ANY));
    pkt.udph.uh_sum = ntohs (sum);
    u_int32_t usum = ntohs (pkt.iph.ip_p) + pkt.udph.uh_ulen;
    usum = cksum (&pkt.iph.ip_src, 8, usum);
    usum = ~cksum (&pkt.udph, ntohs (pkt.udph.uh_ulen), usum) & 0xffff;
    if (!usum)
      usum = 0xffff;
    *reinterpret_cast<u_int16_t *> (&pkt.udph + 1) = usum;
  }
  else {
    /* Ordinary case: the computed value goes into the checksum field,
     * with 0 replaced by 0xffff. */
    u_int32_t usum = ntohs (pkt.iph.ip_p) + pkt.udph.uh_ulen;
    usum = cksum (&pkt.iph.ip_src, 8, usum);
    usum = ~cksum (&pkt.udph, ntohs (pkt.udph.uh_ulen), usum) & 0xffff;
    if (!usum)
      usum = 0xffff;
    pkt.udph.uh_sum = usum;
  }

#if 0
  /* Disabled debugging aid: rebuilds the pseudo-header explicitly and
   * dumps the headers. */
  struct ph {
    in_addr s, d;
    u_int8_t z, p;
    u_int16_t l, sp, dp, l2, uc;
  };
  ph h = { pkt.iph.ip_src, pkt.iph.ip_dst, 0, pkt.iph.ip_p,
         pkt.udph.uh_ulen, pkt.udph.uh_sport, pkt.udph.uh_dport,
         pkt.udph.uh_ulen, pkt.udph.uh_sum
  };
  u_int16_t s2 = ~cksum (&h, sizeof (h));

  warn ("ttl %d, sum 0x%x\n", ttl, ntohs (pkt.udph.uh_sum));
  if (ttl == 64) {
    warn << "IP header " << hexdump (&pkt.iph, sizeof (pkt.iph)) << "\n";
    warn << "UDP header " << hexdump (&pkt.udph, sizeof (pkt.udph)) << "\n";
    warn << "pseudo-header " << hexdump (&h, sizeof (h)) << "\n";
  }
#endif

  /* Remembered across calls: ip_hdrincl_ok once any send has succeeded,
   * ip_hdrincl_swapped once a send only succeeded with ip_len
   * byte-swapped. */
  static bool ip_hdrincl_ok, ip_hdrincl_swapped;

  if (ip_hdrincl_swapped) {
    /* Yuck... Might be required by FreeBSD */
    pkt.iph.ip_len = ntohs (pkt.iph.ip_len);
  }

  errno = 0;
  int n = sendto (ipfd, &pkt, pktlen, 0, (sockaddr *) to, sizeof (*to));
  /* EINVAL before any send has ever succeeded: retry once with ip_len
   * swapped, and keep that setting if it works. */
  if (n < 0 && errno == EINVAL && !ip_hdrincl_swapped && !ip_hdrincl_ok) {
    pkt.iph.ip_len = ntohs (pkt.iph.ip_len);
    n = sendto (ipfd, &pkt, pktlen, 0, (sockaddr *) to, sizeof (*to));
    if (n >= 0) {
      warn ("kernel seems to swap byte order of ip_len... yuck\n");
      ip_hdrincl_swapped = true;
    }
  }
  if (n >= 0)
    ip_hdrincl_ok = true;

  if (n != pktlen)
    warn ("RAW IP sendto %d/%d: %m\n", n, pktlen);
}

/* Start a traceroute towards *d.
 *
 *   ss     socket bundle used to send probes and receive ICMP replies
 *   d      destination; a zero port selects the mode in which the TTL is
 *          encoded in the destination port (use_dstport)
 *   nhops  requested hop count; values above maxhops are clamped
 *   c      first completion callback
 *   srcp   optional source address/port override, copied if non-NULL
 *
 * Registers rcb() for ICMP replies concerning d->sin_addr and sends the
 * first round of probes before returning. */
traceroute::traceroute (ref<icmpsock> ss, const sockaddr_in *d,
                        int nhops, cb_t c, const sockaddr_in *srcp)
  : dest (*d), use_dstport (false), hops_req (nhops), hops_max (0),
    hops_total (-1), hops_found (0), xmit_count (0),
    ic (ss->setcb (d->sin_addr, wrap (this, &traceroute::rcb))),
    ntmo (0), tmo_lastfound (0), tmo (NULL)
{
  if (hops_req > maxhops)
    hops_req = maxhops;

  if (dest.sin_port == htons (0)) {
    /* probe() rewrites the port as baseport + ttl before each send in
     * this mode. */
    use_dstport = true;
    dest.sin_port = htons (33435);
  }

  use_src = srcp;
  if (srcp)
    src = *srcp;

  /* Per-TTL bookkeeping, indexed 0..maxhops, all starting out clear. */
  ids.setsize (maxhops + 1);
  bzero (ids.base (), ids.size () * sizeof (ids[0]));
  xmit_ttls.zsetsize (maxhops + 1);
  oxmit_ttls.zsetsize (maxhops + 1);

  cbvec.push_back (c);

  xmit ();
}

/* Cancel the pending timeout, if there is one, so that timeout() is
 * not called on a destroyed object. */
traceroute::~traceroute ()
{
  if (tmo)
    timecb_remove (tmo);
}

/* Send one probe with the given TTL.
 *
 * Marks the TTL as outstanding (counting it in xmit_count only once)
 * and assigns it a non-zero random IP id on first use.  The TTL is
 * made recoverable from the quoted reply either through the
 * destination port (use_dstport) or through the UDP checksum field,
 * plus a payload of 2 + (ttl & 0xf) bytes. */
void
traceroute::probe (u_int8_t ttl)
{
  if (!xmit_ttls[ttl]) {
    xmit_ttls[ttl] = true;
    xmit_count++;
  }
  /* Zero means "no id yet", so draw again until the id is non-zero. */
  while (!ids[ttl])
    ids[ttl] = arandom ();

  const sockaddr_in *const from = use_src ? &src : NULL;
  if (!use_dstport)
    ic->is->sendpkt (&dest, ttl, 2 + (ttl & 0xf), ids[ttl], ttl, from);
  else {
    dest.sin_port = htons (baseport + ttl);
    ic->is->sendpkt (&dest, ttl, 0, ids[ttl], 0, from);
  }
#if NETPATH_VERBOSE
  verbose.fmt ("%d ->  probe % 2d  ->\n", xmit_count, ttl);
#endif /* NETPATH_VERBOSE */
}

/* Decide whether a probe for ttl should be sent now.  The answer is no
 * if the hop is already known, if a probe for it is already
 * outstanding in this round, or -- at priority 0 only -- if it was
 * probed in the previous round (oxmit_ttls). */
inline bool
traceroute::shouldprobe (int prio, int ttl)
{
  const bool known = ttl < int (hops.size ())
    && hops[ttl].s_addr != htonl (INADDR_ANY);
  return !known
    && !(prio == 0 && oxmit_ttls[ttl])
    && !xmit_ttls[ttl];
}

/* Timer callback: the current round of probes has expired.
 *
 * Gives up (finish()) on the fifth expiry.  It also gives up early when
 * no new hop was found since the last expiry, the total hop count is
 * still unknown, and every TTL up to the probing limit beyond hops_max
 * has already been tried.  Otherwise the current round is remembered
 * in oxmit_ttls and a fresh round is started. */
void
traceroute::timeout ()
{
  tmo = NULL;
#if NETPATH_VERBOSE
  verbose.fmt ("=== TIMEOUT %d ===\n", ntmo + 1);
#endif /* NETPATH_VERBOSE */
  assert (ntmo < 5);
  const int expired_before = ntmo++;
  if (expired_before > 3) {
    finish ();
    return;
  }

  const bool stalled = ntmo > 1 && hops_found == tmo_lastfound
    && hops_max > 0 && hops_total == -1;
  if (stalled) {
    /* Behind a firewall no port-unreachable message ever arrives, so
     * hops_total stays unknown and only timeouts can end the trace.
     * Quit quickly once everything up to the limit below has been
     * probed without result. */
    int limit = maxhops;
    if (hops_req > 0 && hops_req < limit)
      limit = hops_req;
    limit = min (limit, hops_max + maxprobes);

    bool allprobed = true;
    for (int ttl = hops_max + 1; allprobed && ttl <= limit; ttl++)
      if (!xmit_ttls[ttl])
        allprobed = false;
    if (allprobed) {
      finish ();
      return;
    }
  }

  /* Start a new round, remembering what the previous one covered. */
  tmo_lastfound = hops_found;
  oxmit_ttls = xmit_ttls;
  xmit_ttls.setrange (0, xmit_ttls.size (), 0);
  xmit_count = 0;
  xmit ();
}

/* Store in *ts how long to wait for replies, given that ntmo timeouts
 * have already happened: 100 ms for the first round, 150 ms for the
 * second, 250 ms for any other value of ntmo. */
inline void
tmoval (timespec *ts, int ntmo)
{
  long nsec = 250000000;
  if (ntmo == 0)
    nsec = 100000000;
  else if (ntmo == 1)
    nsec = 150000000;

  ts->tv_sec = 0;
  ts->tv_nsec = nsec;
}

/* Send probes around TTL `start', limited to [low, high] and to at
 * most maxprobes outstanding probes.
 *
 * A non-zero `high' also caps the starting point; high == 0 disables
 * the upward scan altogether.  Two passes are made: priority 0 skips
 * TTLs already probed in the previous round, priority 1 does not.
 * Each pass scans downwards from the start to `low', then upwards
 * from the start to `high'. */
void
traceroute::proberange (int start, int low, int high)
{
  assert (start >= 1);
  assert (low >= 1);
  assert (high <= maxhops);
  assert (hops_total == -1 || high <= hops_total);

  const int origin = (high && start > high) ? high : start;

  int prio = 0;
  while (prio <= 1 && xmit_count < maxprobes) {
    int ttl = origin;
    while (ttl >= low && xmit_count < maxprobes) {
      if (shouldprobe (prio, ttl))
        probe (ttl);
      ttl--;
    }

    ttl = origin;
    while (ttl <= high && xmit_count < maxprobes) {
      if (shouldprobe (prio, ttl))
        probe (ttl);
      ttl++;
    }

    prio++;
  }
}

/* Send the next batch of probes, chosen from what is known so far, and
 * arm the reply timer if it is not already running.  If nothing could
 * be sent, the traceroute finishes.
 *
 * hops_req > 0 asks for the first hops_req hops; hops_req < 0 is used
 * as a count relative to the far end (low = end + hops_req + 1);
 * hops_req == 0 places no restriction. */
void
traceroute::xmit ()
{
  if (hops_req > 0) {
    /* Leading hops wanted: work downwards from the last one needed. */
    if (hops_total > 0)
      proberange (min (hops_req, hops_total), 1, 0);
    else
      proberange (hops_req, 1, 0);
  }
  else if (hops_total > 0) {
    /* Path length known: work downwards from the destination. */
    int low = 1;
    if (hops_req && hops_total + hops_req >= 0)
      low = hops_total + hops_req + 1;
    proberange (hops_total, low, 0);
  }
  else if (hops_max) {
    /* Some hop seen, length unknown: probe around and beyond it. */
    int low = 1;
    if (hops_req && hops_max + hops_req >= 0)
      low = hops_max + hops_req + 1;
    proberange (hops_max + maxprobes/2, low, maxhops);
  }
  else if (ntmo)
    /* Nothing heard after at least one timeout: start from the near
     * end. */
    proberange (maxprobes, 1, maxhops);
  else {
    /* Very first batch: a few widely spaced TTLs. */
    probe (maxhops);
    if (maxhops > 16)
      probe (16);
    if (maxhops > 10)
      probe (10);
  }

  if (!xmit_count) {
    finish ();
    return;
  }

  if (tmo)
    return;
  timespec wait;
  tmoval (&wait, ntmo);
  tmo = delaycb (wait.tv_sec, wait.tv_nsec,
                 wrap (this, &traceroute::timeout));
}

/* Record that the router (or destination) at distance hopno answered
 * from address addr, then send further probes.
 *
 *   hopno  hop number the reply belongs to; ignored unless it lies in
 *          1..maxhops and, once the total is known, within hops_total
 *   addr   address the reply came from
 *   last   true if the reply marks the end of the path
 *
 * May end the traceroute (and delete this object) through xmit(). */
void
traceroute::getpkt (int hopno, in_addr addr, bool last)
{
#if NETPATH_VERBOSE
  verbose.fmt ("%d <- getpkt % 2d%s <- ", xmit_count,
               hopno, last ? "*" : " ") << inet_ntoa (addr) << "\n";
#endif /* NETPATH_VERBOSE */

  if (hopno < 1 || hopno > maxhops || (hops_total > 0 && hopno > hops_total))
    return;
  /* The probe for this hop is no longer outstanding. */
  if (xmit_count > 0 && xmit_ttls[hopno]) {
    xmit_count--;
    xmit_ttls[hopno] = false;
  }

  if (hopno > hops_max) {
    if (last) {
      /* Path length is now known; only probes for nearer hops still
       * count as outstanding. */
      hops_total = hopno;
      xmit_count = 0;
      for (int i = 0; i < hops_total; i++)
        if (xmit_ttls[i])
          xmit_count++;
    }
    hops_max = hopno;
    /* Grow the hop table and clear the new slots explicitly: a zero
     * address means "hop not yet known" here, in shouldprobe() and in
     * the array handed to the callbacks.  The constructor likewise
     * clears ids by hand after setsize, so zeroing is not assumed. */
    const size_t oldsize = hops.size ();
    hops.setsize (hopno + 1);
    bzero (hops.base () + oldsize,
           (hops.size () - oldsize) * sizeof (hops[0]));
  }

  if (!hops[hopno].s_addr) {
    hops[hopno].s_addr = addr.s_addr;
    hops_found++;
  }

  xmit ();
}

/* ICMP callback: *ii describes an ICMP message quoting one of our UDP
 * probes (icmpsock::rcb only delivers messages with iphp and udphp
 * set).
 *
 * Recovers the probe's TTL from the quoted headers -- destination port
 * in use_dstport mode, UDP checksum field otherwise -- checks it
 * against the IP id recorded for that TTL, and reports the hop through
 * getpkt().  ICMP_UNREACH replies are reported as the end of the path,
 * at a hop number adjusted by the TTL left in the quoted header. */
void
traceroute::rcb (icmpsock::icmp_info *ii)
{
  int hopno;
  if (use_dstport)
    hopno = ntohs (ii->udphp->uh_dport) - baseport;
  else
    hopno = ntohs (ii->udphp->uh_sum);

  /* FreeBSD for some reason zeroes out the checksum of a returned UDP
   * packet inside an ICMP packet.  Thus, we search for the packet
   * (from the end, since this is most likely to be the end node). */
  if (!use_dstport && !hopno && ii->iphp->ip_id) {
    /* probe() sends 2 + (ttl & 0xf) payload bytes, so the UDP length
     * yields the low four bits of the TTL: any value 0..15 is valid.
     * Signed arithmetic keeps a too-short length negative. */
    hopno = int (ntohs (ii->udphp->uh_ulen))
      - int (sizeof (struct udphdr)) - 2;
    if (hopno < 0 || hopno > 0xf) {
      warn ("bad traceroute UDP len %d\n", ntohs (ii->udphp->uh_ulen));
      return;
    }
    /* Start at the highest TTL <= maxhops with these low bits and step
     * down by 16 until the recorded IP id matches. */
    hopno += ((maxhops + 0xf) & ~0xf);
    while (hopno > maxhops)
      hopno -= 0x10;
    u_int16_t id = ntohs (ii->iphp->ip_id);
    while (hopno > 0 && ids[hopno] != id)
      hopno -= 0x10;
    if (hopno <= 0) {
      warn ("bad traceroute IP id for %s\n", inet_ntoa (dest.sin_addr));
      return;
    }
  }

#if 0
  warn ("(%d/%d) %d %s (found %d)\n", ii->type, ii->code, hopno,
        inet_ntoa (ii->pkthdrp->ip_src), hops_found);
#endif
  if (hopno < 1 || hopno > maxhops) {
    warn ("bad traceroute hopno %d for %s\n", hopno,
          inet_ntoa (dest.sin_addr));
    return;
  }
  if (ntohs (ii->iphp->ip_id) != ids[hopno]) {
    warn ("bad traceroute IP id for %s (hop %d)\n",
          inet_ntoa (dest.sin_addr), hopno);
    return;
  }

  if (ii->type == ICMP_UNREACH) {
    /* The probe travelled past the hop where its TTL would have run
     * out; the TTL remaining in the quoted header gives the actual
     * distance. */
    hopno = hopno - ii->iphp->ip_ttl + 1;
    if (hopno < 1 || hopno > maxhops) {
      warn ("bad traceroute unreach for %s (hopno %d)\n",
            inet_ntoa (dest.sin_addr), hopno);
      return;
    }
    getpkt (hopno, ii->pkthdrp->ip_src, true);
  }
  else if (ii->type == ICMP_TIMXCEED && ii->code == ICMP_TIMXCEED_INTRANS)
    getpkt (hopno, ii->pkthdrp->ip_src, false);
}

void
traceroute::fail ()
{
  while (!cbvec.empty ())
    (*cbvec.pop_front ()) (-1, NULL, -1);
  delete this;
}

/* Deliver the result and delete this traceroute.
 *
 * The last hop is hops_total if known, otherwise the farthest hop
 * seen; if there is none, fail() is used instead.  A positive hops_req
 * trims the result to the first hops_req hops, a negative one to the
 * last -hops_req hops.  Each callback receives hops_total, a pointer
 * to the first reported hop and the number of hops reported. */
void
traceroute::finish ()
{
#if NETPATH_VERBOSE
  warnx << verbose;
#endif /* NETPATH_VERBOSE */

  int last = hops_total > 0 ? hops_total : hops_max;
  if (last <= 0) {
    fail ();
    return;
  }

  int first = 1;
  if (hops_req > 0)
    last = min (hops_req, last);
  else if (hops_req < 0)
    first = max (1, last + hops_req + 1);
  const int count = last - first + 1;

  while (!cbvec.empty ())
    (*cbvec.pop_front ()) (hops_total, hops.base () + first, count);
  delete this;
}

/* Handle returned by ifchgcb() when netpath_reset was registered; set
 * by netpath() the first time it creates the shared socket. */
static ifchgcb_t *rebind;
/* Socket bundle shared by all traceroutes started through netpath();
 * created on demand. */
static ptr<icmpsock> is;

/* Drop this file's reference to the shared icmpsock so that the next
 * netpath() call creates and initializes a new one.  netpath()
 * registers this function with ifchgcb(). */
void
netpath_reset ()
{
  is = NULL;
}

/* Start a traceroute to *destp.
 *
 *   destp  destination address (and port, see traceroute::traceroute)
 *   hops   requested hop count, passed on to the traceroute
 *   cb     completion callback
 *   srcp   optional source override, passed on to the traceroute
 *
 * The shared icmpsock is created on first use, trying init() and then
 * init_plab().  If both fail, cb is called with (-1, NULL, -1) and NULL
 * is returned.  Otherwise the new traceroute is returned; it deletes
 * itself when done. */
traceroute *
netpath (const sockaddr_in *destp, int hops, netpathcb_t cb,
         const sockaddr_in *srcp)
{
  if (!is) {
    sockaddr_in anyaddr;
    bzero (&anyaddr, sizeof (anyaddr));
    anyaddr.sin_family = AF_INET;

    is = New refcounted<icmpsock>;
    bool ready = is->init (&anyaddr);
    if (!ready) {
      ready = is->init_plab (&anyaddr);
      if (ready)
        warn ("using planet-lab protected raw sockets\n");
    }
    if (!ready) {
      warn ("could not initialize RAW sockets for netpath\n");
      is = NULL;
      (*cb) (-1, NULL, -1);
      return NULL;
    }

    /* Have the socket rebuilt whenever ifchgcb fires netpath_reset. */
    if (!rebind)
      rebind = ifchgcb (wrap (netpath_reset));
  }

  return New traceroute (is, destp, hops, cb, srcp);
}

/* Abandon the traceroute trp by deleting it.  The traceroute destructor
 * removes any pending timeout; the registered callbacks are not invoked
 * here. */
void
netpath_cancel (traceroute *trp)
{
  delete trp;
}

/* Register an additional completion callback on trp by forwarding to
 * traceroute::addcb (defined outside the code visible here). */
void
netpath_addcb (traceroute *trp, netpathcb_t cb)
{
  trp->addcb (cb);
}

/* Number of traceroute results still awaited by the test driver. */
static int respending;

/* Completion callback for netpath_test: print the outcome for host
 * `name' (address `addr') and exit once no results remain pending.
 *
 *   nhops  total hop count, or <= 0 if unknown
 *   av, n  the n reported hops; n <= 0 means the traceroute failed
 *
 * When the total is known, hops are numbered so that the last reported
 * hop gets number nhops; otherwise numbering starts at 1. */
static void
result (str name, in_addr addr, int nhops, in_addr *av, int n)
{
  aout << strbuf ("dest %s (%s)\n", name.cstr (), inet_ntoa (addr));
  if (nhops > 0)
    aout << strbuf ("%d hops total\n", nhops);

  if (n <= 0)
    aout << "failed\n";
  else {
    for (int i = 0; i < n; i++) {
      const int hopnum = nhops <= 0 ? i + 1 : nhops + i - n + 1;
      aout << strbuf ("% 2d ==> %s\n", hopnum, inet_ntoa (av[i]));
    }
  }

  if (--respending <= 0)
    exit (0);
}

/* Print the command-line synopsis for the netpath test mode and exit
 * with status 1.  Never returns. */
static void netpath_usage () __attribute__ ((noreturn));
static void
netpath_usage ()
{
  warnx << "usage: " << progname << " --netpath host [nhops]\n";
  exit (1);
}

/* Command-line test driver: trace the path to a host and print it.
 *
 * After the adjustment below, argv[1] is the host name and the
 * optional argv[2] the requested hop count (default 0).  The result is
 * printed by result(), which also exits the program. */
void
netpath_test (int argc, char **argv)
{
  /* NOTE(review): presumably the caller has already stepped past the
   * option word, so the view is shifted back by one slot -- confirm
   * against the caller. */
  ++argc;
  --argv;

  if (argc < 2 || argc > 3)
    netpath_usage ();
  const int nhops = argc == 3 ? atoi (argv[2]) : 0;

  struct hostent *const host = gethostbyname (argv[1]);
  if (!host)
    fatal << argv[1] << ": no such host\n";

  sockaddr_in to;
  bzero (&to, sizeof (to));
  to.sin_family = AF_INET;
  to.sin_addr = *reinterpret_cast<in_addr *> (host->h_addr);

  respending = 1;
  netpath (&to, nhops, wrap (result, argv[1], to.sin_addr));
}

Generated by  Doxygen 1.6.0   Back to index