#!/usr/bin/perl

use Socket;                   # include Socket module

	$queryString = $ENV{QUERY_STRING};
	if ($queryString =~ /theURL=(.*)/) {
		$full_url =  $1;
		}
	else {
		$full_url =  "[no URL specified]";
		}

	$full_url =~ s|\%3[Aa]|:|g;
	$full_url =~ s|\%2[Ff]|/|g;
	$full_url =~ s|\%7[Ee]|\~|g;
	$full_url =~ s|\%3[Ff]|\?|g;

  print "Content-type: text/html\n\n";

  # if the URL isn't a full URL, assume that it is a http request
  $full_url="http://$full_url" if ($full_url !~ 
                                 m/(\w+):\/\/([^\/:]+)(:\d*)?([^#]*)/);

  # break up URL into meaningful parts
  my @the_url = parse_URL($full_url);
  if (!defined @the_url) {
	print <<BAD_URL;
<HTML><HEAD><TITLE>Bad URL</TITLE></HEAD>
<BODY><HR>
<FONT SIZE=+2 COLOR="#FF0000">Invalid URL specified...
<P>$full_url
</FONT>
</BODY></HTML>
BAD_URL
    exit(-1);
  }

  # we're only interested in HTTP URL's
  if ($the_url[0] !~ m/http/i) {
	print <<BAD_HTML;
<HTML><HEAD><TITLE>Bad URL</TITLE></HEAD>
<BODY><HR>
<FONT SIZE=+2 COLOR="#FF0000">Only http protocol is valid here...
<P>$full_url
</FONT>
</BODY></HTML>
BAD_HTML
    exit(-1);
	}

  # connect to server specified in 1st parameter
  if (!defined open_TCP('F', $the_url[1], $the_url[2])) {
	print <<NO_SERVER;
<HTML><HEAD><TITLE>Server error</TITLE></HEAD>
<BODY><HR>
<FONT SIZE=+2 COLOR="#FF0000">Error connecting to web server: $the_url[1]<BR>
</FONT>
</BODY></HTML>
NO_SERVER
    exit(-1);
  }

  # request the path of the document to get
    print F "GET $the_url[3] HTTP/1.0\n";
    print F "Accept: */*\n";
    print F "User-Agent: showurl/1.0\n\n";

  # print out server's response.

	print <<PART1;
<HTML>
<HEAD>
<TITLE>
$full_url
</TITLE>
</HEAD>
<BODY BGCOLOR="#FFFFFF">
<H1 ALIGN="center">$full_url</H1>
<HR>
PART1

	print '<PRE><FONT FACE="Verdana"><FONT COLOR="#FF0000" SIZE=+2>';
	$body = 0;
	while (<F>) {
		if ($body) {
			s|<|<FONT COLOR="#0000FF".&lt;|g;
			s|>|&gt;</FONT>|g;
			s|FF"\.|FF">|g;
			}
		else {
			s|<|&lt;|g;
			s|>|&gt;|g;
			}
		print "$_";
		if (/^\s*$/) {
			if ($body == 0) {
				print "</FONT><B>";
				$body = 1;
				}
			}
		}
	print <<PART2;
</B>
</FONT>
</PRE>
</BODY>
</HTML>
PART2

  # close the network connection
  close(F);


# ----------------------------------------------------------------------
# Given ($file_handle, $dest, $port) return 1 if successful, undef when
# unsuccessful.
#
# Input: $fileHandle  is the name of the filehandle to use.
#        $dest        is the name of the destination computer,
#                     either IP address or hostname.
#        $port        is the port number.
#
# Output: successful network connection in file handle
# ----------------------------------------------------------------------

sub open_TCP
{
	# get parameters
	my ($FS, $dest, $port) = @_;

	my $proto = getprotobyname('tcp');
	socket($FS, PF_INET, SOCK_STREAM, $proto);
	return undef if (! defined(inet_aton($dest)));
	my $sin = sockaddr_in($port,inet_aton($dest));
	connect($FS,$sin) || return undef;
  
	my $old_fh = select($FS); 
	$| = 1; 		        # don't buffer output
	select($old_fh);
	1;
}


# ----------------------------------------------------------------
#  Given a full URL, return the scheme, hostname, port, and path
#  into ($scheme, $hostname, $port, $path).  We'll only deal with
#  HTTP URLs.
# ----------------------------------------------------------------

sub parse_URL {

	# put URL into variable
	my ($URL) = @_;

	# attempt to parse.  Return undef if it didn't parse.
	(my @parsed =$URL =~ m@(\w+)://([^/:]+)(:\d*)?([^#]*)@) || return undef;

	# remove colon from port number, even if it wasn't specified in the URL
	if (defined $parsed[2]) {
		$parsed[2]=~ s/^://;
		}

	# the path is "/" if one wasn't specified
	$parsed[3]='/' if ($parsed[0]=~/http/i && (length $parsed[3])==0);

	# if port number was specified, we're done
	return @parsed if (defined $parsed[2]);

	# otherwise, assume port 80, and then we're done.
	$parsed[2] = 80;
	@parsed;
}


# -----------------------------------------------------------
#  grab_urls($html_content, %tags) returns an array of links 
#  that are referenced from within html. 
# -----------------------------------------------------------

sub grab_urls {

	my($data, %tags) = @_;
	my @urls;

	# while there are HTML tags
	skip_others: while ($data =~ s/<([^>]*)>//)  {

		my $in_brackets=$1;
		my $key;

		foreach $key (keys %tags) {

			if ($in_brackets =~ /^\s*$key\s+/i) {     # if tag, try parms
		    	if ($in_brackets =~ /\s+$tags{$key}\s*=\s*"([^"]*)"/i) {
					my $link=$1;
					$link =~ s/[\n\r]//g;  # kill "\n\r" anywhere in URL
					push (@urls, $link);
					next skip_others;
					} 
				# handle case when URL isn't in quotes (ie: <a href=thing>)
				elsif ($in_brackets =~ /\s+$tags{$key}\s*=\s*([^\s]+)/i) {
					my $link=$1;
					$link =~ s/[\n\r]//g;	# kill "\n\r" anywhere in URL
					push (@urls, $link);
					next skip_others;
					}		
				}       # if tag matches
			}	        # foreach tag
		}	            # while there are brackets
	@urls;
}
