C# Command-Line Instructions Help

Page 1 of 1

0 Replies - 623 Views - Last Post: 17 April 2009 - 05:10 AM Rate Topic: -----

#1 Glaive  Icon User is offline

  • New D.I.C Head

Reputation: 0
  • View blog
  • Posts: 9
  • Joined: 17-April 09

C# Command-Line Instructions Help

Posted 17 April 2009 - 05:10 AM

Hi all,

I've been trying to get some ideas for a project I am doing for my course, and have decided to create a web crawler. While searching for examples I came across this sample program, which is very similar to what I want to do, so I thought I'd try it out to see what it does and what it does not do, so that I get an idea of what features to have in my own program. The problem is that, although it comes with brief instructions at the top of the program, I have no idea how to use it. I am using Visual Studio 2008; when I start debugging, it builds and runs with no errors, a console window flashes briefly on the screen, and then I am back looking at the code window.

Thanks in advance.

/* MiniCrawler: A skeletal Web crawler.
Usage:
To start crawling, specify a starting
URI on the command line. For example,
to start at McGraw-Hill.com, use this
command line:
MiniCrawler http://McGraw-Hill.com
*/

using System.Collections.Generic;
using System.Linq;
using System.Text;
using System;
using System.Net;
using System.IO;
/// <summary>
/// A skeletal, interactive Web crawler. Starting from the URI given on the
/// command line, it downloads a page, shows each absolute http/https link it
/// finds, and lets the user follow the link (L), see the next one (M), or
/// quit (Q).
/// </summary>
class MiniCrawler
{
	// Marker that introduces an absolute http/https link inside an attribute.
	const string LinkMarker = "href=\"http";

	// Offset from the start of LinkMarker to the first character of the URI,
	// i.e. the length of the leading href=" part.
	const int UriOffsetInMarker = 6;

	/// <summary>
	/// Finds the next absolute http/https link in a content string.
	/// </summary>
	/// <param name="htmlstr">The page content to search.</param>
	/// <param name="startloc">
	/// Index at which to start searching. On success it is advanced to the
	/// index of the link's closing quote, so the next call continues after
	/// this link. It is left unchanged when no link is found.
	/// </param>
	/// <returns>
	/// The URI text between the quotes, or null when there is no further
	/// well-formed link (including an href whose closing quote is missing).
	/// </returns>
	static string FindLink(string htmlstr,
	ref int startloc)
	{
		// Nothing to search, or a start position IndexOf would reject.
		if (string.IsNullOrEmpty(htmlstr) ||
			startloc < 0 || startloc >= htmlstr.Length)
		{
			return null;
		}

		// Case-insensitive ordinal search on the original string: avoids
		// allocating a lower-cased copy of the whole page on every call and
		// keeps the match index valid for htmlstr in every culture.
		int marker = htmlstr.IndexOf(LinkMarker, startloc,
			StringComparison.OrdinalIgnoreCase);
		if (marker == -1)
		{
			return null;
		}

		int start = marker + UriOffsetInMarker;
		int end = htmlstr.IndexOf('"', start);
		if (end == -1)
		{
			// Unterminated attribute (truncated or malformed markup):
			// report "no link" rather than failing in Substring.
			return null;
		}

		startloc = end;
		return htmlstr.Substring(start, end - start);
	}

	/// <summary>
	/// Downloads the whole page at the given URI and returns it as text.
	/// </summary>
	/// <param name="uristr">The URI to fetch; must use http or https.</param>
	/// <returns>The complete response body.</returns>
	/// <exception cref="NotSupportedException">
	/// The URI scheme is not handled by HttpWebRequest (for example ftp or file).
	/// </exception>
	static string DownloadPage(string uristr)
	{
		// A direct cast would throw InvalidCastException for non-HTTP
		// schemes, which Main does not handle; report it as an unsupported
		// protocol instead.
		HttpWebRequest req = WebRequest.Create(uristr) as HttpWebRequest;
		if (req == null)
		{
			throw new NotSupportedException(
				"Only http and https URIs can be crawled: " + uristr);
		}

		// Dispose the response, stream and reader even if reading fails, and
		// release the connection before the user is prompted.
		using (WebResponse resp = req.GetResponse())
		using (Stream istrm = resp.GetResponseStream())
		using (StreamReader rdr = new StreamReader(istrm))
		{
			return rdr.ReadToEnd();
		}
	}

	/// <summary>
	/// Walks through the links of a page, asking the user what to do with each.
	/// </summary>
	/// <param name="page">The page content to scan for links.</param>
	/// <returns>
	/// The link the user chose to follow, or null when the user quits, input
	/// ends, or the page has no more links.
	/// </returns>
	static string ChooseNextLink(string page)
	{
		int curloc = 0; // holds current location in response

		while (true)
		{
			// Find the next URI to link to.
			string link = FindLink(page, ref curloc);
			if (link == null)
			{
				Console.WriteLine("No link found.");
				return null;
			}

			Console.WriteLine("Link found: " + link);
			Console.Write("Link, More, Quit?");
			string answer = Console.ReadLine();

			// ReadLine returns null at end of input; treat that as Quit.
			if (answer == null ||
				string.Equals(answer, "Q", StringComparison.OrdinalIgnoreCase))
			{
				return null;
			}
			if (string.Equals(answer, "L", StringComparison.OrdinalIgnoreCase))
			{
				return link;
			}
			if (string.Equals(answer, "M", StringComparison.OrdinalIgnoreCase))
			{
				Console.WriteLine("Searching for another link.");
			}
			// Any other answer silently moves on to the next link.
		}
	}

	/// <summary>
	/// Program entry point. Expects exactly one argument: the starting URI.
	/// Without it, a usage message is printed and the program exits.
	/// </summary>
	/// <param name="args">Command-line arguments.</param>
	static void Main(string[] args)
	{
		if (args.Length != 1)
		{
			Console.WriteLine("Usage: MiniCrawler <uri>");
			return;
		}

		string uristr = args[0]; // holds current URI
		try
		{
			// Keep crawling for as long as the user picks a link to follow.
			while (uristr != null)
			{
				Console.WriteLine("Linking to " + uristr);
				string page = DownloadPage(uristr);
				uristr = ChooseNextLink(page);
			}
		}
		catch (WebException exc)
		{
			Console.WriteLine("Network Error: " + exc.Message +
			"\nStatus code: " + exc.Status);
		}
		catch (ProtocolViolationException exc)
		{
			Console.WriteLine("Protocol Error: " + exc.Message);
		}
		catch (UriFormatException exc)
		{
			Console.WriteLine("URI Format Error: " + exc.Message);
		}
		catch (NotSupportedException exc)
		{
			Console.WriteLine("Unknown Protocol: " + exc.Message);
		}
		catch (IOException exc)
		{
			Console.WriteLine("I/O Error: " + exc.Message);
		}
		Console.WriteLine("Terminating MiniCrawler.");
	}
}



Is This A Good Question/Topic? 0
  • +

Page 1 of 1