hexchat/src/common/url.c

/* X-Chat
 * Copyright (C) 1998 Peter Zelezny.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "hexchat.h"
#include "hexchatc.h"
#include "cfgfiles.h"
#include "fe.h"
#include "tree.h"
#include "url.h"
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

/* Grabbed URL strings in insertion order; created lazily by url_add() and
   torn down (strings included) by url_clear(). */
void *url_tree = NULL;
/* Case-insensitive index over the same URL strings, consulted by url_find()
   to reject duplicates.  Created and destroyed together with url_tree. */
GTree *url_btree = NULL;


/* tree_foreach() callback used by url_clear(): releases one stored URL
   string.  The user-data argument is ignored.  Always returns TRUE
   (presumably "keep iterating" -- confirm against tree_foreach()). */
static int
url_free (char *url, void *data)
{
	(void) data;

	free (url);

	return TRUE;
}

/* Drop every grabbed URL: free the stored strings, destroy both containers
   and reset the globals so url_add() starts from scratch.
   Safe to call when nothing has been grabbed yet (both globals still NULL):
   the containers are only created lazily by url_add(), and GLib rejects a
   NULL tree in g_tree_destroy() with a critical warning. */
void
url_clear (void)
{
	if (url_tree != NULL)
	{
		/* the strings are owned by url_tree; url_btree only borrows them */
		tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);
		tree_destroy (url_tree);
		url_tree = NULL;
	}

	if (url_btree != NULL)
	{
		g_tree_destroy (url_btree);
		url_btree = NULL;
	}
}

/* tree_foreach() callback used by url_save_tree(): writes one URL followed
   by a newline to the already opened stream.  Always returns TRUE. */
static int
url_save_cb (char *url, FILE *fd)
{
	fprintf (fd, "%s\n", url);

	return TRUE;
}

/* Write every URL currently held in url_tree to a file, one per line.
 *
 * fname:    file to open.
 * mode:     fopen()-style mode string handed through to hexchat_fopen_file().
 * fullpath: when true the file is opened with XOF_FULLPATH, otherwise with
 *           no flags.
 *
 * Returns silently, writing nothing, if the file cannot be opened. */
void
url_save_tree (const char *fname, const char *mode, gboolean fullpath)
{
	FILE *out = hexchat_fopen_file (fname, mode, fullpath ? XOF_FULLPATH : 0);

	if (out != NULL)
	{
		tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, out);
		fclose (out);
	}
}

/* Append a single URL, followed by a newline, to "url.log" (opened through
   hexchat_fopen_file() in append mode with no flags).  Does nothing when
   the file cannot be opened. */
static void
url_save_node (char* url)
{
	FILE *logfile = hexchat_fopen_file ("url.log", "a", 0);

	if (logfile != NULL)
	{
		fprintf (logfile, "%s\n", url);
		fclose (logfile);
	}
}

static int
url_find (char *urltext)
{
	return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));
}

/* Record one URL found in incoming text.
 *
 * urltext: start of the URL; it does not have to be NUL-terminated at len.
 * len:     number of bytes of urltext that belong to the URL.
 *
 * A private copy is made, a trailing '.' and an unmatched trailing ')' are
 * chopped off, and then, depending on the preferences:
 *   - hex_url_logging: the URL is appended to url.log (duplicates included);
 *   - hex_url_grabber: the URL is added to url_tree/url_btree unless it is
 *     already present, the oldest entries are evicted so that at most
 *     hex_url_grabber_limit URLs are kept (0 means unlimited), and the
 *     front end is notified through fe_url_add().
 * On the grabber path ownership of the copy passes to url_tree; on every
 * other path the copy is freed before returning. */
static void
url_add (char *urltext, int len)
{
	char *data;
	int size;

	/* we don't need any URLs if we have neither URL grabbing nor URL logging enabled */
	if (!prefs.hex_url_grabber && !prefs.hex_url_logging)
	{
		return;
	}

	/* nothing to record; also keeps the data[len - 1] reads below in bounds */
	if (len <= 0)
	{
		return;
	}

	data = malloc (len + 1);
	if (!data)
	{
		return;
	}
	memcpy (data, urltext, len);
	data[len] = 0;

	if (data[len - 1] == '.')	/* chop trailing dot */
	{
		len--;
		data[len] = 0;
	}
	/* chop trailing ) but only if there's no counterpart */
	if (len > 0 && data[len - 1] == ')' && strchr (data, '(') == NULL)
	{
		len--;
		data[len] = 0;
	}

	/* the chopping above may have consumed the whole string */
	if (len == 0)
	{
		free (data);
		return;
	}

	if (prefs.hex_url_logging)
	{
		url_save_node (data);
	}

	/* the URL is saved already, only continue if we need the URL grabber too */
	if (!prefs.hex_url_grabber)
	{
		free (data);
		return;
	}

	if (!url_tree)
	{
		url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);
		url_btree = g_tree_new ((GCompareFunc)strcasecmp);
	}

	if (url_find (data))
	{
		free (data);
		return;
	}

	size = tree_size (url_tree);
	/* 0 is unlimited */
	if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)
	{
		/* Evict enough of the oldest entries to leave room for the new one,
		   so the list ends up holding exactly the limit.  The loop is
		   necessary to handle having the limit lowered while HexChat is
		   running. */
		size -= prefs.hex_url_grabber_limit - 1;
		for(; size > 0; size--)
		{
			char *pos;

			pos = tree_remove_at_pos (url_tree, 0);
			if (pos == NULL)
			{
				/* nothing left to evict */
				break;
			}
			/* drop the index entry before the key string is released */
			g_tree_remove (url_btree, pos);
			free (pos);
		}
	}

	tree_append (url_tree, data);
	g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));
	fe_url_add (data);
}

/* Fold one byte to lower case using ASCII rules only.  tolower() follows
   the current locale and can map ASCII letters to non-ASCII ones (e.g. 'I'
   under a Turkish locale), which would break the table comparisons below. */
static int
url_ascii_lower (unsigned char c)
{
	if (c >= 'A' && c <= 'Z')
		return c + ('a' - 'A');
	return c;
}

/* Return non-zero when the first n bytes of text equal pattern, ignoring
   ASCII case.  pattern must already be lower case. */
static int
url_ascii_match (const char *text, const char *pattern, int n)
{
	int j;

	for (j = 0; j < n; j++)
	{
		if (url_ascii_lower ((unsigned char) text[j]) != (unsigned char) pattern[j])
			return 0;
	}
	return 1;
}

/* check if a word is clickable. This is called on mouse motion events, so
   keep it FAST!

   word: start of the word; only the first len bytes are examined, so the
         word may be a slice of a longer buffer.
   len:  length of the word in bytes.

   Returns WORD_PATH, WORD_CHANNEL, WORD_URL, WORD_EMAIL or WORD_HOST for
   the first rule that matches (checked in that order of precedence, with
   WORD_HOST also covering wildcard user@host masks, dotted IP numbers and
   known host/file suffixes), or 0 when the word is not recognised. */

int
url_check_word (const char *word, int len)
{
#define D(x) (x), ((sizeof (x)) - 1)
	static const struct {
		const char *s;
		int len;
	}
	prefix[] = {
		{ D("irc.") },
		{ D("ftp.") },
		{ D("www.") },
		{ D("irc://") },
		{ D("ftp://") },
		{ D("http://") },
		{ D("https://") },
		{ D("file://") },
		{ D("rtsp://") },
		{ D("ut2004://") },
	},
	suffix[] = {
		{ D(".org") },
		{ D(".net") },
		{ D(".com") },
		{ D(".edu") },
		{ D(".html") },
		{ D(".info") },
		{ D(".name") },
		/* Some extra common suffixes.
		foo.blah/baz.php etc should work now, rather than
		needing  http:// at the beginning. */
		{ D(".php") },
		{ D(".htm") },
		{ D(".aero") },
		{ D(".asia") },
		{ D(".biz") },
		{ D(".cat") },
		{ D(".coop") },
		{ D(".int") },
		{ D(".jobs") },
		{ D(".mobi") },
		{ D(".museum") },
		{ D(".pro") },
		{ D(".tel") },
		{ D(".travel") },
		{ D(".xxx") },
		{ D(".asp") },
		{ D(".aspx") },
		{ D(".shtml") },
		{ D(".xml") },
	};
#undef D
	const char *at;
	int i, dots;

	/* an empty word can never be clickable */
	if (len <= 0)
		return 0;

	/* this is pretty much the same as in logmask_is_fullpath() except with length checks and .\ for portable mode */
#ifdef WIN32
	if ((len > 1 && word[0] == '\\') ||
		(len > 2 && word[0] == '.' && word[1] == '\\') ||
		(len > 2 && (((word[0] >= 'A' && word[0] <= 'Z') || (word[0] >= 'a' && word[0] <= 'z')) && word[1] == ':')))
#else
	if (len > 1 && word[0] == '/')
#endif
	{
		return WORD_PATH;
	}

	/* one of "@+^%*#" directly in front of '#'; the NUL test stops strchr()
	   from matching the terminator of the prefix-character list */
	if (len > 1 && word[1] == '#' && word[0] != 0 && strchr ("@+^%*#", word[0]))
		return WORD_CHANNEL;

	/* plain #channel or &channel, at least one character after the sigil */
	if (len > 1 && (word[0] == '#' || word[0] == '&') && word[1] != '#' && word[1] != 0)
		return WORD_CHANNEL;

	for (i = 0; i < (int) G_N_ELEMENTS (prefix); i++)
	{
		/* the word has to be longer than the bare prefix */
		if (len > prefix[i].len && url_ascii_match (word, prefix[i].s, prefix[i].len))
			return WORD_URL;
	}

	/* check for email addy: an '@' with a '.' somewhere after it.  The
	   searches are bounded by len so that text following the word in the
	   same buffer cannot influence the result. */
	at = memchr (word, '@', len);
	if (at && memchr (at + 1, '.', (word + len) - (at + 1)))
	{
		if (memchr (word, '*', len))
			return WORD_HOST;	/* wildcard mask rather than an address */
		return WORD_EMAIL;
	}

	/* check if it's an IP number */
	dots = 0;
	for (i = 0; i < len; i++)
	{
		if (word[i] == '.' && i > 0)
			dots++;	/* allow 127.0.0.1:80 */
		else if (!isdigit ((unsigned char) word[i]) && word[i] != ':')
		{
			dots = 0;
			break;
		}
	}
	if (dots == 3)
		return WORD_HOST;

	if (len > 5)
	{
		for (i = 0; i < (int) G_N_ELEMENTS (suffix); i++)
		{
			int l = suffix[i].len;

			if (len > l && url_ascii_match (word + (len - l), suffix[i].s, l))
				return WORD_HOST;
		}

		/* any two-letter suffix, e.g. a country-code TLD */
		if (word[len - 3] == '.' &&
			 isalpha ((unsigned char) word[len - 2]) &&
				isalpha ((unsigned char) word[len - 1]))
			return WORD_HOST;
	}

	return 0;
}

/* List of IRC commands for which contents (and thus possible URLs)
 * are visible to the user.  NOTE:  Trailing blank required in each.
 * url_check_line() matches these with strncmp() over the full entry
 * length, so the blank makes sure only the complete command word
 * matches. */
static char *commands[] = {
	"NOTICE ",
	"PRIVMSG ",
	"TOPIC ",
	"332 ",		/* RPL_TOPIC */
	"372 "		/* RPL_MOTD */
};

/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))

/* Scan one raw IRC line for URLs and hand each one to url_add().
 *
 * buf: NUL-terminated line, optionally starting with a ":prefix ".
 * len: accepted for interface compatibility; the scan relies on the NUL
 *      terminator and does not use it.
 *
 * Only lines whose command appears in commands[] are examined.  The first
 * parameter after the command (channel or nick) is skipped; the rest is
 * split on ' ' and '\r', and every word that url_check_word() classifies as
 * WORD_URL is recorded. */
void
url_check_line (char *buf, int len)
{
	char *po = buf;
	char *start;
	size_t i;
	int wlen;

	(void) len;

	/* Skip over message prefix */
	if (*po == ':')
	{
		po = strchr (po, ' ');
		if (!po)
			return;
		po++;
	}
	/* Allow only commands from the above list */
	for (i = 0; i < ARRAY_SIZE (commands); i++)
	{
		const char *cmd = commands[i];
		size_t cmdlen = strlen (cmd);

		if (strncmp (cmd, po, cmdlen) == 0)
		{
			po += cmdlen;
			break;
		}
	}
	if (i == ARRAY_SIZE (commands))
		return;

	/* Skip past the channel name or user nick */
	po = strchr (po, ' ');
	if (!po)
		return;
	po++;

	/* Step over the ':' that introduces the trailing parameter.  Only a
	   real ':' is skipped: advancing unconditionally would walk past the
	   terminating NUL when the text is empty, and would eat the first
	   character of a message sent without the ':' marker. */
	if (buf[0] == ':' && buf[1] != 0 && po[0] == ':')
		po++;

	start = po;

	/* check each "word" (space separated) */
	while (1)
	{
		switch (po[0])
		{
		case 0:
		case ' ':
		case '\r':

			wlen = (int) (po - start);
			if (wlen > 2)
			{
				/* HACK! :( */
				/* This is to work around not being able to detect URLs that are at
				   the start of messages. */
				if (start[0] == ':')
				{
					start++;
					wlen--;
				}
				if (start[0] == '+' || start[0] == '-')
				{
					start++;
					wlen--;
				}

				if (wlen > 2 && url_check_word (start, wlen) == WORD_URL)
				{
					url_add (start, wlen);
				}
			}
			if (po[0] == 0)
				return;
			po++;
			start = po;
			break;

		default:
			po++;
		}
	}
}
add xchat r1489 2011-02-24 06:14:30 +03:00			`/* X-Chat`
			`* Copyright (C) 1998 Peter Zelezny.`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
fix incorrect FSF address 2012-12-23 23:36:54 +04:00			`* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA`
add xchat r1489 2011-02-24 06:14:30 +03:00			`*/`

			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <ctype.h>`
A lot more rebranding 2012-10-24 23:33:02 +04:00			`#include "hexchat.h"`
			`#include "hexchatc.h"`
add xchat r1489 2011-02-24 06:14:30 +03:00			`#include "cfgfiles.h"`
			`#include "fe.h"`
			`#include "tree.h"`
			`#include "url.h"`
			`#ifdef HAVE_STRINGS_H`
			`#include <strings.h>`
			`#endif`

			`void *url_tree = NULL;`
Fix memory leak related to url grabbing 2012-10-01 23:53:25 +04:00			`GTree *url_btree = NULL;`
add xchat r1489 2011-02-24 06:14:30 +03:00

			`static int`
			`url_free (char url, void data)`
			`{`
			`free (url);`
			`return TRUE;`
			`}`

			`void`
			`url_clear (void)`
			`{`
			`tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);`
			`tree_destroy (url_tree);`
			`url_tree = NULL;`
Fix memory leak related to url grabbing 2012-10-01 23:53:25 +04:00			`g_tree_destroy (url_btree);`
			`url_btree = NULL;`
add xchat r1489 2011-02-24 06:14:30 +03:00			`}`

			`static int`
			`url_save_cb (char url, FILE fd)`
			`{`
			`fprintf (fd, "%s\n", url);`
			`return TRUE;`
			`}`

			`void`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`url_save_tree (const char fname, const char mode, gboolean fullpath)`
add xchat r1489 2011-02-24 06:14:30 +03:00			`{`
			`FILE *fd;`

			`if (fullpath)`
Nah, even more rebranding 2012-10-30 14:35:39 +04:00			`fd = hexchat_fopen_file (fname, mode, XOF_FULLPATH);`
add xchat r1489 2011-02-24 06:14:30 +03:00			`else`
Nah, even more rebranding 2012-10-30 14:35:39 +04:00			`fd = hexchat_fopen_file (fname, mode, 0);`
add xchat r1489 2011-02-24 06:14:30 +03:00			`if (fd == NULL)`
			`return;`

			`tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, fd);`
			`fclose (fd);`
			`}`

Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`static void`
			`url_save_node (char* url)`
add xchat r1489 2011-02-24 06:14:30 +03:00			`{`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`FILE *fd;`

			`/* open <config>/url.log in append mode */`
Nah, even more rebranding 2012-10-30 14:35:39 +04:00			`fd = hexchat_fopen_file ("url.log", "a", 0);`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`if (fd == NULL)`
			`{`
			`return;`
			`}`

			`fprintf (fd, "%s\n", url);`
			`fclose (fd);`
add xchat r1489 2011-02-24 06:14:30 +03:00			`}`

			`static int`
			`url_find (char *urltext)`
			`{`
Fix memory leak related to url grabbing 2012-10-01 23:53:25 +04:00			`return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));`
add xchat r1489 2011-02-24 06:14:30 +03:00			`}`

			`static void`
			`url_add (char *urltext, int len)`
			`{`
update xchat to r1503 2012-03-16 02:58:52 +04:00			`char *data;`
			`int size;`

Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`/* we don't need any URLs if we have neither URL grabbing nor URL logging enabled */`
Variable cleanup for the rest 2012-10-22 17:55:43 +04:00			`if (!prefs.hex_url_grabber && !prefs.hex_url_logging)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`{`
update xchat to r1503 2012-03-16 02:58:52 +04:00			`return;`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`}`
update xchat to r1503 2012-03-16 02:58:52 +04:00
			`data = malloc (len + 1);`
add xchat r1489 2011-02-24 06:14:30 +03:00			`if (!data)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`{`
add xchat r1489 2011-02-24 06:14:30 +03:00			`return;`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`}`
add xchat r1489 2011-02-24 06:14:30 +03:00			`memcpy (data, urltext, len);`
			`data[len] = 0;`

			`if (data[len - 1] == '.') /* chop trailing dot */`
			`{`
			`len--;`
			`data[len] = 0;`
			`}`
Fix Wikipedia URL detection - URLs inside parentheses won't work 2012-10-07 15:27:44 +04:00			`/* chop trailing ) but only if there's no counterpart */`
			`if (data[len - 1] == ')' && strchr (data, '(') == NULL)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`{`
add xchat r1489 2011-02-24 06:14:30 +03:00			`data[len - 1] = 0;`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`}`

Variable cleanup for the rest 2012-10-22 17:55:43 +04:00			`if (prefs.hex_url_logging)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`{`
			`url_save_node (data);`
			`}`

			`/* the URL is saved already, only continue if we need the URL grabber too */`
Variable cleanup for the rest 2012-10-22 17:55:43 +04:00			`if (!prefs.hex_url_grabber)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 12:03:39 +04:00			`{`
			`free (data);`
			`return;`
			`}`
add xchat r1489 2011-02-24 06:14:30 +03:00
Fix memory leak related to url grabbing 2012-10-01 23:53:25 +04:00			`if (!url_tree)`
			`{`
			`url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);`
			`url_btree = g_tree_new ((GCompareFunc)strcasecmp);`
			`}`

add xchat r1489 2011-02-24 06:14:30 +03:00			`if (url_find (data))`
			`{`
			`free (data);`
			`return;`
			`}`

update xchat to r1503 2012-03-16 02:58:52 +04:00			`size = tree_size (url_tree);`
			`/* 0 is unlimited */`
Variable cleanup for the rest 2012-10-22 17:55:43 +04:00			`if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)`
update xchat to r1503 2012-03-16 02:58:52 +04:00			`{`
			`/* the loop is necessary to handle having the limit lowered while`
Nah, even more rebranding 2012-10-30 14:35:39 +04:00			`HexChat is running */`
Variable cleanup for the rest 2012-10-22 17:55:43 +04:00			`size -= prefs.hex_url_grabber_limit;`
update xchat to r1503 2012-03-16 02:58:52 +04:00			`for(; size > 0; size--)`
Fix memory leak related to url grabbing 2012-10-01 23:53:25 +04:00			`{`
			`char *pos;`

			`pos = tree_remove_at_pos (url_tree, 0);`
			`g_tree_remove (url_btree, pos);`
			`free (pos);`
			`}`
update xchat to r1503 2012-03-16 02:58:52 +04:00			`}`

			`tree_append (url_tree, data);`
Fix memory leak related to url grabbing 2012-10-01 23:53:25 +04:00			`g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));`
add xchat r1489 2011-02-24 06:14:30 +03:00			`fe_url_add (data);`
			`}`

			`/* check if a word is clickable. This is called on mouse motion events, so`
			`keep it FAST! This new version was found to be almost 3x faster than`
			`2.4.4 release. */`

			`int`
Eliminate some more warnings 2012-10-31 00:25:58 +04:00			`url_check_word (const char *word, int len)`
add xchat r1489 2011-02-24 06:14:30 +03:00			`{`
			`#define D(x) (x), ((sizeof (x)) - 1)`
			`static const struct {`
			`const char *s;`
			`int len;`
			`}`
			`prefix[] = {`
			`{ D("irc.") },`
			`{ D("ftp.") },`
			`{ D("www.") },`
			`{ D("irc://") },`
			`{ D("ftp://") },`
			`{ D("http://") },`
			`{ D("https://") },`
			`{ D("file://") },`
			`{ D("rtsp://") },`
			`{ D("ut2004://") },`
			`},`
			`suffix[] = {`
			`{ D(".org") },`
			`{ D(".net") },`
			`{ D(".com") },`
			`{ D(".edu") },`
			`{ D(".html") },`
			`{ D(".info") },`
			`{ D(".name") },`
Improve URL recognition (sacarasc) 2012-07-12 01:16:20 +04:00			`/* Some extra common suffixes.`
			`foo.blah/baz.php etc should work now, rather than`
			`needing http:// at the beginning. */`
			`{ D(".php") },`
			`{ D(".htm") },`
			`{ D(".aero") },`
			`{ D(".asia") },`
			`{ D(".biz") },`
			`{ D(".cat") },`
			`{ D(".coop") },`
			`{ D(".int") },`
			`{ D(".jobs") },`
			`{ D(".mobi") },`
			`{ D(".museum") },`
			`{ D(".pro") },`
			`{ D(".tel") },`
			`{ D(".travel") },`
			`{ D(".xxx") },`
			`{ D(".asp") },`
			`{ D(".aspx") },`
			`{ D(".shtml") },`
			`{ D(".xml") },`
add xchat r1489 2011-02-24 06:14:30 +03:00			`};`
			`#undef D`
			`const char at, dot;`
			`int i, dots;`

Fix opening folders in portable mode 2012-10-13 12:24:43 +04:00			`/* this is pretty much the same as in logmask_is_fullpath() except with length checks and .\ for portable mode */`
Fix URL detection 2012-10-07 05:00:52 +04:00			`#ifdef WIN32`
Fix opening folders in portable mode 2012-10-13 12:24:43 +04:00			`if ((len > 1 && word[0] == '\\') \|\|`
			`(len > 2 && word[0] == '.' && word[1] == '\\') \|\|`
			`(len > 2 && (((word[0] >= 'A' && word[0] <= 'Z') \|\| (word[0] >= 'a' && word[0] <= 'z')) && word[1] == ':')))`
Fix URL detection 2012-10-07 05:00:52 +04:00			`#else`
Fix "Fix URL detection". First-character test in linux should be for equal, rather than not-equal. If first character is a slash return WORD_PATH. 2012-10-08 23:20:11 +04:00			`if (len > 1 && word[0] == '/')`
Fix URL detection 2012-10-07 05:00:52 +04:00			`#endif`
			`{`
			`return WORD_PATH;`
			`}`

add xchat r1489 2011-02-24 06:14:30 +03:00			`if (len > 1 && word[1] == '#' && strchr("@+^%*#", word[0]))`
			`return WORD_CHANNEL;`

			`if ((word[0] == '#' \|\| word[0] == '&') && word[1] != '#' && word[1] != 0)`
			`return WORD_CHANNEL;`

			`for (i = 0; i < G_N_ELEMENTS(prefix); i++)`
			`{`
			`int l;`

			`l = prefix[i].len;`
			`if (len > l)`
			`{`
			`int j;`

Fix string handling for Turkish locale 2012-06-16 15:01:47 +04:00			`/* This is pretty much g_ascii_strncasecmp(). */`
add xchat r1489 2011-02-24 06:14:30 +03:00			`for (j = 0; j < l; j++)`
			`{`
			`unsigned char c = word[j];`
			`if (tolower(c) != prefix[i].s[j])`
			`break;`
			`}`
			`if (j == l)`
			`return WORD_URL;`
			`}`
			`}`

			`at = strchr (word, '@'); /* check for email addy */`
			`dot = strrchr (word, '.');`
			`if (at && dot)`
			`{`
			`if (at < dot)`
			`{`
			`if (strchr (word, '*'))`
			`return WORD_HOST;`
			`else`
			`return WORD_EMAIL;`
			`}`
			`}`

			`/* check if it's an IP number */`
			`dots = 0;`
			`for (i = 0; i < len; i++)`
			`{`
			`if (word[i] == '.' && i > 0)`
			`dots++; /* allow 127.0.0.1:80 */`
			`else if (!isdigit ((unsigned char) word[i]) && word[i] != ':')`
			`{`
			`dots = 0;`
			`break;`
			`}`
			`}`
			`if (dots == 3)`
			`return WORD_HOST;`

			`if (len > 5)`
			`{`
			`for (i = 0; i < G_N_ELEMENTS(suffix); i++)`
			`{`
			`int l;`

			`l = suffix[i].len;`
			`if (len > l)`
			`{`
			`const unsigned char *p = &word[len - l];`
			`int j;`

Fix string handling for Turkish locale 2012-06-16 15:01:47 +04:00			`/* This is pretty much g_ascii_strncasecmp(). */`
add xchat r1489 2011-02-24 06:14:30 +03:00			`for (j = 0; j < l; j++)`
			`{`
			`if (tolower(p[j]) != suffix[i].s[j])`
			`break;`
			`}`
			`if (j == l)`
			`return WORD_HOST;`
			`}`
			`}`

			`if (word[len - 3] == '.' &&`
			`isalpha ((unsigned char) word[len - 2]) &&`
			`isalpha ((unsigned char) word[len - 1]))`
			`return WORD_HOST;`
			`}`

			`return 0;`
			`}`

Limit url-grabbing to NOTICE, PRIVMSG, TOPIC, 332 (RPL_TOPIC), 372 (RPL_MOTD) 2012-11-14 00:06:35 +04:00			`/* List of IRC commands for which contents (and thus possible URLs)`
			`* are visible to the user. NOTE: Trailing blank required in each. */`
			`static char *commands[] = {`
			`"NOTICE ",`
			`"PRIVMSG ",`
			`"TOPIC ",`
			`"332 ", /* RPL_TOPIC */`
			`"372 " /* RPL_MOTD */`
			`};`

			`#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))`

add xchat r1489 2011-02-24 06:14:30 +03:00			`void`
			`url_check_line (char *buf, int len)`
			`{`
			`char *po = buf;`
			`char *start;`
Limit url-grabbing to NOTICE, PRIVMSG, TOPIC, 332 (RPL_TOPIC), 372 (RPL_MOTD) 2012-11-14 00:06:35 +04:00			`int i, wlen;`

			`/* Skip over message prefix */`
			`if (*po == ':')`
			`{`
			`po = strchr (po, ' ');`
			`if (!po)`
			`return;`
			`po++;`
			`}`
			`/* Allow only commands from the above list */`
			`for (i = 0; i < ARRAY_SIZE (commands); i++)`
			`{`
			`char *cmd = commands[i];`
			`int len = strlen (cmd);`

			`if (strncmp (cmd, po, len) == 0)`
			`{`
			`po += len;`
			`break;`
			`}`
			`}`
			`if (i == ARRAY_SIZE (commands))`
			`return;`

			`/* Skip past the channel name or user nick */`
			`po = strchr (po, ' ');`
			`if (!po)`
			`return;`
			`po++;`
add xchat r1489 2011-02-24 06:14:30 +03:00
			`if (buf[0] == ':' && buf[1] != 0)`
			`po++;`

			`start = po;`

			`/* check each "word" (space separated) */`
			`while (1)`
			`{`
			`switch (po[0])`
			`{`
			`case 0:`
			`case ' ':`
Limit url-grabbing to NOTICE, PRIVMSG, TOPIC, 332 (RPL_TOPIC), 372 (RPL_MOTD) 2012-11-14 00:06:35 +04:00			`case '\r':`
update xchat to r1503 2012-03-16 02:58:52 +04:00
add xchat r1489 2011-02-24 06:14:30 +03:00			`wlen = po - start;`
			`if (wlen > 2)`
			`{`
update xchat to r1503 2012-03-16 02:58:52 +04:00			`/* HACK! :( */`
			`/* This is to work around not being able to detect URLs that are at`
			`the start of messages. */`
			`if (start[0] == ':')`
			`{`
			`start++;`
			`wlen--;`
			`}`
			`if (start[0] == '+' \|\| start[0] == '-')`
			`{`
			`start++;`
			`wlen--;`
			`}`

			`if (wlen > 2 && url_check_word (start, wlen) == WORD_URL)`
add xchat r1489 2011-02-24 06:14:30 +03:00			`{`
			`url_add (start, wlen);`
			`}`
			`}`
			`if (po[0] == 0)`
			`return;`
			`po++;`
			`start = po;`
			`break;`

			`default:`
			`po++;`
			`}`
			`}`
			`}`