2011-02-24 06:14:30 +03:00
|
|
|
/* X-Chat
|
|
|
|
* Copyright (C) 1998 Peter Zelezny.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
2012-12-23 23:36:54 +04:00
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
2011-02-24 06:14:30 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
2012-10-24 23:33:02 +04:00
|
|
|
#include "hexchat.h"
|
|
|
|
#include "hexchatc.h"
|
2011-02-24 06:14:30 +03:00
|
|
|
#include "cfgfiles.h"
|
|
|
|
#include "fe.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "url.h"
|
|
|
|
#ifdef HAVE_STRINGS_H
|
|
|
|
#include <strings.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Collection of grabbed URL strings; stays NULL until url_add() stores the
   first URL and is reset to NULL by url_clear(). */
void *url_tree = NULL;
|
2012-10-01 23:53:25 +04:00
|
|
|
/* GTree keyed by the same URL strings (compared with strcasecmp); used by
   url_find() to detect URLs that were already grabbed. */
GTree *url_btree = NULL;
|
2011-02-24 06:14:30 +03:00
|
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
url_free (char *url, void *data)
|
|
|
|
{
|
|
|
|
free (url);
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
url_clear (void)
|
|
|
|
{
|
|
|
|
tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);
|
|
|
|
tree_destroy (url_tree);
|
|
|
|
url_tree = NULL;
|
2012-10-01 23:53:25 +04:00
|
|
|
g_tree_destroy (url_btree);
|
|
|
|
url_btree = NULL;
|
2011-02-24 06:14:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
url_save_cb (char *url, FILE *fd)
|
|
|
|
{
|
|
|
|
fprintf (fd, "%s\n", url);
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2012-10-13 12:03:39 +04:00
|
|
|
url_save_tree (const char *fname, const char *mode, gboolean fullpath)
|
2011-02-24 06:14:30 +03:00
|
|
|
{
|
|
|
|
FILE *fd;
|
|
|
|
|
|
|
|
if (fullpath)
|
2012-10-30 14:35:39 +04:00
|
|
|
fd = hexchat_fopen_file (fname, mode, XOF_FULLPATH);
|
2011-02-24 06:14:30 +03:00
|
|
|
else
|
2012-10-30 14:35:39 +04:00
|
|
|
fd = hexchat_fopen_file (fname, mode, 0);
|
2011-02-24 06:14:30 +03:00
|
|
|
if (fd == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, fd);
|
|
|
|
fclose (fd);
|
|
|
|
}
|
|
|
|
|
2012-10-13 12:03:39 +04:00
|
|
|
/* Append a single URL, followed by a newline, to url.log.  Silently does
   nothing when the log cannot be opened. */
static void
url_save_node (char* url)
{
	/* open <config>/url.log in append mode */
	FILE *logfile = hexchat_fopen_file ("url.log", "a", 0);

	if (logfile != NULL)
	{
		fprintf (logfile, "%s\n", url);
		fclose (logfile);
	}
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
url_find (char *urltext)
|
|
|
|
{
|
2012-10-01 23:53:25 +04:00
|
|
|
return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));
|
2011-02-24 06:14:30 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
url_add (char *urltext, int len)
|
|
|
|
{
|
2012-03-16 02:58:52 +04:00
|
|
|
char *data;
|
|
|
|
int size;
|
|
|
|
|
2012-10-13 12:03:39 +04:00
|
|
|
/* we don't need any URLs if we have neither URL grabbing nor URL logging enabled */
|
2012-10-22 17:55:43 +04:00
|
|
|
if (!prefs.hex_url_grabber && !prefs.hex_url_logging)
|
2012-10-13 12:03:39 +04:00
|
|
|
{
|
2012-03-16 02:58:52 +04:00
|
|
|
return;
|
2012-10-13 12:03:39 +04:00
|
|
|
}
|
2012-03-16 02:58:52 +04:00
|
|
|
|
|
|
|
data = malloc (len + 1);
|
2011-02-24 06:14:30 +03:00
|
|
|
if (!data)
|
2012-10-13 12:03:39 +04:00
|
|
|
{
|
2011-02-24 06:14:30 +03:00
|
|
|
return;
|
2012-10-13 12:03:39 +04:00
|
|
|
}
|
2011-02-24 06:14:30 +03:00
|
|
|
memcpy (data, urltext, len);
|
|
|
|
data[len] = 0;
|
|
|
|
|
|
|
|
if (data[len - 1] == '.') /* chop trailing dot */
|
|
|
|
{
|
|
|
|
len--;
|
|
|
|
data[len] = 0;
|
|
|
|
}
|
2012-10-07 15:27:44 +04:00
|
|
|
/* chop trailing ) but only if there's no counterpart */
|
|
|
|
if (data[len - 1] == ')' && strchr (data, '(') == NULL)
|
2012-10-13 12:03:39 +04:00
|
|
|
{
|
2011-02-24 06:14:30 +03:00
|
|
|
data[len - 1] = 0;
|
2012-10-13 12:03:39 +04:00
|
|
|
}
|
|
|
|
|
2012-10-22 17:55:43 +04:00
|
|
|
if (prefs.hex_url_logging)
|
2012-10-13 12:03:39 +04:00
|
|
|
{
|
|
|
|
url_save_node (data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* the URL is saved already, only continue if we need the URL grabber too */
|
2012-10-22 17:55:43 +04:00
|
|
|
if (!prefs.hex_url_grabber)
|
2012-10-13 12:03:39 +04:00
|
|
|
{
|
|
|
|
free (data);
|
|
|
|
return;
|
|
|
|
}
|
2011-02-24 06:14:30 +03:00
|
|
|
|
2012-10-01 23:53:25 +04:00
|
|
|
if (!url_tree)
|
|
|
|
{
|
|
|
|
url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);
|
|
|
|
url_btree = g_tree_new ((GCompareFunc)strcasecmp);
|
|
|
|
}
|
|
|
|
|
2011-02-24 06:14:30 +03:00
|
|
|
if (url_find (data))
|
|
|
|
{
|
|
|
|
free (data);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-03-16 02:58:52 +04:00
|
|
|
size = tree_size (url_tree);
|
|
|
|
/* 0 is unlimited */
|
2012-10-22 17:55:43 +04:00
|
|
|
if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)
|
2012-03-16 02:58:52 +04:00
|
|
|
{
|
|
|
|
/* the loop is necessary to handle having the limit lowered while
|
2012-10-30 14:35:39 +04:00
|
|
|
HexChat is running */
|
2012-10-22 17:55:43 +04:00
|
|
|
size -= prefs.hex_url_grabber_limit;
|
2012-03-16 02:58:52 +04:00
|
|
|
for(; size > 0; size--)
|
2012-10-01 23:53:25 +04:00
|
|
|
{
|
|
|
|
char *pos;
|
|
|
|
|
|
|
|
pos = tree_remove_at_pos (url_tree, 0);
|
|
|
|
g_tree_remove (url_btree, pos);
|
|
|
|
free (pos);
|
|
|
|
}
|
2012-03-16 02:58:52 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
tree_append (url_tree, data);
|
2012-10-01 23:53:25 +04:00
|
|
|
g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));
|
2011-02-24 06:14:30 +03:00
|
|
|
fe_url_add (data);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check if a word is clickable. This is called on mouse motion events, so
|
|
|
|
keep it FAST! This new version was found to be almost 3x faster than
|
|
|
|
2.4.4 release. */
|
|
|
|
|
|
|
|
int
|
2012-10-31 00:25:58 +04:00
|
|
|
url_check_word (const char *word, int len)
|
2011-02-24 06:14:30 +03:00
|
|
|
{
|
|
|
|
#define D(x) (x), ((sizeof (x)) - 1)
|
|
|
|
static const struct {
|
|
|
|
const char *s;
|
|
|
|
int len;
|
|
|
|
}
|
|
|
|
prefix[] = {
|
|
|
|
{ D("irc.") },
|
|
|
|
{ D("ftp.") },
|
|
|
|
{ D("www.") },
|
|
|
|
{ D("irc://") },
|
|
|
|
{ D("ftp://") },
|
|
|
|
{ D("http://") },
|
|
|
|
{ D("https://") },
|
|
|
|
{ D("file://") },
|
|
|
|
{ D("rtsp://") },
|
|
|
|
{ D("ut2004://") },
|
|
|
|
},
|
|
|
|
suffix[] = {
|
|
|
|
{ D(".org") },
|
|
|
|
{ D(".net") },
|
|
|
|
{ D(".com") },
|
|
|
|
{ D(".edu") },
|
|
|
|
{ D(".html") },
|
|
|
|
{ D(".info") },
|
|
|
|
{ D(".name") },
|
2012-07-12 01:16:20 +04:00
|
|
|
/* Some extra common suffixes.
|
|
|
|
foo.blah/baz.php etc should work now, rather than
|
|
|
|
needing http:// at the beginning. */
|
|
|
|
{ D(".php") },
|
|
|
|
{ D(".htm") },
|
|
|
|
{ D(".aero") },
|
|
|
|
{ D(".asia") },
|
|
|
|
{ D(".biz") },
|
|
|
|
{ D(".cat") },
|
|
|
|
{ D(".coop") },
|
|
|
|
{ D(".int") },
|
|
|
|
{ D(".jobs") },
|
|
|
|
{ D(".mobi") },
|
|
|
|
{ D(".museum") },
|
|
|
|
{ D(".pro") },
|
|
|
|
{ D(".tel") },
|
|
|
|
{ D(".travel") },
|
|
|
|
{ D(".xxx") },
|
|
|
|
{ D(".asp") },
|
|
|
|
{ D(".aspx") },
|
|
|
|
{ D(".shtml") },
|
|
|
|
{ D(".xml") },
|
2011-02-24 06:14:30 +03:00
|
|
|
};
|
|
|
|
#undef D
|
|
|
|
const char *at, *dot;
|
|
|
|
int i, dots;
|
|
|
|
|
2012-10-13 12:24:43 +04:00
|
|
|
/* this is pretty much the same as in logmask_is_fullpath() except with length checks and .\ for portable mode */
|
2012-10-07 05:00:52 +04:00
|
|
|
#ifdef WIN32
|
2012-10-13 12:24:43 +04:00
|
|
|
if ((len > 1 && word[0] == '\\') ||
|
|
|
|
(len > 2 && word[0] == '.' && word[1] == '\\') ||
|
|
|
|
(len > 2 && (((word[0] >= 'A' && word[0] <= 'Z') || (word[0] >= 'a' && word[0] <= 'z')) && word[1] == ':')))
|
2012-10-07 05:00:52 +04:00
|
|
|
#else
|
2012-10-08 23:20:11 +04:00
|
|
|
if (len > 1 && word[0] == '/')
|
2012-10-07 05:00:52 +04:00
|
|
|
#endif
|
|
|
|
{
|
|
|
|
return WORD_PATH;
|
|
|
|
}
|
|
|
|
|
2011-02-24 06:14:30 +03:00
|
|
|
if (len > 1 && word[1] == '#' && strchr("@+^%*#", word[0]))
|
|
|
|
return WORD_CHANNEL;
|
|
|
|
|
|
|
|
if ((word[0] == '#' || word[0] == '&') && word[1] != '#' && word[1] != 0)
|
|
|
|
return WORD_CHANNEL;
|
|
|
|
|
|
|
|
for (i = 0; i < G_N_ELEMENTS(prefix); i++)
|
|
|
|
{
|
|
|
|
int l;
|
|
|
|
|
|
|
|
l = prefix[i].len;
|
|
|
|
if (len > l)
|
|
|
|
{
|
|
|
|
int j;
|
|
|
|
|
2012-06-16 15:01:47 +04:00
|
|
|
/* This is pretty much g_ascii_strncasecmp(). */
|
2011-02-24 06:14:30 +03:00
|
|
|
for (j = 0; j < l; j++)
|
|
|
|
{
|
|
|
|
unsigned char c = word[j];
|
|
|
|
if (tolower(c) != prefix[i].s[j])
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j == l)
|
|
|
|
return WORD_URL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
at = strchr (word, '@'); /* check for email addy */
|
|
|
|
dot = strrchr (word, '.');
|
|
|
|
if (at && dot)
|
|
|
|
{
|
|
|
|
if (at < dot)
|
|
|
|
{
|
|
|
|
if (strchr (word, '*'))
|
|
|
|
return WORD_HOST;
|
|
|
|
else
|
|
|
|
return WORD_EMAIL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check if it's an IP number */
|
|
|
|
dots = 0;
|
|
|
|
for (i = 0; i < len; i++)
|
|
|
|
{
|
|
|
|
if (word[i] == '.' && i > 0)
|
|
|
|
dots++; /* allow 127.0.0.1:80 */
|
|
|
|
else if (!isdigit ((unsigned char) word[i]) && word[i] != ':')
|
|
|
|
{
|
|
|
|
dots = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (dots == 3)
|
|
|
|
return WORD_HOST;
|
|
|
|
|
|
|
|
if (len > 5)
|
|
|
|
{
|
|
|
|
for (i = 0; i < G_N_ELEMENTS(suffix); i++)
|
|
|
|
{
|
|
|
|
int l;
|
|
|
|
|
|
|
|
l = suffix[i].len;
|
|
|
|
if (len > l)
|
|
|
|
{
|
|
|
|
const unsigned char *p = &word[len - l];
|
|
|
|
int j;
|
|
|
|
|
2012-06-16 15:01:47 +04:00
|
|
|
/* This is pretty much g_ascii_strncasecmp(). */
|
2011-02-24 06:14:30 +03:00
|
|
|
for (j = 0; j < l; j++)
|
|
|
|
{
|
|
|
|
if (tolower(p[j]) != suffix[i].s[j])
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j == l)
|
|
|
|
return WORD_HOST;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (word[len - 3] == '.' &&
|
|
|
|
isalpha ((unsigned char) word[len - 2]) &&
|
|
|
|
isalpha ((unsigned char) word[len - 1]))
|
|
|
|
return WORD_HOST;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-11-14 00:06:35 +04:00
|
|
|
/* List of IRC commands for which contents (and thus possible URLs)
|
|
|
|
* are visible to the user. NOTE: Trailing blank required in each. */
|
|
|
|
static char *commands[] = {
|
|
|
|
"NOTICE ",
|
|
|
|
"PRIVMSG ",
|
|
|
|
"TOPIC ",
|
|
|
|
"332 ", /* RPL_TOPIC */
|
|
|
|
"372 " /* RPL_MOTD */
|
|
|
|
};
|
|
|
|
|
|
|
|
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
|
|
|
|
|
2011-02-24 06:14:30 +03:00
|
|
|
void
|
|
|
|
url_check_line (char *buf, int len)
|
|
|
|
{
|
|
|
|
char *po = buf;
|
|
|
|
char *start;
|
2012-11-14 00:06:35 +04:00
|
|
|
int i, wlen;
|
|
|
|
|
|
|
|
/* Skip over message prefix */
|
|
|
|
if (*po == ':')
|
|
|
|
{
|
|
|
|
po = strchr (po, ' ');
|
|
|
|
if (!po)
|
|
|
|
return;
|
|
|
|
po++;
|
|
|
|
}
|
|
|
|
/* Allow only commands from the above list */
|
|
|
|
for (i = 0; i < ARRAY_SIZE (commands); i++)
|
|
|
|
{
|
|
|
|
char *cmd = commands[i];
|
|
|
|
int len = strlen (cmd);
|
|
|
|
|
|
|
|
if (strncmp (cmd, po, len) == 0)
|
|
|
|
{
|
|
|
|
po += len;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i == ARRAY_SIZE (commands))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Skip past the channel name or user nick */
|
|
|
|
po = strchr (po, ' ');
|
|
|
|
if (!po)
|
|
|
|
return;
|
|
|
|
po++;
|
2011-02-24 06:14:30 +03:00
|
|
|
|
|
|
|
if (buf[0] == ':' && buf[1] != 0)
|
|
|
|
po++;
|
|
|
|
|
|
|
|
start = po;
|
|
|
|
|
|
|
|
/* check each "word" (space separated) */
|
|
|
|
while (1)
|
|
|
|
{
|
|
|
|
switch (po[0])
|
|
|
|
{
|
|
|
|
case 0:
|
|
|
|
case ' ':
|
2012-11-14 00:06:35 +04:00
|
|
|
case '\r':
|
2012-03-16 02:58:52 +04:00
|
|
|
|
2011-02-24 06:14:30 +03:00
|
|
|
wlen = po - start;
|
|
|
|
if (wlen > 2)
|
|
|
|
{
|
2012-03-16 02:58:52 +04:00
|
|
|
/* HACK! :( */
|
|
|
|
/* This is to work around not being able to detect URLs that are at
|
|
|
|
the start of messages. */
|
|
|
|
if (start[0] == ':')
|
|
|
|
{
|
|
|
|
start++;
|
|
|
|
wlen--;
|
|
|
|
}
|
|
|
|
if (start[0] == '+' || start[0] == '-')
|
|
|
|
{
|
|
|
|
start++;
|
|
|
|
wlen--;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (wlen > 2 && url_check_word (start, wlen) == WORD_URL)
|
2011-02-24 06:14:30 +03:00
|
|
|
{
|
|
|
|
url_add (start, wlen);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (po[0] == 0)
|
|
|
|
return;
|
|
|
|
po++;
|
|
|
|
start = po;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
po++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|