2013-09-23 00:51:23 +04:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
|
|
wakatime.stats
|
|
|
|
~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
Stats about files
|
|
|
|
|
|
|
|
:copyright: (c) 2013 Alan Hamlett.
|
|
|
|
:license: BSD, see LICENSE for more details.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import logging
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
2014-09-30 20:27:35 +04:00
|
|
|
from .compat import u, open
|
2014-12-23 14:39:07 +03:00
|
|
|
from .languages import DependencyParser
|
2014-09-30 20:27:35 +04:00
|
|
|
|
2013-09-23 02:24:50 +04:00
|
|
|
if sys.version_info[0] == 2:
|
2014-12-01 09:21:46 +03:00
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2'))
|
2013-09-23 02:24:50 +04:00
|
|
|
else:
|
2014-12-01 09:21:46 +03:00
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py3'))
|
2015-06-21 20:35:14 +03:00
|
|
|
from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename
|
|
|
|
from pygments.modeline import get_filetype_from_buffer
|
|
|
|
from pygments.util import ClassNotFound
|
2013-09-23 00:51:23 +04:00
|
|
|
|
|
|
|
|
2014-07-25 12:01:39 +04:00
|
|
|
# Module-level logger, obtained under the fixed logger name 'WakaTime'.
log = logging.getLogger('WakaTime')
|
2013-09-23 00:51:23 +04:00
|
|
|
|
|
|
|
|
2015-06-21 20:35:14 +03:00
|
|
|
# Extensions taking priority over the lexer.
# Maps a file extension (without the leading dot) to a language name.
# guess_language() consults this map first and only falls back to the
# lexer's name when the extension is not listed here.
EXTENSIONS = {
    'j2': 'HTML',
    'markdown': 'Markdown',
    'md': 'Markdown',
    'mdown': 'Markdown',
    'twig': 'Twig',
}
|
2015-06-21 20:35:14 +03:00
|
|
|
|
|
|
|
# Lexer names to human-readable languages.
# translate_language() replaces any language found as a key here with the
# mapped value; names that are not listed are passed through unchanged.
TRANSLATIONS = {
    'CSS+Genshi Text': 'CSS',
    'CSS+Lasso': 'CSS',
    'HTML+Django/Jinja': 'HTML',
    'HTML+Lasso': 'HTML',
    'JavaScript+Genshi Text': 'JavaScript',
    'JavaScript+Lasso': 'JavaScript',
    'Perl6': 'Perl',
    'RHTML': 'HTML',
}
|
2013-10-26 08:33:31 +04:00
|
|
|
|
2015-06-21 20:35:14 +03:00
|
|
|
# Extensions for when no lexer is found.
# guess_language() consults this map last, only when neither EXTENSIONS nor
# the guessed lexer produced a language.
AUXILIARY_EXTENSIONS = {
    'vb': 'VB.net',
}
|
|
|
|
|
2013-10-26 08:33:31 +04:00
|
|
|
|
2013-09-23 00:51:23 +04:00
|
|
|
def guess_language(file_name):
    """Work out the language and the Pygments lexer for a file.

    The language is resolved in this order: a hard-coded rule in EXTENSIONS,
    then the name of the guessed lexer, then a rule in AUXILIARY_EXTENSIONS.
    Whatever was found is finally passed through translate_language().

    Returns a (language, lexer) tuple; either element may be None.
    """

    lexer = smart_guess_lexer(file_name)

    # 1. hard-coded extension rules win over the lexer
    if file_name:
        language = get_language_from_extension(file_name, EXTENSIONS)
    else:
        language = None

    # 2. no extension rule matched: use the lexer's own name
    if language is None and lexer:
        language = u(lexer.name)

    # 3. still nothing: extensions that have no lexer at all
    if language is None:
        language = get_language_from_extension(file_name, AUXILIARY_EXTENSIONS)

    if language is None:
        return None, lexer
    return translate_language(language), lexer
|
|
|
|
|
|
|
|
|
|
|
|
def smart_guess_lexer(file_name):
    """Pick the most plausible Pygments lexer for a file.

    Two candidates are produced from the file's contents: one chosen among
    the lexers matching the file name, and one named by a Vim modeline. The
    modeline candidate is returned only when it has a truthy accuracy and the
    file name candidate has either no accuracy or a lower one. Otherwise the
    file name candidate is returned, or None when there is none.
    """
    contents = get_file_contents(file_name)

    by_name, by_name_score = guess_lexer_using_filename(file_name, contents)
    by_modeline, by_modeline_score = guess_lexer_using_modeline(contents)

    modeline_usable = by_modeline and by_modeline_score
    if modeline_usable and (not by_name_score or
                            by_modeline_score > by_name_score):
        return by_modeline

    if by_name:
        return by_name
    return None
|
|
|
|
|
|
|
|
|
|
|
|
def guess_lexer_using_filename(file_name, text):
    """Guess lexer for given text, limited to lexers for this file's extension.

    Both steps are best effort: a failed lookup leaves the lexer as None and
    a failed text analysis leaves the accuracy as None.

    Returns a tuple of (lexer, accuracy).
    """

    lexer, accuracy = None, None

    try:
        lexer = guess_lexer_for_filename(file_name, text)
    except Exception:
        # Any lookup failure simply means "no lexer for this file name".
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # are not silently swallowed.
        pass

    if lexer is not None:
        try:
            accuracy = lexer.analyse_text(text)
        except Exception:
            # A lexer whose analysis fails is kept, just without a score.
            pass

    return lexer, accuracy
|
|
|
|
|
|
|
|
|
|
|
|
def guess_lexer_using_modeline(text):
    """Guess lexer for given text using Vim modeline.

    text may be None or empty (get_file_contents() returns None when the file
    cannot be read); in that case there is nothing to scan and (None, None)
    is returned without calling into Pygments.

    Returns a tuple of (lexer, accuracy).
    """

    lexer, accuracy = None, None

    # An unreadable or empty file has no modeline; passing None on to
    # get_filetype_from_buffer() would raise instead of returning None.
    if not text:
        return lexer, accuracy

    file_type = get_filetype_from_buffer(text)
    if file_type is not None:
        try:
            lexer = get_lexer_by_name(file_type)
        except ClassNotFound:
            # The modeline names a file type Pygments has no lexer for.
            pass

    if lexer is not None:
        try:
            accuracy = lexer.analyse_text(text)
        except Exception:
            # Keep the lexer even when it cannot score the text. Only
            # Exception is caught so KeyboardInterrupt/SystemExit propagate.
            pass

    return lexer, accuracy
|
|
|
|
|
|
|
|
|
|
|
|
def get_language_from_extension(file_name, extension_map):
    """Returns a matching language for the given file_name using extension_map.

    The extension is the text after the last dot in file_name. An exact-case
    match in extension_map is preferred; otherwise the lower-cased extension
    is tried.

    Returns None when file_name is None or empty, contains no dot, ends with
    a dot, or has an extension that is not in extension_map.
    """

    if not file_name:
        return None

    # rpartition yields an empty separator when file_name contains no dot
    _, dot, extension = file_name.rpartition('.')
    if not dot or not extension:
        return None

    # exact case first, so a map may distinguish e.g. 'R' from 'r'
    for candidate in (extension, extension.lower()):
        if candidate in extension_map:
            return extension_map[candidate]

    return None
|
|
|
|
|
|
|
|
|
2013-10-27 04:59:41 +04:00
|
|
|
def translate_language(language):
    """Map a Pygments lexer name to its human-readable language.

    Names without an entry in TRANSLATIONS are returned unchanged.
    """
    return TRANSLATIONS.get(language, language)
|
|
|
|
|
|
|
|
|
2013-09-23 00:51:23 +04:00
|
|
|
def number_lines_in_file(file_name):
    """Counts the lines in a file read as UTF-8 text.

    Returns the number of lines, or None when the file cannot be opened or
    its contents cannot be read and decoded.
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as fh:
            return sum(1 for _ in fh)
    except Exception:
        # Best effort: a missing, unreadable or non-UTF-8 file has no line
        # count. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are not silently swallowed.
        return None
|
|
|
|
|
|
|
|
|
2015-05-07 02:33:32 +03:00
|
|
|
def get_file_stats(file_name, notfile=False, lineno=None, cursorpos=None):
    """Collect the stats dictionary for an entity.

    When notfile is true the file is never inspected: language and lines are
    None and dependencies is an empty list. Otherwise the language is
    guessed, dependencies are parsed with the guessed lexer and the lines are
    counted. lineno and cursorpos are passed through unchanged.
    """
    language, dependencies, lines = None, [], None

    if not notfile:
        language, lexer = guess_language(file_name)
        dependencies = DependencyParser(file_name, lexer).parse()
        lines = number_lines_in_file(file_name)

    return {
        'language': language,
        'dependencies': dependencies,
        'lines': lines,
        'lineno': lineno,
        'cursorpos': cursorpos,
    }
|
2015-06-21 20:35:14 +03:00
|
|
|
|
|
|
|
|
|
|
|
def get_file_contents(file_name):
    """Returns the beginning of the file's contents, decoded as UTF-8.

    At most 512000 units are requested from read(); whether that limit counts
    bytes or characters depends on the open() implementation in use (it is
    imported from .compat) -- NOTE(review): confirm against .compat.

    Returns None when the file cannot be opened, read or decoded.
    """
    try:
        with open(file_name, 'r', encoding='utf-8') as fh:
            return fh.read(512000)
    except Exception:
        # Best effort: callers treat None as "no contents available". Only
        # Exception is caught so that KeyboardInterrupt and SystemExit are
        # not silently swallowed.
        return None
|