sublime-wakatime/packages/wakatime/dependencies/html.py

# -*- coding: utf-8 -*-
"""
    wakatime.dependencies.html
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    Parse dependencies from HTML.

    :copyright: (c) 2014 Alan Hamlett.
    :license: BSD, see LICENSE for more details.
"""

from . import TokenParser
from ..compat import u


""" If these keywords are found in the source file, treat them as a dependency.
Must be lower-case strings.
"""
# Entries are laid out one row per initial letter; the element order is
# unchanged. A comment above a row explains the less obvious names in it.
KEYWORDS = [
    '_', '$',
    # 'assert' is probably mocha
    'angular', 'assert',
    'backbone', 'batman',
    'c3', 'can', 'casper', 'chai', 'chaplin',
    # 'define' is probably require; 'describe' is mocha or jasmine
    'd3', 'define', 'describe',
    # 'expect' is probably jasmine; 'exports' is probably npm
    'eco', 'ember', 'espresso', 'expect', 'exports', 'express',
    'gulp',
    'handlebars', 'highcharts',
    'jasmine', 'jquery', 'jstz',
    # 'ko' is probably knockout
    'ko',
    # 'm' is probably mithril
    'm', 'marionette', 'meteor', 'moment', 'monitorio', 'mustache',
    'phantom', 'pickadate', 'pikaday',
    'qunit',
    # 'require' is probably the commonjs spec
    'react', 'reactive', 'require', 'ripple', 'rivets',
    'socketio', 'spine',
    'thorax',
    'underscore',
    'vue',
    'way',
    'zombie',
]


class HtmlParser(TokenParser):
    """Find ``<script src=...>`` values while walking a token stream.

    ``parse`` iterates ``self.tokens`` and dispatches on the token type; the
    attributes below hold the state carried from one token to the next.
    Matching ``src`` values are handed to ``self.append`` (defined outside
    this class body -- presumably on ``TokenParser``; confirm there).
    """

    # Stack of open tag names, lower-cased; index 0 is the most recently
    # opened tag (see ``current_tag``).
    tags = []
    # Set when a punctuation token starting with '<' is seen; cleared on '>'
    # or on a closing ('</' or '/') punctuation token.
    opening_tag = False
    # Set once a tag name has been read while ``opening_tag`` is set; cleared
    # on '>' or on a closing punctuation token.
    getting_attrs = False
    # Name of the attribute whose value is expected next, or None.
    current_attr = None
    # Pieces of a quoted attribute value gathered so far, or None when no
    # value is being assembled.
    current_attr_value = None

    def parse(self):
        for index, token, content in self.tokens:
            self._process_token(token, content)
        return self.dependencies

    def _process_token(self, token, content):
        """Dispatch one token to the handler registered for its type.

        The token is converted with ``u`` and looked up by its textual type
        name; token types without a handler are ignored.
        """
        handlers = {
            'Token.Punctuation': self._process_punctuation,
            'Token.Name.Tag': self._process_tag,
            'Token.Literal.String': self._process_string,
            'Token.Name.Attribute': self._process_attribute,
        }
        handler = handlers.get(u(token))
        if handler is not None:
            handler(token, content)

    @property
    def current_tag(self):
        return None if len(self.tags) == 0 else self.tags[0]

    def _process_punctuation(self, token, content):
        if content.startswith('</') or content.startswith('/'):
            try:
                self.tags.pop(0)
            except IndexError:
                # ignore errors from malformed markup
                pass
            self.opening_tag = False
            self.getting_attrs = False
        elif content.startswith('<'):
            self.opening_tag = True
        elif content.startswith('>'):
            self.opening_tag = False
            self.getting_attrs = False

    def _process_tag(self, token, content):
        if self.opening_tag:
            self.tags.insert(0, content.replace('<', '', 1).strip().lower())
            self.getting_attrs = True
        self.current_attr = None

    def _process_attribute(self, token, content):
        if self.getting_attrs:
            self.current_attr = content.lower().strip('=')
        self.current_attr_value = None

    def _process_string(self, token, content):
        """Assemble a quoted attribute value from string tokens.

        Does nothing unless attributes are being read and an attribute name
        is pending. When a value is complete and belongs to the ``src``
        attribute of a ``script`` tag, it is passed to ``self.append``
        (defined outside this block; presumably it records a dependency --
        confirm against the base class). The quote characters are kept as
        part of the value; they are not stripped here.
        """
        if self.getting_attrs and self.current_attr is not None:
            # Token ends with a quote character.
            if content.endswith('"') or content.endswith("'"):
                if self.current_attr_value is not None:
                    # A value is already being gathered: this token is its
                    # final piece.
                    self.current_attr_value += content
                    if self.current_tag == 'script' and self.current_attr == 'src':
                        self.append(self.current_attr_value)
                    # Value consumed; wait for the next attribute name.
                    self.current_attr = None
                    self.current_attr_value = None
                else:
                    if len(content) == 1:
                        # A lone quote character with nothing gathered yet is
                        # treated as the opening quote of a value.
                        self.current_attr_value = content
                    else:
                        # Longer token ending in a quote with nothing gathered
                        # yet: handled as a complete value in one token.
                        if self.current_tag == 'script' and self.current_attr == 'src':
                            self.append(content)
                        self.current_attr = None
                        self.current_attr_value = None
            # Token starts with a quote but does not end with one: begin
            # gathering a value, unless one is already in progress.
            elif content.startswith('"') or content.startswith("'"):
                if self.current_attr_value is None:
                    self.current_attr_value = content