v/vlib/v/vmod/parser.v

module vmod

import os

enum TokenKind {
	module_keyword
	field_key
	lcbr
	rcbr
	labr
	rabr
	comma
	colon
	eof
	str
	ident
	unknown
}

pub struct Manifest {
pub mut:
	name         string
	version      string
	description  string
	dependencies []string
	license      string
	repo_url     string
	author       string
	unknown      map[string][]string
}

struct Scanner {
mut:
	pos         int
	text        string
	inside_text bool
	tokens      []Token
}

struct Parser {
mut:
	file_path string
	scanner   Scanner
}

struct Token {
	typ TokenKind
	val string
}

pub fn from_file(vmod_path string) ?Manifest {
	if !os.exists(vmod_path) {
		return error('v.mod: v.mod file not found.')
	}
	contents := os.read_file(vmod_path) or { '' }
	return decode(contents)
}

pub fn decode(contents string) ?Manifest {
	mut parser := Parser{
		scanner: Scanner{
			pos: 0
			text: contents
		}
	}
	return parser.parse()
}

fn (mut s Scanner) tokenize(t_type TokenKind, val string) {
	s.tokens << Token{t_type, val}
}

fn (mut s Scanner) skip_whitespace() {
	for s.pos < s.text.len && s.text[s.pos].is_space() {
		s.pos++
	}
}

fn is_name_alpha(chr byte) bool {
	return chr.is_letter() || chr == `_`
}

fn (mut s Scanner) create_string(q byte) string {
	mut str := ''
	for s.text[s.pos] != q {
		if s.text[s.pos] == `\\` && s.text[s.pos + 1] == q {
			str += s.text[s.pos..s.pos + 1]
			s.pos += 2
		} else {
			str += s.text[s.pos].ascii_str()
			s.pos++
		}
	}
	return str
}

fn (mut s Scanner) create_ident() string {
	mut text := ''
	for is_name_alpha(s.text[s.pos]) {
		text += s.text[s.pos].ascii_str()
		s.pos++
	}
	return text
}

fn (s Scanner) peek_char(c byte) bool {
	return s.pos - 1 < s.text.len && s.text[s.pos - 1] == c
}

fn (mut s Scanner) scan_all() {
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if c.is_space() || c == `\\` {
			s.pos++
			continue
		}
		if is_name_alpha(c) {
			name := s.create_ident()
			if name == 'Module' {
				s.tokenize(.module_keyword, name)
				s.pos++
				continue
			} else if s.text[s.pos] == `:` {
				s.tokenize(.field_key, name + ':')
				s.pos += 2
				continue
			} else {
				s.tokenize(.ident, name)
				s.pos++
				continue
			}
		}
		if c in [`'`, `\"`] && !s.peek_char(`\\`) {
			s.pos++
			str := s.create_string(c)
			s.tokenize(.str, str)
			s.pos++
			continue
		}
		match c {
			`{` { s.tokenize(.lcbr, c.ascii_str()) }
			`}` { s.tokenize(.rcbr, c.ascii_str()) }
			`[` { s.tokenize(.labr, c.ascii_str()) }
			`]` { s.tokenize(.rabr, c.ascii_str()) }
			`:` { s.tokenize(.colon, c.ascii_str()) }
			`,` { s.tokenize(.comma, c.ascii_str()) }
			else { s.tokenize(.unknown, c.ascii_str()) }
		}
		s.pos++
	}
	s.tokenize(.eof, 'eof')
}

fn get_array_content(tokens []Token, st_idx int) ?([]string, int) {
	mut vals := []string{}
	mut idx := st_idx
	if tokens[idx].typ != .labr {
		return error('vmod: not a valid array')
	}
	idx++
	for {
		tok := tokens[idx]
		match tok.typ {
			.str {
				vals << tok.val
				if tokens[idx + 1].typ !in [.comma, .rabr] {
					return error('vmod: invalid separator "${tokens[idx + 1].val}"')
				}
				idx += if tokens[idx + 1].typ == .comma { 2 } else { 1 }
			}
			.rabr {
				idx++
				break
			}
			else {
				return error('vmod: invalid token "$tok.val"')
			}
		}
	}
	return vals, idx
}

fn (mut p Parser) parse() ?Manifest {
	err_label := 'vmod:'
	if p.scanner.text.len == 0 {
		return error('$err_label no content.')
	}
	p.scanner.scan_all()
	tokens := p.scanner.tokens
	mut mn := Manifest{}
	if tokens[0].typ != .module_keyword {
		return error('vmod: v.mod files should start with Module')
	}
	mut i := 1
	for i < tokens.len {
		tok := tokens[i]
		match tok.typ {
			.lcbr {
				if tokens[i + 1].typ !in [.field_key, .rcbr] {
					return error('$err_label invalid content after opening brace')
				}
				i++
				continue
			}
			.rcbr {
				break
			}
			.field_key {
				field_name := tok.val.trim_right(':')
				if tokens[i + 1].typ !in [.str, .labr] {
					return error('$err_label value of field "$field_name" must be either string or an array of strings')
				}
				field_value := tokens[i + 1].val
				match field_name {
					'name' {
						mn.name = field_value
					}
					'version' {
						mn.version = field_value
					}
					'license' {
						mn.license = field_value
					}
					'repo_url' {
						mn.repo_url = field_value
					}
					'description' {
						mn.description = field_value
					}
					'author' {
						mn.author = field_value
					}
					'dependencies' {
						deps, idx := get_array_content(tokens, i + 1) ?
						mn.dependencies = deps
						i = idx
						continue
					}
					else {
						if tokens[i + 1].typ == .labr {
							vals, idx := get_array_content(tokens, i + 1) ?
							mn.unknown[field_name] = vals
							i = idx
							continue
						}
						mn.unknown[field_name] = [field_value]
					}
				}
				i += 2
				continue
			}
			.comma {
				if tokens[i - 1].typ !in [.str, .rabr] || tokens[i + 1].typ != .field_key {
					return error('$err_label invalid comma placement')
				}
				i++
				continue
			}
			else {
				return error('$err_label invalid token "$tok.val"')
			}
		}
	}
	return mn
}