Use punycode to parse unicode characters correctly

This commit is contained in:
Niklas von Hertzen
2014-09-14 19:32:26 +03:00
parent b8d3688c29
commit 9d088fa431
11 changed files with 661 additions and 17 deletions

View File

@ -22,6 +22,8 @@ window.html2canvas = function(nodeList, options) {
});
};
// Publish the punycode object found on `this` as html2canvas.punycode. The text
// painting code reaches its helpers through window.html2canvas.punycode.ucs2
// (decode/encode) rather than through a bare `punycode` global.
// NOTE(review): presumably `this` is the global object at this point and the
// punycode script has already attached itself to it — confirm against the
// build wrapper, otherwise this assigns undefined.
window.html2canvas.punycode = this.punycode;
function renderDocument(document, options, windowWidth, windowHeight) {
return createWindowClone(document, windowWidth, windowHeight, options).then(function(container) {
log("Document cloned");

View File

@ -304,7 +304,11 @@ NodeParser.prototype.paintFormValue = function(container) {
NodeParser.prototype.paintText = function(container) {
container.applyTextTransform();
var textList = container.node.data.split(!this.options.letterRendering || noLetterSpacing(container) ? /(\b| )/ : "");
var characters = window.html2canvas.punycode.ucs2.decode(container.node.data);
var textList = (!this.options.letterRendering || noLetterSpacing(container)) && !hasUnicode(container.node.data) ? getWords(characters) : characters.map(function(character) {
return window.html2canvas.punycode.ucs2.encode([character]);
});
var weight = container.parent.fontWeight();
var size = container.parent.css('fontSize');
var family = container.parent.css('fontFamily');
@ -676,3 +680,41 @@ function stripQuotes(content) {
var first = content.substr(0, 1);
return (first === content.substr(content.length - 1) && first.match(/'|"/)) ? content.substr(1, content.length - 2) : content;
}
/**
 * Groups a sequence of character codes into alternating runs of "word"
 * characters and "boundary" characters (as classified by isWordBoundary) and
 * encodes each run with punycode's ucs2.encode.
 *
 * Every input code ends up in exactly one run and runs keep their original
 * order, so joining the result reproduces the encoded input. Boundary runs
 * (e.g. consecutive spaces) are returned as their own entries, not dropped.
 *
 * The input array is only read, never modified, so the caller may keep using
 * it after the call.
 *
 * @param {Array} characters Character codes, e.g. the output of
 *     window.html2canvas.punycode.ucs2.decode().
 * @returns {Array} One ucs2.encode() result per run; empty for empty input.
 */
function getWords(characters) {
    var words = [];
    var length = characters.length;
    var start = 0;
    var end;
    var inBoundaryRun;

    while (start < length) {
        // A run is the longest stretch of codes sharing the same classification.
        inBoundaryRun = isWordBoundary(characters[start]);
        end = start + 1;
        while (end < length && isWordBoundary(characters[end]) === inBoundaryRun) {
            end++;
        }
        // slice() copies the run, leaving the caller's array untouched.
        words.push(window.html2canvas.punycode.ucs2.encode(characters.slice(start, end)));
        start = end;
    }

    return words;
}
/**
 * Tells whether a character code is one of the separators used to split text
 * into words: space, carriage return, line feed, tab or hyphen-minus.
 *
 * The comparison is strict, so only the numbers themselves match (a string
 * such as "32" does not).
 *
 * @param {*} characterCode Character code to classify.
 * @returns {boolean} true for a separator code, false for anything else.
 */
function isWordBoundary(characterCode) {
    switch (characterCode) {
        case 32: // <space>
        case 13: // \r
        case 10: // \n
        case 9:  // \t
        case 45: // -
            return true;
        default:
            return false;
    }
}
/**
 * Reports whether the text contains at least one UTF-16 code unit above
 * U+00FF, i.e. anything outside the range \u0000-\u00ff.
 *
 * @param {string} string Text to inspect.
 * @returns {boolean} true if such a code unit is present, false otherwise.
 */
function hasUnicode(string) {
    var outsideFirstByteRange = /[^\u0000-\u00ff]/;
    return outsideFirstByteRange.exec(string) !== null;
}

1
src/punycode Submodule

Submodule src/punycode added at 5c04e6fe95