From 9629afd7d6cd832ca1cd53281fbd2fdaf7a321c5 Mon Sep 17 00:00:00 2001 From: Niklas von Hertzen Date: Tue, 10 Aug 2021 01:01:25 +0800 Subject: [PATCH] feat: correctly split graphemes --- package-lock.json | 37 ++++++++++++++++++++-- package.json | 3 +- src/core/features.ts | 2 +- src/css/layout/text.ts | 47 ++-------------------------- src/css/syntax/tokenizer.ts | 2 +- src/css/types/functions/counter.ts | 2 +- src/render/canvas/canvas-renderer.ts | 2 +- 7 files changed, 43 insertions(+), 52 deletions(-) diff --git a/package-lock.json b/package-lock.json index c06d8da..77976c0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,10 +5,11 @@ "requires": true, "packages": { "": { - "version": "1.1.5", + "version": "1.2.1", "license": "MIT", "dependencies": { - "css-line-break": "2.0.1" + "css-line-break": "2.0.1", + "text-segmentation": "^1.0.1" }, "devDependencies": { "@babel/cli": "^7.4.3", @@ -23441,6 +23442,14 @@ "node": ">=0.10" } }, + "node_modules/text-segmentation": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/text-segmentation/-/text-segmentation-1.0.1.tgz", + "integrity": "sha512-A938k+suyk3pe0ifQRZgUcXN34dx1wg4W6dp0wXRIcsHtUpFH/w/Uc8RJVeR9sNd+wpDdSflg2fijdnLDseSmg==", + "dependencies": { + "utrie": "^1.0.0" + } + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -24517,6 +24526,14 @@ "node": ">= 0.4.0" } }, + "node_modules/utrie": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/utrie/-/utrie-1.0.0.tgz", + "integrity": "sha512-4m3e/2kStkq/UwEzcVXSxc3ifaCve6h+7HIAUd8g/CKdPunVw7648x3UAVREDQnSTJE7eNtfN67dDzzXlf+G1A==", + "dependencies": { + "base64-arraybuffer": "^0.2.0" + } + }, "node_modules/uuid": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.1.0.tgz", @@ -44509,6 +44526,14 @@ "integrity": "sha512-wiBrwC1EhBelW12Zy26JeOUkQ5mRu+5o8rpsJk5+2t+Y5vE7e842qtZDQ2g1NpX/29HdyFeJ4nSIhI47ENSxlQ==", "dev": true }, + "text-segmentation": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/text-segmentation/-/text-segmentation-1.0.1.tgz", + "integrity": "sha512-A938k+suyk3pe0ifQRZgUcXN34dx1wg4W6dp0wXRIcsHtUpFH/w/Uc8RJVeR9sNd+wpDdSflg2fijdnLDseSmg==", + "requires": { + "utrie": "^1.0.0" + } + }, "text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -45332,6 +45357,14 @@ "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=", "dev": true }, + "utrie": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/utrie/-/utrie-1.0.0.tgz", + "integrity": "sha512-4m3e/2kStkq/UwEzcVXSxc3ifaCve6h+7HIAUd8g/CKdPunVw7648x3UAVREDQnSTJE7eNtfN67dDzzXlf+G1A==", + "requires": { + "base64-arraybuffer": "^0.2.0" + } + }, "uuid": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.1.0.tgz", diff --git a/package.json b/package.json index b69716c..6399cf1 100644 --- a/package.json +++ b/package.json @@ -118,6 +118,7 @@ "homepage": "https://html2canvas.hertzen.com", "license": "MIT", "dependencies": { - "css-line-break": "2.0.1" + "css-line-break": "2.0.1", + "text-segmentation": "^1.0.1" } } diff --git a/src/core/features.ts b/src/core/features.ts index 78514bd..bfda213 100644 --- a/src/core/features.ts +++ b/src/core/features.ts @@ -1,4 +1,4 @@ -import {fromCodePoint, toCodePoints} from 'css-line-break'; +import {fromCodePoint, toCodePoints} from 'text-segmentation'; const testRangeBounds = (document: Document) => { const TEST_HEIGHT = 123; diff --git a/src/css/layout/text.ts b/src/css/layout/text.ts index 9b33e7c..51ba4c4 100644 --- a/src/css/layout/text.ts +++ b/src/css/layout/text.ts @@ -1,6 +1,5 @@ -import {OVERFLOW_WRAP} from '../property-descriptors/overflow-wrap'; import {CSSParsedDeclaration} from '../index'; -import {fromCodePoint, LineBreaker, toCodePoints} from 'css-line-break'; +import {splitGraphemes} from 'text-segmentation'; import {Bounds, parseBounds} from './bounds'; import {FEATURES} from '../../core/features'; import {Context} from '../../core/context'; @@ -21,7 +20,7 @@ export const parseTextBounds = ( styles: CSSParsedDeclaration, node: Text ): TextBounds[] => { - const textList = breakText(value, styles); + const textList = splitGraphemes(value); const textBounds: TextBounds[] = []; let offset = 0; textList.forEach((text) => { @@ -84,45 +83,3 @@ const createRange = (node: Text, offset: number, length: number): Range => { const getRangeBounds = (context: Context, node: Text, offset: number, length: number): Bounds => { return Bounds.fromClientRect(context, createRange(node, offset, length).getBoundingClientRect()); }; - -const breakText = (value: string, styles: CSSParsedDeclaration): string[] => { - return styles.letterSpacing !== 0 ? toCodePoints(value).map((i) => fromCodePoint(i)) : breakWords(value, styles); -}; - -// https://drafts.csswg.org/css-text/#word-separator -const wordSeparators = [0x0020, 0x00a0, 0x1361, 0x10100, 0x10101, 0x1039, 0x1091]; - -const breakWords = (str: string, styles: CSSParsedDeclaration): string[] => { - const breaker = LineBreaker(str, { - lineBreak: styles.lineBreak, - wordBreak: styles.overflowWrap === OVERFLOW_WRAP.BREAK_WORD ? 'break-word' : styles.wordBreak - }); - - const words = []; - let bk; - - while (!(bk = breaker.next()).done) { - if (bk.value) { - const value = bk.value.slice(); - const codePoints = toCodePoints(value); - let word = ''; - codePoints.forEach((codePoint) => { - if (wordSeparators.indexOf(codePoint) === -1) { - word += fromCodePoint(codePoint); - } else { - if (word.length) { - words.push(word); - } - words.push(fromCodePoint(codePoint)); - word = ''; - } - }); - - if (word.length) { - words.push(word); - } - } - } - - return words; -}; diff --git a/src/css/syntax/tokenizer.ts b/src/css/syntax/tokenizer.ts index 7a1ba51..5bb4feb 100644 --- a/src/css/syntax/tokenizer.ts +++ b/src/css/syntax/tokenizer.ts @@ -1,6 +1,6 @@ // https://www.w3.org/TR/css-syntax-3 -import {fromCodePoint, toCodePoints} from 'css-line-break'; +import {fromCodePoint, toCodePoints} from 'text-segmentation'; export enum TokenType { STRING_TOKEN, diff --git a/src/css/types/functions/counter.ts b/src/css/types/functions/counter.ts index 8404f68..4b3bfad 100644 --- a/src/css/types/functions/counter.ts +++ b/src/css/types/functions/counter.ts @@ -1,5 +1,5 @@ import {LIST_STYLE_TYPE} from '../../property-descriptors/list-style-type'; -import {fromCodePoint} from 'css-line-break'; +import {fromCodePoint} from 'text-segmentation'; import {contains} from '../../../core/bitwise'; import {CSSParsedCounterDeclaration} from '../../index'; diff --git a/src/render/canvas/canvas-renderer.ts b/src/render/canvas/canvas-renderer.ts index 2c4ded1..6e9be10 100644 --- a/src/render/canvas/canvas-renderer.ts +++ b/src/render/canvas/canvas-renderer.ts @@ -19,7 +19,7 @@ import { import {calculateBackgroundRendering, getBackgroundValueForIndex} from '../background'; import {isDimensionToken} from '../../css/syntax/parser'; import {TextBounds} from '../../css/layout/text'; -import {fromCodePoint, toCodePoints} from 'css-line-break'; +import {fromCodePoint, toCodePoints} from 'text-segmentation'; import {ImageElementContainer} from '../../dom/replaced-elements/image-element-container'; import {contentBox} from '../box-sizing'; import {CanvasElementContainer} from '../../dom/replaced-elements/canvas-element-container';