Compare commits

..

181 Commits
7.0.4 ... 9.1.0

Author SHA1 Message Date
04173d3bcc v9.1.0 2020-02-09 23:37:17 -08:00
3206a07476 changes for v9.1.0 2020-02-09 23:36:29 -08:00
9330236816 upgrade wakatime-cli to v13.0.4 2020-02-09 23:35:43 -08:00
885c11f01a Detect python in Windows LocalAppData install locations 2020-02-09 23:33:39 -08:00
8acda0157a upgrade embedded python to v3.8.1 2020-02-09 23:17:13 -08:00
935ddbd5f6 v9.0.2 2019-12-04 22:04:13 -08:00
b57b1eb696 changes for v9.0.2 2019-12-04 22:03:49 -08:00
6ec097b9d1 upgrade wakatime-cli to v13.0.3 2019-12-04 22:02:51 -08:00
b3ed36d3b2 fix badge locations 2019-11-27 22:28:20 -08:00
3669e4df6a add wakatime badge 2019-11-25 21:30:54 -08:00
3504096082 v9.0.1 2019-11-24 07:49:27 -08:00
5990947706 changes for v9.0.1 2019-11-24 07:48:58 -08:00
2246e31244 upgrade wakatime-cli to v13.0.2 2019-11-24 07:46:13 -08:00
b55fe702d3 v9.0.0 2019-06-23 19:03:26 -07:00
e0fbbb50bb changes for v9.0.0 2019-06-23 19:03:08 -07:00
32c0cb5a97 upgrade wakatime-cli to v12.0.0 2019-06-23 18:56:57 -07:00
67d8b0d24f v8.7.0 2019-05-29 06:44:07 -07:00
b8b2f4944b changes for v8.7.0 2019-05-29 06:43:54 -07:00
a20161164c prevent creating user settings file when api key found from common config 2019-05-29 06:42:38 -07:00
405211bb07 v8.6.1 2019-05-28 20:54:57 -07:00
ffc879c4eb changes for v8.6.1 2019-05-28 20:54:43 -07:00
1e23919694 fix import path of parseConfigFile 2019-05-28 20:53:58 -07:00
b2086a3cd2 v8.6.0 2019-05-27 12:41:14 -07:00
005b07520c changes for v8.6.0 2019-05-27 12:40:55 -07:00
60608bd322 Skip prompting for api key when found from common config file 2019-05-27 12:39:48 -07:00
cde8f8f1de v8.5.0 2019-05-10 09:45:11 -07:00
4adfca154c changes for v8.5.0 2019-05-10 09:44:57 -07:00
f7b3924a30 use wakatime-cli to fetch today coding time 2019-05-10 09:43:30 -07:00
db00024455 remove clock icon from status bar 2019-05-10 09:18:38 -07:00
9a6be7ca4e v8.4.2 2019-05-07 18:44:24 -07:00
1ea9b2a761 changes for v8.4.2 2019-05-07 18:43:57 -07:00
bd5e87e030 upgrade wakatime-cli to v11.0.0 2019-05-07 18:42:31 -07:00
0256ff4a6a v8.4.1 2019-05-01 10:57:50 -07:00
9d170b3276 changes for v8.4.1 2019-05-01 10:57:38 -07:00
c54e575210 use api subdomain for fetching summaries 2019-05-01 10:57:11 -07:00
07513d8f10 v8.4.0 2019-05-01 10:49:59 -07:00
30902cc050 changes for v8.4.0 2019-05-01 10:49:40 -07:00
aa7962d49a show today coding activity time in status bar 2019-05-01 10:48:28 -07:00
d8c662f3db v8.3.6 2019-04-30 16:25:28 -07:00
10d88ebf2d changes for v8.3.6 2019-04-30 16:25:14 -07:00
2f28c561b1 upgrade wakatime-cli to v10.8.4 2019-04-30 16:24:35 -07:00
24968507df v8.3.5 2019-04-30 09:21:47 -07:00
641cd539ed changes for v8.3.5 2019-04-30 09:21:33 -07:00
0c65d7e5b2 Upgrade wakatime-cli to v10.8.3 2019-04-30 09:20:29 -07:00
f0532f5b8e v8.3.4 2019-03-30 19:17:06 -07:00
8094db9680 changes for v8.3.4 2019-03-30 19:16:49 -07:00
bf20551849 upgrade wakatime-cli to v10.8.2 2019-03-30 18:53:40 -07:00
2b6e32b578 remove redundant line 2019-03-13 09:36:51 +09:00
363c3d38e2 update license file with copyright owner inline 2019-03-13 09:20:50 +09:00
88466d7db2 v8.3.3 2018-12-19 07:43:01 -08:00
122fcbbee5 changes for v8.3.3 2018-12-19 07:42:30 -08:00
c41fcec5d8 upgrade wakatime-cli to v10.6.1 2018-12-19 07:38:18 -08:00
be09b34d44 v8.3.2 2018-10-06 20:40:57 -07:00
e1ee1c1216 changes for v8.3.2 2018-10-06 20:40:37 -07:00
a37061924b Send buffered heartbeats to API every 30 seconds. 2018-10-06 20:40:12 -07:00
da01fa268b v8.3.1 2018-10-05 00:07:11 -07:00
c279418651 changes for v8.3.1 2018-10-05 00:06:46 -07:00
5cf2c8f7ac upgrade wakatime-cli to v10.4.1 2018-10-05 00:06:02 -07:00
d1455e77a8 v8.3.0 2018-10-03 00:55:02 -07:00
8499e7bafe changes for v8.3.0 2018-10-03 00:54:40 -07:00
abc26a0864 upgrade wakatime-cli to v10.4.0 2018-10-03 00:47:20 -07:00
71ad97ffe9 v8.2.0 2018-09-30 22:03:01 -07:00
3ec5995c99 changes for v8.2.0 2018-09-30 22:02:53 -07:00
195cf4de36 upgrade wakatime-cli to v10.3.0 2018-09-30 21:48:20 -07:00
b39eefb4f5 cross-platform Popen with hidden window 2018-09-30 21:29:04 -07:00
bbf5761e26 v8.1.2 2018-09-20 22:31:14 -07:00
c4df1dc633 changes for v8.1.2 2018-09-20 22:30:57 -07:00
360a491cda upgrade wakatime-cli to v10.2.4 2018-09-20 22:29:34 -07:00
f61a34eda7 build subprocess stdin near heartbeats 2018-09-13 19:39:05 -07:00
48123d7409 v8.1.1 2018-04-26 08:42:51 -07:00
c8a15d7ac0 changes for v8.1.1 2018-04-26 08:42:00 -07:00
202df81e04 upgrade wakatime-cli to v10.2.1 2018-04-26 08:40:02 -07:00
5e34f3f6a7 v8.1.0 2018-04-03 23:43:43 -07:00
d4441e5575 changes for v8.1.0 2018-04-03 23:43:26 -07:00
9eac8e2bd3 prefer python3 when running wakatime-cli 2018-04-03 23:42:02 -07:00
11d8fc3a09 v8.0.8 2018-03-15 01:51:31 -07:00
d1f1f51f23 changes for v8.0.8 2018-03-15 01:51:20 -07:00
b10bb36c09 Upgrade wakatime-cli to v10.1.3 2018-03-15 01:50:36 -07:00
dc9474befa v8.0.7 2018-03-15 01:32:56 -07:00
b910807e98 changes for v8.0.7 2018-03-15 01:32:37 -07:00
bc770515f0 Upgrade wakatime-cli to v10.1.2 2018-03-15 01:31:17 -07:00
9e102d7c5c v8.0.6 2018-01-04 23:34:40 -08:00
5c1770fb48 changes for v8.0.6 2018-01-04 23:34:13 -08:00
683397534c upgrade wakatime-cli to v10.1.0 2018-01-04 23:33:07 -08:00
1c92017543 v8.0.5 2017-11-24 16:16:47 -08:00
fda1307668 changes for v8.0.5 2017-11-24 16:16:34 -08:00
1c84d457c5 upgrade wakatime-cli to v10.0.5 2017-11-24 16:16:04 -08:00
1e680ce739 v8.0.4 2017-11-23 12:49:07 -08:00
376adbb7d7 changes for v8.0.4 2017-11-23 12:48:44 -08:00
e0040e185b upgrade wakatime-cli to v10.0.4 2017-11-23 12:41:59 -08:00
c4a88541d0 v8.0.3 2017-11-22 13:12:09 -08:00
0cf621d177 changes for v8.0.3 2017-11-22 13:11:48 -08:00
db9d6cec97 upgrade wakatime-cli to v10.0.3 2017-11-22 13:09:17 -08:00
2c17f49a6b v8.0.2 2017-11-15 18:36:43 -08:00
95116d6007 changes for v8.0.2 2017-11-15 18:36:28 -08:00
8c52596f8f upgrade wakatime-cli to v10.0.2 2017-11-15 18:35:43 -08:00
3109817dc7 v8.0.1 2017-11-09 09:10:19 -08:00
0c0f965763 changes for v8.0.1 2017-11-09 09:10:06 -08:00
1573e9c825 upgrade wakatime-cli to v10.0.1 2017-11-09 09:09:28 -08:00
a0b8f349c2 v8.0.0 2017-11-08 23:14:04 -08:00
2fb60b1589 changes for v8.0.0 2017-11-08 23:13:47 -08:00
02786a744e upgrade wakatime-cli to v10.0.0 2017-11-08 23:12:05 -08:00
729a4360ba v7.0.26 2017-11-07 18:55:25 -08:00
8f45de85ec changes for v7.0.26 2017-11-07 18:55:03 -08:00
4672f70c87 upgrade wakatime-cli to v9.0.1 2017-11-07 18:54:17 -08:00
46a9aae942 v7.0.25 2017-11-05 20:10:51 -08:00
9e77ce2697 changes for v7.0.25 2017-11-05 20:08:25 -08:00
385ba818cc ability to override python binary location 2017-11-05 20:04:48 -08:00
7492c3ce12 upgrade wakatime-cli to v9.0.0 2017-11-05 19:51:43 -08:00
03eed88917 v7.0.24 2017-10-29 11:38:11 -07:00
60a7ad96b5 changes for v7.0.24 2017-10-29 11:37:31 -07:00
2d1d5d336a link to issues from changelog 2017-10-29 11:35:38 -07:00
e659759b2d upgrade wakatime-cli to v8.0.5 2017-10-29 11:32:03 -07:00
a290e5d86d v7.0.23 2017-09-14 17:54:06 -07:00
d5b922bb10 changes for 7.0.23 2017-09-14 17:53:51 -07:00
ec7b5e3530 add missing setting description 2017-09-14 17:51:22 -07:00
aa3f2e8af6 Merge pull request #76 from krishnaglick/master
Adding "Include" Functionality
2017-09-14 17:46:55 -07:00
f4e53cd682 Added 'include' functionality 2017-09-14 13:54:50 -04:00
aba72b0f1e v7.0.22 2017-06-08 00:21:33 -07:00
5b9d86a57d changes for v7.0.22 2017-06-08 00:21:15 -07:00
fa40874635 upgrade wakatime-cli to v8.0.3 2017-06-08 00:20:10 -07:00
6d4a4cf9eb link to FAQ from troubleshooting section 2017-05-26 07:40:14 -07:00
f628b8dd11 v7.0.21 2017-05-24 23:55:36 -07:00
f932ee9fc6 changes for v7.0.21 2017-05-24 23:55:17 -07:00
2f14009279 Upgrade wakatime-cli to v8.0.2 2017-05-24 23:53:28 -07:00
453d96bf9c use unicode arrow 2017-05-02 19:24:54 -07:00
9de153f156 use unicode arrow 2017-04-15 10:13:42 -07:00
dcc782338d v7.0.20 2017-04-10 19:04:00 -07:00
9d0dba988a remove sub-list install instructions 2017-04-10 19:03:05 -07:00
e76f2e514e v7.0.19 2017-04-10 18:59:05 -07:00
224f7cd82a Remove /var/www/ from default ignored folders 2017-04-10 18:55:24 -07:00
3cce525a84 update dashboard step for install instructions 2017-03-30 00:23:41 -07:00
ce885501ad v7.0.18 2017-03-16 08:40:05 -07:00
c9448a9a19 changes for v7.0.18 2017-03-16 08:37:29 -07:00
04f8c61ebc no need to create empty common config file 2017-03-16 08:36:17 -07:00
04a4630024 upgrade wakatime-cli to v8.0.0 2017-03-16 08:26:24 -07:00
02138220fd allow setting a proxy in sublime-settings file 2017-03-10 08:11:40 -08:00
d0b162bdd8 Merge pull request #71 from marcus-at-localhorst/master
Use correct argument --exclude instead of --ignore
2017-03-10 08:02:44 -08:00
1b8895cd38 add --hidefilenames in config 2017-03-04 17:21:21 +01:00
938bbb73d1 Use correct argument --exclude instead of --ignore
https://github.com/wakatime/sublime-wakatime/issues/70
2017-03-04 17:02:04 +01:00
008fdc6b49 v7.0.17 2017-03-01 22:32:08 -08:00
a788625dd0 changes for v7.0.17 2017-03-01 22:31:49 -08:00
bcbce681c3 upgrade wakatime-cli to v7.0.4 2017-03-01 22:30:57 -08:00
35299db832 don't wrap ctrl in code blocks 2017-02-26 20:23:51 -08:00
eb7814624c escape backtick with backslash 2017-02-26 20:23:01 -08:00
1c092b2fd8 escape backslash correctly 2017-02-26 20:21:29 -08:00
507ef95f71 keyboard shortcut for opening console window 2017-02-26 20:08:15 -08:00
9777bc7788 v7.0.16 2017-02-20 16:20:09 -08:00
20b78defa6 changes for v7.0.16 2017-02-20 16:19:55 -08:00
8cb1c557d9 upgrade wakatime-cli to v7.0.2 2017-02-20 16:18:38 -08:00
20a1965f13 v7.0.15 2017-02-13 23:31:26 -08:00
0b802a554e changes for v7.0.15 2017-02-13 23:31:09 -08:00
30186c9b2c upgrade wakatime-cli to v6.2.2 2017-02-13 23:30:21 -08:00
311a0b5309 v7.0.14 2017-02-08 19:25:55 -08:00
b7602d89fb changes for v7.0.14 2017-02-08 19:25:42 -08:00
305de46e32 upgrade wakatime-cli to v6.2.1 2017-02-08 19:25:31 -08:00
c574234927 prevent crashing when logging object unable to be converted to unicode 2017-02-08 19:24:26 -08:00
a69c50f470 use str on objects which are not strings 2017-01-10 10:05:24 -08:00
f4b40089f3 v7.0.13 2016-11-11 11:31:43 +01:00
08394357b7 support old Sublime Text with Python 2.6 2016-11-11 11:29:51 +01:00
205d4eb163 fix import namespace 2016-11-11 11:16:17 +01:00
c4c27e4e9e v7.0.12 2016-10-24 12:41:46 +02:00
9167eb2558 upgrade wakatime-cli to v6.2.0 2016-10-24 12:39:37 +02:00
eaa3bb5180 use python v3.5.2 2016-10-15 16:16:39 +02:00
7755971d11 v7.0.11 2016-09-23 08:37:30 +02:00
7634be5446 handle UnicodeDecodeError exceptions when printing log messages 2016-09-23 08:36:23 +02:00
5e17ad88f6 v7.0.10 2016-09-22 10:28:04 +02:00
24d0f65116 upgrade wakatime-cli to v6.0.9 2016-09-22 10:26:57 +02:00
a326046733 handle UnicodeDecodeError when looking for python to fix #68 2016-09-22 10:24:49 +02:00
9bab00fd8b v7.0.9 2016-09-02 10:54:32 +02:00
b4a13a48b9 upgrade wakatime-cli to v6.0.8 2016-09-02 10:50:54 +02:00
21601f9688 v7.0.8 2016-07-21 15:17:59 +02:00
4c3ec87341 upgrade wakatime-cli to master 2016-07-21 15:16:03 +02:00
b149d7fc87 v7.0.7 2016-07-06 23:26:41 +02:00
52e6107c6e upgrade wakatime-cli to v6.0.7 2016-07-06 23:25:21 +02:00
b340637331 upgrade wakatime-cli to v6.0.6 2016-06-17 10:17:29 +02:00
044867449a v7.0.6 2016-06-13 16:50:10 +02:00
9e3f438823 upgrade wakatime-cli to v6.0.5 2016-06-13 16:48:53 +02:00
887d55c3f3 v7.0.5 2016-06-08 20:46:57 +02:00
19d54f3310 upgrade wakatime-cli to master version to fix unhandled retry exception 2016-06-08 20:43:24 +02:00
514a8762eb update settings screenshot 2016-05-21 15:56:16 +02:00
2126 changed files with 79728 additions and 90381 deletions

View File

@ -13,3 +13,4 @@ Patches and Suggestions
- Jimmy Selgen Nielsen <jimmy.selgen@gmail.com>
- Patrik Kernstock <info@pkern.at>
- Krishna Glick <krishnaglick@gmail.com>

View File

@ -3,6 +3,454 @@ History
-------
9.1.0 (2020-02-09)
++++++++++++++++++
- Detect python in Windows LocalAppData install locations.
- Upgrade wakatime-cli to v13.0.4.
- Bundle cryptography, pyopenssl, and ipaddress packages for improved SSL
support on Python2.
9.0.2 (2019-12-04)
++++++++++++++++++
- Upgrade wakatime-cli to v13.0.3.
- Support slashes in Mercurial and Git branch names.
`wakatime#199 <https://github.com/wakatime/wakatime/issues/199>`_
9.0.1 (2019-11-24)
++++++++++++++++++
- Upgrade wakatime-cli to v13.0.2.
- Filter dependencies longer than 200 characters.
- Close sqlite connection even when error raised.
`wakatime#196 <https://github.com/wakatime/wakatime/issues/196>`_
- Detect ColdFusion as root language instead of HTML.
- New arguments for reading and writing ini config file.
- Today argument shows categories when available.
- Prevent unnecessarily debug log when syncing offline heartbeats.
- Support for Python 3.7.
9.0.0 (2019-06-23)
++++++++++++++++++
- New optional config option hide_branch_names.
`wakatime#183 <https://github.com/wakatime/wakatime/issues/183>`_
8.7.0 (2019-05-29)
++++++++++++++++++
- Prevent creating user sublime-settings file when api key already exists in
common wakatime.cfg file.
`#98 <https://github.com/wakatime/sublime-wakatime/issues/98>`_
8.6.1 (2019-05-28)
++++++++++++++++++
- Fix parsing common wakatime.cfg file.
`#98 <https://github.com/wakatime/sublime-wakatime/issues/98>`_
8.6.0 (2019-05-27)
++++++++++++++++++
- Prevent prompting for api key when found from config file.
`#98 <https://github.com/wakatime/sublime-wakatime/issues/98>`_
8.5.0 (2019-05-10)
++++++++++++++++++
- Remove clock icon from status bar.
- Use wakatime-cli to fetch status bar coding time.
8.4.2 (2019-05-07)
++++++++++++++++++
- Upgrade wakatime-cli to v11.0.0.
- Rename argument --show-time-today to --today.
- New argument --show-time-today for printing Today's coding time.
8.4.1 (2019-05-01)
++++++++++++++++++
- Use api subdomain for fetching today's coding activity.
8.4.0 (2019-05-01)
++++++++++++++++++
- Show today's coding time in status bar.
8.3.6 (2019-04-30)
++++++++++++++++++
- Upgrade wakatime-cli to v10.8.4.
- Use wakatime fork of certifi package.
`#95 <https://github.com/wakatime/sublime-wakatime/issues/95>`_
8.3.5 (2019-04-30)
++++++++++++++++++
- Upgrade wakatime-cli to v10.8.3.
- Upgrade certifi to version 2019.03.09.
8.3.4 (2019-03-30)
++++++++++++++++++
- Upgrade wakatime-cli to v10.8.2.
- Detect go.mod files as Go language.
`jetbrains-wakatime#119 <https://github.com/wakatime/jetbrains-wakatime/issues/119>`_
- Detect C++ language from all C++ file extensions.
`vscode-wakatime#87 <https://github.com/wakatime/vscode-wakatime/issues/87>`_
- Add ssl_certs_file arg and config for custom ca bundles.
`wakatime#164 <https://github.com/wakatime/wakatime/issues/164>`_
- Fix bug causing random project names when hide project names enabled.
`vscode-wakatime#162 <https://github.com/wakatime/vscode-wakatime/issues/61>`_
- Add support for UNC network shares without drive letter mapped on Winows.
`wakatime#162 <https://github.com/wakatime/wakatime/issues/162>`_
8.3.3 (2018-12-19)
++++++++++++++++++
- Upgrade wakatime-cli to v10.6.1.
- Correctly parse include_only_with_project_file when set to false.
`wakatime#161 <https://github.com/wakatime/wakatime/issues/161>`_
- Support language argument for non-file entity types.
- Send 25 heartbeats per API request.
- New category "Writing Tests".
`wakatime#156 <https://github.com/wakatime/wakatime/issues/156>`_
- Fix bug caused by git config section without any submodule option defined.
`wakatime#152 <https://github.com/wakatime/wakatime/issues/152>`_
8.3.2 (2018-10-06)
++++++++++++++++++
- Send buffered heartbeats to API every 30 seconds.
8.3.1 (2018-10-05)
++++++++++++++++++
- Upgrade wakatime-cli to v10.4.1.
- Send 50 offline heartbeats to API per request with 1 second delay in between.
8.3.0 (2018-10-03)
++++++++++++++++++
- Upgrade wakatime-cli to v10.4.0.
- Support logging coding activity to remote network drive files on Windows
platform by detecting UNC path from drive letter.
`wakatime#72 <https://github.com/wakatime/wakatime/issues/72>`_
8.2.0 (2018-09-30)
++++++++++++++++++
- Prevent opening cmd window on Windows when running wakatime-cli.
`#91 <https://github.com/wakatime/sublime-wakatime/issues/91>`_
- Upgrade wakatime-cli to v10.3.0.
- Re-enable detecting projects from Subversion folder on Windows platform.
- Prevent opening cmd window on Windows when detecting project from Subversion.
- Run tests on Windows using Appveyor.
8.1.2 (2018-09-20)
++++++++++++++++++
- Upgrade wakatime-cli to v10.2.4.
- Default --sync-offline-activity to 100 instead of 5, so offline coding is
synced to dashboard faster.
- Batch heartbeats in groups of 10 per api request.
- New config hide_project_name and argument --hide-project-names for
obfuscating project names when sending coding activity to api.
- Fix mispelled Gosu language.
`wakatime#137 <https://github.com/wakatime/wakatime/issues/137>`_
- Remove metadata when hiding project or file names.
- New --local-file argument to be used when --entity is a remote file.
- New argument --sync-offline-activity for configuring the maximum offline
heartbeats to sync to the WakaTime API.
8.1.1 (2018-04-26)
++++++++++++++++++
- Upgrade wakatime-cli to v10.2.1.
- Force forward slash for file paths.
- New --category argument.
- New --exclude-unknown-project argument and corresponding config setting.
- Support for project detection from git worktree folders.
8.1.0 (2018-04-03)
++++++++++++++++++
- Prefer Python3 over Python2 when running wakatime-cli core.
- Improve detection of Python3 on Ubuntu 17.10 platforms.
8.0.8 (2018-03-15)
++++++++++++++++++
- Upgrade wakatime-cli to v10.1.3.
- Smarter C vs C++ vs Objective-C language detection.
8.0.7 (2018-03-15)
++++++++++++++++++
- Upgrade wakatime-cli to v10.1.2.
- Detect dependencies from Swift, Objective-C, TypeScript and JavaScript files.
- Categorize .mjs files as JavaScript.
`wakatime#121 <https://github.com/wakatime/wakatime/issues/121>`_
- Detect dependencies from Elm, Haskell, Haxe, Kotlin, Rust, and Scala files.
- Improved Matlab vs Objective-C language detection.
`wakatime#129 <https://github.com/wakatime/wakatime/issues/129>`_
8.0.6 (2018-01-04)
++++++++++++++++++
- Upgrade wakatime-cli to v10.1.0.
- Ability to only track folders containing a .wakatime-project file using new
include_only_with_project_file argument and config option.
8.0.5 (2017-11-24)
++++++++++++++++++
- Upgrade wakatime-cli to v10.0.5.
- Fix bug that caused heartbeats to be cached locally instead of sent to API.
8.0.4 (2017-11-23)
++++++++++++++++++
- Upgrade wakatime-cli to v10.0.4.
- Improve Java dependency detection.
- Skip null or missing heartbeats from extra heartbeats argument.
8.0.3 (2017-11-22)
++++++++++++++++++
- Upgrade wakatime-cli to v10.0.3.
- Support saving unicode heartbeats when working offline.
`wakatime#112 <https://github.com/wakatime/wakatime/issues/112>`_
8.0.2 (2017-11-15)
++++++++++++++++++
- Upgrade wakatime-cli to v10.0.2.
- Limit bulk syncing to 5 heartbeats per request.
`wakatime#109 <https://github.com/wakatime/wakatime/issues/109>`_
8.0.1 (2017-11-09)
++++++++++++++++++
- Upgrade wakatime-cli to v10.0.1.
- Parse array of results from bulk heartbeats endpoint, only saving heartbeats
to local offline cache when they were not accepted by the api.
8.0.0 (2017-11-08)
++++++++++++++++++
- Upgrade wakatime-cli to v10.0.0.
- Upload multiple heartbeats to bulk endpoint for improved network performance.
`wakatime#107 <https://github.com/wakatime/wakatime/issues/107>`_
7.0.26 (2017-11-07)
++++++++++++++++++
- Upgrade wakatime-cli to v9.0.1.
- Fix bug causing 401 response when hidefilenames is enabled.
`wakatime#106 <https://github.com/wakatime/wakatime/issues/106>`_
7.0.25 (2017-11-05)
++++++++++++++++++
- Ability to override python binary location in sublime-settings file.
`#78 <https://github.com/wakatime/sublime-wakatime/issues/78>`_
- Upgrade wakatime-cli to v9.0.0.
- Detect project and branch names from git submodules.
`wakatime#105 <https://github.com/wakatime/wakatime/issues/105>`_
7.0.24 (2017-10-29)
++++++++++++++++++
- Upgrade wakatime-cli to v8.0.5.
- Allow passing string arguments wrapped in extra quotes for plugins which
cannot properly escape spaces in arguments.
- Upgrade pytz to v2017.2.
- Upgrade requests to v2.18.4.
- Upgrade tzlocal to v1.4.
- Use WAKATIME_HOME env variable for offline and session caching.
`wakatime#102 <https://github.com/wakatime/wakatime/issues/102>`_
7.0.23 (2017-09-14)
++++++++++++++++++
- Add "include" setting to bypass ignored files.
`#89 <https://github.com/wakatime/sublime-wakatime/issues/89>`_
7.0.22 (2017-06-08)
++++++++++++++++++
- Upgrade wakatime-cli to v8.0.3.
- Improve Matlab language detection.
7.0.21 (2017-05-24)
++++++++++++++++++
- Upgrade wakatime-cli to v8.0.2.
- Only treat proxy string as NTLM proxy after unable to connect with HTTPS and
SOCKS proxy.
- Support running automated tests on Linux, OS X, and Windows.
- Ability to disable SSL cert verification.
`wakatime#90 <https://github.com/wakatime/wakatime/issues/90>`_
- Disable line count stats for files larger than 2MB to improve performance.
- Print error saying Python needs upgrading when requests can't be imported.
7.0.20 (2017-04-10)
++++++++++++++++++
- Fix install instructions formatting.
7.0.19 (2017-04-10)
++++++++++++++++++
- Remove /var/www/ from default ignored folders.
7.0.18 (2017-03-16)
++++++++++++++++++
- Upgrade wakatime-cli to v8.0.0.
- No longer creating ~/.wakatime.cfg file, since only using Sublime settings.
7.0.17 (2017-03-01)
++++++++++++++++++
- Upgrade wakatime-cli to v7.0.4.
7.0.16 (2017-02-20)
++++++++++++++++++
- Upgrade wakatime-cli to v7.0.2.
7.0.15 (2017-02-13)
++++++++++++++++++
- Upgrade wakatime-cli to v6.2.2.
- Upgrade pygments library to v2.2.0 for improved language detection.
7.0.14 (2017-02-08)
++++++++++++++++++
- Upgrade wakatime-cli to v6.2.1.
- Allow boolean or list of regex patterns for hidefilenames config setting.
7.0.13 (2016-11-11)
++++++++++++++++++
- Support old Sublime Text with Python 2.6.
- Fix bug that prevented reading default api key from existing config file.
7.0.12 (2016-10-24)
++++++++++++++++++
- Upgrade wakatime-cli to v6.2.0.
- Exit with status code 104 when api key is missing or invalid. Exit with
status code 103 when config file missing or invalid.
- New WAKATIME_HOME env variable for setting path to config and log files.
- Improve debug warning message from unsupported dependency parsers.
7.0.11 (2016-09-23)
++++++++++++++++++
- Handle UnicodeDecodeError when when logging.
`#68 <https://github.com/wakatime/sublime-wakatime/issues/68>`_
7.0.10 (2016-09-22)
++++++++++++++++++
- Handle UnicodeDecodeError when looking for python.
`#68 <https://github.com/wakatime/sublime-wakatime/issues/68>`_
- Upgrade wakatime-cli to v6.0.9.
7.0.9 (2016-09-02)
++++++++++++++++++
- Upgrade wakatime-cli to v6.0.8.
7.0.8 (2016-07-21)
++++++++++++++++++
- Upgrade wakatime-cli to master version to fix debug logging encoding bug.
7.0.7 (2016-07-06)
++++++++++++++++++
- Upgrade wakatime-cli to v6.0.7.
- Handle unknown exceptions from requests library by deleting cached session
object because it could be from a previous conflicting version.
- New hostname setting in config file to set machine hostname. Hostname
argument takes priority over hostname from config file.
- Prevent logging unrelated exception when logging tracebacks.
- Use correct namespace for pygments.lexers.ClassNotFound exception so it is
caught when dependency detection not available for a language.
7.0.6 (2016-06-13)
++++++++++++++++++
- Upgrade wakatime-cli to v6.0.5.
- Upgrade pygments to v2.1.3 for better language coverage.
7.0.5 (2016-06-08)
++++++++++++++++++
- Upgrade wakatime-cli to master version to fix bug in urllib3 package causing
unhandled retry exceptions.
- Prevent tracking git branch with detached head.
7.0.4 (2016-05-21)
++++++++++++++++++

View File

@ -1,7 +1,6 @@
BSD 3-Clause License
Copyright (c) 2014 by the respective authors (see AUTHORS file).
All rights reserved.
Copyright (c) 2014 Alan Hamlett.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

View File

@ -1,56 +1,56 @@
sublime-wakatime
================
# sublime-wakatime
[![Coding time tracker](https://wakatime.com/badge/github/wakatime/sublime-wakatime.svg)](https://wakatime.com/badge/github/wakatime/sublime-wakatime)
Metrics, insights, and time tracking automatically generated from your programming activity.
Installation
------------
## Installation
1. Install [Package Control](https://packagecontrol.io/installation).
2. Using [Package Control](https://packagecontrol.io/docs/usage):
2. In Sublime, press `ctrl+shift+p`(Windows, Linux) or `cmd+shift+p`(OS X).
a) Inside Sublime, press `ctrl+shift+p`(Windows, Linux) or `cmd+shift+p`(OS X).
3. Type `install`, then press `enter` with `Package Control: Install Package` selected.
b) Type `install`, then press `enter` with `Package Control: Install Package` selected.
4. Type `wakatime`, then press `enter` with the `WakaTime` plugin selected.
c) Type `wakatime`, then press `enter` with the `WakaTime` plugin selected.
5. Enter your [api key](https://wakatime.com/settings#apikey), then press `enter`.
3. Enter your [api key](https://wakatime.com/settings#apikey), then press `enter`.
4. Use Sublime and your time will be tracked for you automatically.
5. Visit https://wakatime.com/dashboard to see your logged time.
6. Use Sublime and your coding activity will be displayed on your [WakaTime dashboard](https://wakatime.com).
Screen Shots
------------
## Screen Shots
![Project Overview](https://wakatime.com/static/img/ScreenShots/Screen-Shot-2016-03-21.png)
Unresponsive Plugin Warning
---------------------------
## Unresponsive Plugin Warning
In Sublime Text 2, if you get a warning message:
A plugin (WakaTime) may be making Sublime Text unresponsive by taking too long (0.017332s) in its on_modified callback.
To fix this, go to `Preferences > Settings - User` then add the following setting:
To fix this, go to `Preferences Settings - User` then add the following setting:
`"detect_slow_plugins": false`
Troubleshooting
---------------
## Troubleshooting
First, turn on debug mode in your `WakaTime.sublime-settings` file.
![sublime user settings](https://wakatime.com/static/img/ScreenShots/sublime-wakatime-settings-menu.png?v=2)
![sublime user settings](https://wakatime.com/static/img/ScreenShots/sublime-wakatime-settings-menu.png?v=3)
Add the line: `"debug": true`
Then, open your Sublime Console with `View -> Show Console` to see the plugin executing the wakatime cli process when sending a heartbeat. Also, tail your `$HOME/.wakatime.log` file to debug wakatime cli problems.
Then, open your Sublime Console with `View Show Console` ( CTRL + \` ) to see the plugin executing the wakatime cli process when sending a heartbeat.
Also, tail your `$HOME/.wakatime.log` file to debug wakatime cli problems.
For more general troubleshooting information, see [wakatime/wakatime#troubleshooting](https://github.com/wakatime/wakatime#troubleshooting).
The [How to Debug Plugins][how to debug] guide shows how to check when coding activity was last received from your editor using the [User Agents API][user agents api].
For more general troubleshooting info, see the [wakatime-cli Troubleshooting Section][wakatime-cli-help].
[wakatime-cli-help]: https://github.com/wakatime/wakatime#troubleshooting
[how to debug]: https://wakatime.com/faq#debug-plugins
[user agents api]: https://wakatime.com/developers#user_agents

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
""" ==========================================================
File: WakaTime.py
Description: Automatic time tracking for Sublime Text 2 and 3.
@ -7,24 +8,26 @@ Website: https://wakatime.com/
==========================================================="""
__version__ = '7.0.4'
__version__ = '9.1.0'
import sublime
import sublime_plugin
import contextlib
import json
import os
import platform
import re
import subprocess
import sys
import time
import threading
import traceback
import urllib
import webbrowser
from datetime import datetime
from subprocess import STDOUT, PIPE
from zipfile import ZipFile
from subprocess import Popen, STDOUT, PIPE
try:
import _winreg as winreg # py2
except ImportError:
@ -40,11 +43,15 @@ except ImportError:
is_py2 = (sys.version_info[0] == 2)
is_py3 = (sys.version_info[0] == 3)
is_win = platform.system() == 'Windows'
if is_py2:
def u(text):
if text is None:
return None
if isinstance(text, unicode):
return text
try:
return text.decode('utf-8')
except:
@ -54,7 +61,13 @@ if is_py2:
try:
return unicode(text)
except:
return text
try:
return text.decode('utf-8', 'replace')
except:
try:
return unicode(str(text))
except:
return unicode('')
elif is_py3:
def u(text):
@ -71,7 +84,7 @@ elif is_py3:
try:
return str(text)
except:
return text
return text.decode('utf-8', 'replace')
else:
raise Exception('Unsupported Python version: {0}.{1}.{2}'.format(
@ -81,8 +94,22 @@ else:
))
class Popen(subprocess.Popen):
"""Patched Popen to prevent opening cmd window on Windows platform."""
def __init__(self, *args, **kwargs):
startupinfo = kwargs.get('startupinfo')
if is_win or True:
try:
startupinfo = startupinfo or subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
except AttributeError:
pass
kwargs['startupinfo'] = startupinfo
super(Popen, self).__init__(*args, **kwargs)
# globals
HEARTBEAT_FREQUENCY = 2
ST_VERSION = int(sublime.version())
PLUGIN_DIR = os.path.dirname(os.path.realpath(__file__))
API_CLIENT = os.path.join(PLUGIN_DIR, 'packages', 'wakatime', 'cli.py')
@ -93,8 +120,14 @@ LAST_HEARTBEAT = {
'file': None,
'is_write': False,
}
LAST_HEARTBEAT_SENT_AT = 0
LAST_FETCH_TODAY_CODING_TIME = 0
FETCH_TODAY_DEBOUNCE_COUNTER = 0
FETCH_TODAY_DEBOUNCE_SECONDS = 60
PYTHON_LOCATION = None
HEARTBEATS = queue.Queue()
HEARTBEAT_FREQUENCY = 2 # minutes between logging heartbeat when editing same file
SEND_BUFFER_SECONDS = 30 # seconds between sending buffered heartbeats to API
# Log Levels
@ -107,9 +140,45 @@ ERROR = 'ERROR'
# add wakatime package to path
sys.path.insert(0, os.path.join(PLUGIN_DIR, 'packages'))
try:
from wakatime.base import parseConfigFile
from wakatime.configs import parseConfigFile
except ImportError:
pass
def parseConfigFile():
return None
class ApiKey(object):
_key = None
def read(self):
if self._key:
return self._key
key = SETTINGS.get('api_key')
if key:
self._key = key
return self._key
try:
configs = parseConfigFile()
if configs:
if configs.has_option('settings', 'api_key'):
key = configs.get('settings', 'api_key')
if key:
self._key = key
return self._key
except:
pass
return self._key
def write(self, key):
global SETTINGS
self._key = key
SETTINGS.set('api_key', str(key))
sublime.save_settings(SETTINGS_FILE)
APIKEY = ApiKey()
def set_timeout(callback, seconds):
@ -135,81 +204,120 @@ def log(lvl, message, *args, **kwargs):
msg = message.format(*args)
elif len(kwargs) > 0:
msg = message.format(**kwargs)
print('[WakaTime] [{lvl}] {msg}'.format(lvl=lvl, msg=msg))
try:
print('[WakaTime] [{lvl}] {msg}'.format(lvl=lvl, msg=msg))
except UnicodeDecodeError:
print(u('[WakaTime] [{lvl}] {msg}').format(lvl=lvl, msg=u(msg)))
except RuntimeError:
set_timeout(lambda: log(lvl, message, *args, **kwargs), 0)
def resources_folder():
if platform.system() == 'Windows':
if is_win:
return os.path.join(os.getenv('APPDATA'), 'WakaTime')
else:
return os.path.join(os.path.expanduser('~'), '.wakatime')
def update_status_bar(status):
def update_status_bar(status=None, debounced=False, msg=None):
"""Updates the status bar."""
global LAST_FETCH_TODAY_CODING_TIME, FETCH_TODAY_DEBOUNCE_COUNTER
try:
if SETTINGS.get('status_bar_message'):
msg = datetime.now().strftime(SETTINGS.get('status_bar_message_fmt'))
if '{status}' in msg:
msg = msg.format(status=status)
if not msg and SETTINGS.get('status_bar_message') is not False and SETTINGS.get('status_bar_enabled'):
if SETTINGS.get('status_bar_coding_activity') and status == 'OK':
if debounced:
FETCH_TODAY_DEBOUNCE_COUNTER -= 1
if debounced or not LAST_FETCH_TODAY_CODING_TIME:
now = int(time.time())
if LAST_FETCH_TODAY_CODING_TIME and (FETCH_TODAY_DEBOUNCE_COUNTER > 0 or LAST_FETCH_TODAY_CODING_TIME > now - FETCH_TODAY_DEBOUNCE_SECONDS):
return
LAST_FETCH_TODAY_CODING_TIME = now
FetchStatusBarCodingTime().start()
return
else:
FETCH_TODAY_DEBOUNCE_COUNTER += 1
set_timeout(lambda: update_status_bar(status, debounced=True), FETCH_TODAY_DEBOUNCE_SECONDS)
return
else:
msg = 'WakaTime: {status}'.format(status=status)
if msg:
active_window = sublime.active_window()
if active_window:
for view in active_window.views():
view.set_status('wakatime', msg)
except RuntimeError:
set_timeout(lambda: update_status_bar(status), 0)
set_timeout(lambda: update_status_bar(status=status, debounced=debounced, msg=msg), 0)
def create_config_file():
"""Creates the .wakatime.cfg INI file in $HOME directory, if it does
not already exist.
"""
configFile = os.path.join(os.path.expanduser('~'), '.wakatime.cfg')
try:
with open(configFile) as fh:
pass
except IOError:
class FetchStatusBarCodingTime(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.debug = SETTINGS.get('debug')
self.api_key = APIKEY.read() or ''
self.proxy = SETTINGS.get('proxy')
self.python_binary = SETTINGS.get('python_binary')
def run(self):
if not self.api_key:
log(DEBUG, 'Missing WakaTime API key.')
return
python = self.python_binary
if not python or not python.strip():
python = python_binary()
if not python:
log(DEBUG, 'Missing Python.')
return
ua = 'sublime/%d sublime-wakatime/%s' % (ST_VERSION, __version__)
cmd = [
python,
API_CLIENT,
'--today',
'--key', str(bytes.decode(self.api_key.encode('utf8'))),
'--plugin', ua,
]
if self.debug:
cmd.append('--verbose')
if self.proxy:
cmd.extend(['--proxy', self.proxy])
log(DEBUG, ' '.join(obfuscate_apikey(cmd)))
try:
with open(configFile, 'w') as fh:
fh.write("[settings]\n")
fh.write("debug = false\n")
fh.write("hidefilenames = false\n")
except IOError:
process = Popen(cmd, stdout=PIPE, stderr=STDOUT)
output, err = process.communicate()
output = u(output)
retcode = process.poll()
if not retcode and output:
msg = 'Today: {output}'.format(output=output)
update_status_bar(msg=msg)
else:
log(DEBUG, 'wakatime-core today exited with status: {0}'.format(retcode))
if output:
log(DEBUG, u('wakatime-core today output: {0}').format(output))
except:
pass
def prompt_api_key():
global SETTINGS
create_config_file()
default_key = ''
try:
configs = parseConfigFile()
if configs is not None:
if configs.has_option('settings', 'api_key'):
default_key = configs.get('settings', 'api_key')
except:
pass
if SETTINGS.get('api_key'):
if APIKEY.read():
return True
else:
window = sublime.active_window()
if window:
def got_key(text):
if text:
SETTINGS.set('api_key', str(text))
sublime.save_settings(SETTINGS_FILE)
window = sublime.active_window()
if window:
window.show_input_panel('[WakaTime] Enter your wakatime.com api key:', default_key, got_key, None, None)
return True
else:
log(ERROR, 'Could not prompt for api key because no window found.')
return False
APIKEY.write(text)
window.show_input_panel('[WakaTime] Enter your wakatime.com api key:', '', got_key, None, None)
return True
else:
log(ERROR, 'Could not prompt for api key because no window found.')
return False
def python_binary():
@ -224,6 +332,21 @@ def python_binary():
'/usr/local/bin/',
'/usr/bin/',
]
if is_win and os.getenv('LOCALAPPDATA'):
appdata = os.getenv('LOCALAPPDATA')
ver = 39
while ver >= 27:
if ver >= 30 and ver <= 33:
ver -= 1
continue
paths.append('\\python{ver}\\'.format(ver=ver))
paths.append.push('\\Python{ver}\\'.format(ver=ver))
paths.append('{appdata}\\Programs\Python{ver}\\'.format(appdata=appdata, ver=ver))
paths.append('{appdata}\\Programs\Python{ver}-32\\'.format(appdata=appdata, ver=ver))
paths.append('{appdata}\\Programs\Python{ver}-64\\'.format(appdata=appdata, ver=ver))
ver -= 1
for path in paths:
path = find_python_in_folder(path)
if path is not None:
@ -250,7 +373,7 @@ def set_python_binary_location(path):
def find_python_from_registry(location, reg=None):
if platform.system() != 'Windows' or winreg is None:
if not is_win or winreg is None:
return None
if reg is None:
@ -302,19 +425,27 @@ def find_python_from_registry(location, reg=None):
reg=reg,
key=location,
))
except:
log(ERROR, 'Could not read registry value "{reg}\\{key}":\n{exc}'.format(
reg=reg,
key=location,
exc=traceback.format_exc(),
))
return val
def find_python_in_folder(folder, headless=True):
def find_python_in_folder(folder, python3=True, headless=True):
pattern = re.compile(r'\d+\.\d+')
path = 'python'
if folder is not None:
if folder:
path = os.path.realpath(os.path.join(folder, 'python'))
if headless:
if python3:
path = u(path) + u('3')
elif headless:
path = u(path) + u('w')
log(DEBUG, u('Looking for Python at: {0}').format(path))
log(DEBUG, u('Looking for Python at: {0}').format(u(path)))
try:
process = Popen([path, '--version'], stdout=PIPE, stderr=STDOUT)
output, err = process.communicate()
@ -326,9 +457,13 @@ def find_python_in_folder(folder, headless=True):
except:
log(DEBUG, u(sys.exc_info()[1]))
if headless:
path = find_python_in_folder(folder, headless=False)
if path is not None:
if python3:
path = find_python_in_folder(folder, python3=False, headless=headless)
if path:
return path
elif headless:
path = find_python_in_folder(folder, python3=python3, headless=False)
if path:
return path
return None
@ -428,11 +563,18 @@ def append_heartbeat(entity, timestamp, is_write, view, project, folders):
}
# process the queue of heartbeats in the future
seconds = 4
set_timeout(process_queue, seconds)
set_timeout(lambda: process_queue(timestamp), SEND_BUFFER_SECONDS)
def process_queue():
def process_queue(timestamp):
global LAST_HEARTBEAT_SENT_AT
# Prevent sending heartbeats more often than SEND_BUFFER_SECONDS
now = int(time.time())
if timestamp != LAST_HEARTBEAT['time'] and LAST_HEARTBEAT_SENT_AT > now - SEND_BUFFER_SECONDS:
return
LAST_HEARTBEAT_SENT_AT = now
try:
heartbeat = HEARTBEATS.get_nowait()
except queue.Empty:
@ -461,8 +603,12 @@ class SendHeartbeatsThread(threading.Thread):
threading.Thread.__init__(self)
self.debug = SETTINGS.get('debug')
self.api_key = SETTINGS.get('api_key', '')
self.api_key = APIKEY.read() or ''
self.ignore = SETTINGS.get('ignore', [])
self.include = SETTINGS.get('include', [])
self.hidefilenames = SETTINGS.get('hidefilenames')
self.proxy = SETTINGS.get('proxy')
self.python_binary = SETTINGS.get('python_binary')
self.heartbeat = heartbeat
self.has_extra_heartbeats = False
@ -499,11 +645,14 @@ class SendHeartbeatsThread(threading.Thread):
return heartbeat
def send_heartbeats(self):
if python_binary():
python = self.python_binary
if not python or not python.strip():
python = python_binary()
if python:
heartbeat = self.build_heartbeat(**self.heartbeat)
ua = 'sublime/%d sublime-wakatime/%s' % (ST_VERSION, __version__)
cmd = [
python_binary(),
python,
API_CLIENT,
'--entity', heartbeat['entity'],
'--time', str('%f' % heartbeat['timestamp']),
@ -518,25 +667,28 @@ class SendHeartbeatsThread(threading.Thread):
if heartbeat.get('cursorpos') is not None:
cmd.extend(['--cursorpos', heartbeat['cursorpos']])
for pattern in self.ignore:
cmd.extend(['--ignore', pattern])
cmd.extend(['--exclude', pattern])
for pattern in self.include:
cmd.extend(['--include', pattern])
if self.debug:
cmd.append('--verbose')
if self.hidefilenames:
cmd.append('--hidefilenames')
if self.proxy:
cmd.extend(['--proxy', self.proxy])
if self.has_extra_heartbeats:
cmd.append('--extra-heartbeats')
stdin = PIPE
extra_heartbeats = [self.build_heartbeat(**x) for x in self.extra_heartbeats]
extra_heartbeats = json.dumps(extra_heartbeats)
extra_heartbeats = json.dumps([self.build_heartbeat(**x) for x in self.extra_heartbeats])
inp = "{0}\n".format(extra_heartbeats).encode('utf-8')
else:
extra_heartbeats = None
stdin = None
inp = None
log(DEBUG, ' '.join(obfuscate_apikey(cmd)))
try:
process = Popen(cmd, stdin=stdin, stdout=PIPE, stderr=STDOUT)
inp = None
if self.has_extra_heartbeats:
inp = "{0}\n".format(extra_heartbeats)
inp = inp.encode('utf-8')
output, err = process.communicate(input=inp)
output = u(output)
retcode = process.poll()
@ -572,7 +724,7 @@ class DownloadPython(threading.Thread):
def run(self):
log(INFO, 'Downloading embeddable Python...')
ver = '3.5.0'
ver = '3.8.1'
arch = 'amd64' if platform.architecture()[0] == '64bit' else 'win32'
url = 'https://www.python.org/ftp/python/{ver}/python-{ver}-embed-{arch}.zip'.format(
ver=ver,
@ -589,7 +741,7 @@ class DownloadPython(threading.Thread):
urllib.request.urlretrieve(url, zip_file)
log(INFO, 'Extracting Python...')
with ZipFile(zip_file) as zf:
with contextlib.closing(ZipFile(zip_file)) as zf:
path = os.path.join(resources_folder(), 'python')
zf.extractall(path)
@ -606,11 +758,11 @@ def plugin_loaded():
SETTINGS = sublime.load_settings(SETTINGS_FILE)
log(INFO, 'Initializing WakaTime plugin v%s' % __version__)
update_status_bar('Initializing')
update_status_bar('Initializing...')
if not python_binary():
log(WARNING, 'Python binary not found.')
if platform.system() == 'Windows':
if is_win:
set_timeout(download_python, 0)
else:
sublime.error_message("Unable to find Python binary!\nWakaTime needs Python to work correctly.\n\nGo to https://www.python.org/downloads")
@ -622,6 +774,7 @@ def plugin_loaded():
def after_loaded():
if not prompt_api_key():
set_timeout(after_loaded, 0.5)
update_status_bar('OK')
# need to call plugin_loaded because only ST3 will auto-call it

View File

@ -6,18 +6,32 @@
// Your api key from https://wakatime.com/#apikey
// Set this in your User specific WakaTime.sublime-settings file.
"api_key": "",
// Ignore files; Files (including absolute paths) that match one of these
// POSIX regular expressions will not be logged.
"ignore": ["^/tmp/", "^/etc/", "^/var/", "COMMIT_EDITMSG$", "PULLREQ_EDITMSG$", "MERGE_MSG$", "TAG_EDITMSG$"],
// Debug mode. Set to true for verbose logging. Defaults to false.
"debug": false,
// Status bar message. Set to false to hide status bar message.
// Proxy with format https://user:pass@host:port or socks5://user:pass@host:port or domain\\user:pass.
"proxy": "",
// Ignore files; Files (including absolute paths) that match one of these
// POSIX regular expressions will not be logged.
"ignore": ["^/tmp/", "^/etc/", "^/var/(?!www/).*", "COMMIT_EDITMSG$", "PULLREQ_EDITMSG$", "MERGE_MSG$", "TAG_EDITMSG$"],
// Include files; Files (including absolute paths) that match one of these
// POSIX regular expressions will bypass your ignore setting.
"include": [".*"],
// Status bar for surfacing errors and displaying today's coding time. Set
// to false to hide. Defaults to true.
"status_bar_enabled": true,
// Show today's coding activity in WakaTime status bar item.
// Defaults to true.
"status_bar_message": true,
// Status bar message format.
"status_bar_message_fmt": "WakaTime {status} %I:%M %p"
"status_bar_coding_activity": true,
// Obfuscate file paths when sending to API. Your dashboard will no longer display coding activity per file.
"hidefilenames": false,
// Python binary location. Uses python from your PATH by default.
"python_binary": ""
}

View File

@ -1,9 +1,9 @@
__title__ = 'wakatime'
__description__ = 'Common interface to the WakaTime api.'
__url__ = 'https://github.com/wakatime/wakatime'
__version_info__ = ('6', '0', '3')
__version_info__ = ('13', '0', '4')
__version__ = '.'.join(__version_info__)
__author__ = 'Alan Hamlett'
__author_email__ = 'alan@wakatime.com'
__license__ = 'BSD'
__copyright__ = 'Copyright 2016 Alan Hamlett'
__copyright__ = 'Copyright 2017 Alan Hamlett'

349
packages/wakatime/api.py Normal file
View File

@ -0,0 +1,349 @@
# -*- coding: utf-8 -*-
"""
wakatime.api
~~~~~~~~~~~~
:copyright: (c) 2017 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from __future__ import print_function
import base64
import logging
import sys
import traceback
from .compat import u, is_py3, json
from .constants import API_ERROR, AUTH_ERROR, SUCCESS, UNKNOWN_ERROR
from .offlinequeue import Queue
from .session_cache import SessionCache
from .utils import get_hostname, get_user_agent
from .packages import tzlocal
log = logging.getLogger('WakaTime')
try:
from .packages import requests
except ImportError: # pragma: nocover
log.traceback(logging.ERROR)
print(traceback.format_exc())
log.error('Please upgrade Python to the latest version.')
print('Please upgrade Python to the latest version.')
sys.exit(UNKNOWN_ERROR)
from .packages.requests.exceptions import RequestException
def send_heartbeats(heartbeats, args, configs, use_ntlm_proxy=False):
"""Send heartbeats to WakaTime API.
Returns `SUCCESS` when heartbeat was sent, otherwise returns an error code.
"""
if len(heartbeats) == 0:
return SUCCESS
api_url = args.api_url
if not api_url:
api_url = 'https://api.wakatime.com/api/v1/users/current/heartbeats.bulk'
log.debug('Sending heartbeats to api at %s' % api_url)
timeout = args.timeout
if not timeout:
timeout = 60
data = [h.sanitize().dict() for h in heartbeats]
log.debug(data)
# setup api request
request_body = json.dumps(data)
api_key = u(base64.b64encode(str.encode(args.key) if is_py3 else args.key))
auth = u('Basic {api_key}').format(api_key=api_key)
headers = {
'User-Agent': get_user_agent(args.plugin),
'Content-Type': 'application/json',
'Accept': 'application/json',
'Authorization': auth,
}
hostname = get_hostname(args)
if hostname:
headers['X-Machine-Name'] = u(hostname).encode('utf-8')
# add Olson timezone to request
try:
tz = tzlocal.get_localzone()
except:
tz = None
if tz:
headers['TimeZone'] = u(tz.zone).encode('utf-8')
session_cache = SessionCache()
session = session_cache.get()
should_try_ntlm = False
proxies = {}
if args.proxy:
if use_ntlm_proxy:
from .packages.requests_ntlm import HttpNtlmAuth
username = args.proxy.rsplit(':', 1)
password = ''
if len(username) == 2:
password = username[1]
username = username[0]
session.auth = HttpNtlmAuth(username, password, session)
else:
should_try_ntlm = '\\' in args.proxy
proxies['https'] = args.proxy
ssl_verify = not args.nosslverify
if args.ssl_certs_file and ssl_verify:
ssl_verify = args.ssl_certs_file
# send request to api
response, code = None, None
try:
response = session.post(api_url, data=request_body, headers=headers,
proxies=proxies, timeout=timeout,
verify=ssl_verify)
except RequestException:
if should_try_ntlm:
return send_heartbeats(heartbeats, args, configs, use_ntlm_proxy=True)
else:
exception_data = {
sys.exc_info()[0].__name__: u(sys.exc_info()[1]),
}
if log.isEnabledFor(logging.DEBUG):
exception_data['traceback'] = traceback.format_exc()
if args.offline:
queue = Queue(args, configs)
queue.push_many(heartbeats)
if log.isEnabledFor(logging.DEBUG):
log.warn(exception_data)
else:
log.error(exception_data)
except: # delete cached session when requests raises unknown exception
if should_try_ntlm:
return send_heartbeats(heartbeats, args, configs, use_ntlm_proxy=True)
else:
exception_data = {
sys.exc_info()[0].__name__: u(sys.exc_info()[1]),
'traceback': traceback.format_exc(),
}
if args.offline:
queue = Queue(args, configs)
queue.push_many(heartbeats)
log.warn(exception_data)
else:
code = response.status_code if response is not None else None
content = response.text if response is not None else None
if _success(code):
results = _get_results(response)
_process_server_results(heartbeats, code, content, results, args, configs)
session_cache.save(session)
return SUCCESS
else:
log.debug({
'response_code': code,
'response_text': content,
})
if should_try_ntlm:
return send_heartbeats(heartbeats, args, configs, use_ntlm_proxy=True)
_handle_unsent_heartbeats(heartbeats, code, content, args, configs)
session_cache.delete()
return AUTH_ERROR if code == 401 else API_ERROR
def get_time_today(args, use_ntlm_proxy=False):
"""Get coding time from WakaTime API for given time range.
Returns total time as string or `None` when unable to fetch summary from
the API. When verbose output enabled, returns error message when unable to
fetch summary.
"""
url = 'https://api.wakatime.com/api/v1/users/current/summaries'
timeout = args.timeout
if not timeout:
timeout = 60
api_key = u(base64.b64encode(str.encode(args.key) if is_py3 else args.key))
auth = u('Basic {api_key}').format(api_key=api_key)
headers = {
'User-Agent': get_user_agent(args.plugin),
'Accept': 'application/json',
'Authorization': auth,
}
session_cache = SessionCache()
session = session_cache.get()
should_try_ntlm = False
proxies = {}
if args.proxy:
if use_ntlm_proxy:
from .packages.requests_ntlm import HttpNtlmAuth
username = args.proxy.rsplit(':', 1)
password = ''
if len(username) == 2:
password = username[1]
username = username[0]
session.auth = HttpNtlmAuth(username, password, session)
else:
should_try_ntlm = '\\' in args.proxy
proxies['https'] = args.proxy
ssl_verify = not args.nosslverify
if args.ssl_certs_file and ssl_verify:
ssl_verify = args.ssl_certs_file
params = {
'start': 'today',
'end': 'today',
}
# send request to api
response, code = None, None
try:
response = session.get(url, params=params, headers=headers,
proxies=proxies, timeout=timeout,
verify=ssl_verify)
except RequestException:
if should_try_ntlm:
return get_time_today(args, use_ntlm_proxy=True)
session_cache.delete()
if log.isEnabledFor(logging.DEBUG):
exception_data = {
sys.exc_info()[0].__name__: u(sys.exc_info()[1]),
'traceback': traceback.format_exc(),
}
log.error(exception_data)
return '{0}: {1}'.format(sys.exc_info()[0].__name__, u(sys.exc_info()[1])), API_ERROR
return None, API_ERROR
except: # delete cached session when requests raises unknown exception
if should_try_ntlm:
return get_time_today(args, use_ntlm_proxy=True)
session_cache.delete()
if log.isEnabledFor(logging.DEBUG):
exception_data = {
sys.exc_info()[0].__name__: u(sys.exc_info()[1]),
'traceback': traceback.format_exc(),
}
log.error(exception_data)
return '{0}: {1}'.format(sys.exc_info()[0].__name__, u(sys.exc_info()[1])), API_ERROR
return None, API_ERROR
code = response.status_code if response is not None else None
content = response.text if response is not None else None
if code == requests.codes.ok:
try:
summary = response.json()['data'][0]
if len(summary['categories']) > 1:
text = ', '.join(['{0} {1}'.format(x['text'], x['name'].lower()) for x in summary['categories']])
else:
text = summary['grand_total']['text']
session_cache.save(session)
return text, SUCCESS
except:
if log.isEnabledFor(logging.DEBUG):
exception_data = {
sys.exc_info()[0].__name__: u(sys.exc_info()[1]),
'traceback': traceback.format_exc(),
}
log.error(exception_data)
return '{0}: {1}'.format(sys.exc_info()[0].__name__, u(sys.exc_info()[1])), API_ERROR
return None, API_ERROR
else:
if should_try_ntlm:
return get_time_today(args, use_ntlm_proxy=True)
session_cache.delete()
log.debug({
'response_code': code,
'response_text': content,
})
if log.isEnabledFor(logging.DEBUG):
return 'Error: {0}'.format(code), API_ERROR
return None, API_ERROR
def _process_server_results(heartbeats, code, content, results, args, configs):
log.debug({
'response_code': code,
'results': results,
})
for i in range(len(results)):
if len(heartbeats) <= i:
log.warn('Results from api not matching heartbeats sent.')
break
try:
c = results[i][1]
except:
log.traceback(logging.WARNING)
c = 0
try:
text = json.dumps(results[i][0])
except:
log.traceback(logging.WARNING)
text = ''
if not _success(c):
_handle_unsent_heartbeats([heartbeats[i]], c, text, args, configs)
leftover = len(heartbeats) - len(results)
if leftover > 0:
log.warn('Missing {0} results from api.'.format(leftover))
start = len(heartbeats) - leftover
_handle_unsent_heartbeats(heartbeats[start:], code, content, args, configs)
def _handle_unsent_heartbeats(heartbeats, code, content, args, configs):
if args.offline:
if code == 400:
log.error({
'response_code': code,
'response_content': content,
})
else:
if log.isEnabledFor(logging.DEBUG):
log.warn({
'response_code': code,
'response_content': content,
})
queue = Queue(args, configs)
queue.push_many(heartbeats)
else:
log.error({
'response_code': code,
'response_content': content,
})
def _get_results(response):
results = []
if response is not None:
try:
results = response.json()['responses']
except:
log.traceback(logging.WARNING)
return results
def _success(code):
return code == requests.codes.created or code == requests.codes.accepted

View File

@ -0,0 +1,412 @@
# -*- coding: utf-8 -*-
"""
wakatime.arguments
~~~~~~~~~~~~~~~~~~
Command-line arguments.
:copyright: (c) 2016 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from __future__ import print_function
import os
import re
import time
import traceback
from .__about__ import __version__
from .compat import basestring
from .configs import getConfigFile, parseConfigFile
from .constants import AUTH_ERROR, DEFAULT_SYNC_OFFLINE_ACTIVITY, SUCCESS
from .packages import argparse
class FileAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
if isinstance(values, basestring) and values.startswith('"'):
values = re.sub(r'\\"', '"', values.strip('"'))
try:
if os.path.isfile(values):
values = os.path.realpath(values)
except: # pragma: nocover
pass
setattr(namespace, self.dest, values)
class StoreWithoutQuotes(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
if isinstance(values, basestring) and values.startswith('"'):
values = re.sub(r'\\"', '"', values.strip('"'))
setattr(namespace, self.dest, values)
def parse_arguments():
"""Parse command line arguments and configs from ~/.wakatime.cfg.
Command line arguments take precedence over config file settings.
Returns instances of ArgumentParser and SafeConfigParser.
"""
# define supported command line arguments
parser = argparse.ArgumentParser(description='Common interface for the ' +
'WakaTime api.')
parser.add_argument('--entity', dest='entity', metavar='FILE',
action=FileAction,
help='Absolute path to file for the heartbeat. Can ' +
'also be a url, domain or app when ' +
'--entity-type is not file.')
parser.add_argument('--file', dest='file', action=FileAction,
help=argparse.SUPPRESS)
parser.add_argument('--key', dest='key', action=StoreWithoutQuotes,
metavar='API_KEY',
help='Your wakatime api key; uses api_key from ' +
'~/.wakatime.cfg by default.')
parser.add_argument('--write', dest='is_write', action='store_true',
help='When set, tells api this heartbeat was ' +
'triggered from writing to a file.')
parser.add_argument('--plugin', dest='plugin', action=StoreWithoutQuotes,
help='Optional text editor plugin name and version ' +
'for User-Agent header.')
parser.add_argument('--time', dest='timestamp', metavar='time',
type=float, action=StoreWithoutQuotes,
help='Optional floating-point unix epoch timestamp. ' +
'Uses current time by default.')
parser.add_argument('--lineno', dest='lineno', action=StoreWithoutQuotes,
metavar='INT',
help='Optional line number. This is the current ' +
'line being edited.')
parser.add_argument('--cursorpos', dest='cursorpos',
metavar='INT', action=StoreWithoutQuotes,
help='Optional cursor position in the current file.')
parser.add_argument('--entity-type', dest='entity_type',
action=StoreWithoutQuotes,
help='Entity type for this heartbeat. Can be ' +
'"file", "domain" or "app". Defaults to "file".')
parser.add_argument('--category', dest='category',
action=StoreWithoutQuotes,
help='Category of this heartbeat activity. Can be ' +
'"coding", "building", "indexing", ' +
'"debugging", "running tests", ' +
'"writing tests", "manual testing", ' +
'"code reviewing", "browsing", or "designing". ' +
'Defaults to "coding".')
parser.add_argument('--proxy', dest='proxy', action=StoreWithoutQuotes,
help='Optional proxy configuration. Supports HTTPS '+
'and SOCKS proxies. For example: '+
'https://user:pass@host:port or '+
'socks5://user:pass@host:port or ' +
'domain\\user:pass')
parser.add_argument('--no-ssl-verify', dest='nosslverify',
action='store_true',
help='Disables SSL certificate verification for HTTPS '+
'requests. By default, SSL certificates are ' +
'verified.')
parser.add_argument('--ssl-certs-file', dest='ssl_certs_file',
metavar='FILE', action=StoreWithoutQuotes,
help='Override the bundled Python Requests CA certs ' +
'file. By default, uses certifi for ca certs.')
parser.add_argument('--project', dest='project', action=StoreWithoutQuotes,
help='Optional project name.')
parser.add_argument('--alternate-project', dest='alternate_project',
metavar='PROJECT', action=StoreWithoutQuotes,
help='Optional alternate project name. ' +
'Auto-discovered project takes priority.')
parser.add_argument('--alternate-language', dest='alternate_language',
action=StoreWithoutQuotes,
help=argparse.SUPPRESS)
parser.add_argument('--language', dest='language',
action=StoreWithoutQuotes,
help='Optional language name. If valid, takes ' +
'priority over auto-detected language.')
parser.add_argument('--local-file', dest='local_file', metavar='FILE',
action=FileAction,
help='Absolute path to local file for the ' +
'heartbeat. When --entity is a remote file, ' +
'this local file will be used for stats and ' +
'just the value of --entity sent with heartbeat.')
parser.add_argument('--hostname', dest='hostname',
action=StoreWithoutQuotes,
help='Hostname of current machine.')
parser.add_argument('--disable-offline', dest='offline',
action='store_false',
help='Disables offline time logging instead of ' +
'queuing logged time.')
parser.add_argument('--disableoffline', dest='offline_deprecated',
action='store_true',
help=argparse.SUPPRESS)
parser.add_argument('--hide-file-names', dest='hide_file_names',
action='store_true',
help='Obfuscate filenames. Will not send file names ' +
'to api.')
parser.add_argument('--hide-filenames', dest='hide_filenames',
action='store_true',
help=argparse.SUPPRESS)
parser.add_argument('--hidefilenames', dest='hidefilenames',
action='store_true',
help=argparse.SUPPRESS)
parser.add_argument('--hide-project-names', dest='hide_project_names',
action='store_true',
help='Obfuscate project names. When a project ' +
'folder is detected instead of using the ' +
'folder name as the project, a ' +
'.wakatime-project file is created with a ' +
'random project name.')
parser.add_argument('--hide-branch-names', dest='hide_branch_names',
action='store_true',
help='Obfuscate branch names. Will not send revision ' +
'control branch names to api.')
parser.add_argument('--exclude', dest='exclude', action='append',
metavar='PATH',
help='Filename patterns to exclude from logging. ' +
'POSIX regex syntax. Can be used more than once.')
parser.add_argument('--exclude-unknown-project',
dest='exclude_unknown_project', action='store_true',
help='When set, any activity where the project ' +
'cannot be detected will be ignored.')
parser.add_argument('--include', dest='include', action='append',
metavar='PATH',
help='Filename patterns to log. When used in ' +
'combination with --exclude, files matching ' +
'include will still be logged. POSIX regex ' +
'syntax. Can be used more than once.')
parser.add_argument('--include-only-with-project-file',
dest='include_only_with_project_file',
action='store_true',
help='Disables tracking folders unless they contain ' +
'a .wakatime-project file. Defaults to false.')
parser.add_argument('--ignore', dest='ignore', action='append',
help=argparse.SUPPRESS)
parser.add_argument('--extra-heartbeats', dest='extra_heartbeats',
action='store_true',
help='Reads extra heartbeats from STDIN as a JSON ' +
'array until EOF.')
parser.add_argument('--log-file', dest='log_file',
metavar='FILE', action=StoreWithoutQuotes,
help='Defaults to ~/.wakatime.log.')
parser.add_argument('--logfile', dest='logfile', action=StoreWithoutQuotes,
help=argparse.SUPPRESS)
parser.add_argument('--api-url', dest='api_url', action=StoreWithoutQuotes,
metavar='URL',
help='Heartbeats api url. For debugging with a ' +
'local server.')
parser.add_argument('--apiurl', dest='apiurl', action=StoreWithoutQuotes,
help=argparse.SUPPRESS)
parser.add_argument('--timeout', dest='timeout', type=int,
metavar='SECONDS', action=StoreWithoutQuotes,
help='Number of seconds to wait when sending ' +
'heartbeats to api. Defaults to 60 seconds.')
parser.add_argument('--sync-offline-activity', metavar='AMOUNT',
dest='sync_offline_activity',
action=StoreWithoutQuotes,
help='Amount of offline activity to sync from your ' +
'local ~/.wakatime.db sqlite3 file to your ' +
'WakaTime Dashboard before exiting. Can be ' +
'"none" or a positive integer number. Defaults ' +
'to 100, meaning for every heartbeat sent ' +
'while online, 100 offline heartbeats are ' +
'synced. Can be used without --entity to only ' +
'sync offline activity without generating new ' +
'heartbeats.')
parser.add_argument('--today', dest='today',
action='store_true',
help='Prints dashboard time for Today, then exits.')
parser.add_argument('--config', dest='config', action=StoreWithoutQuotes,
metavar='FILE', help='Defaults to ~/.wakatime.cfg.')
parser.add_argument('--config-section', dest='config_section',
metavar='SECTION', action=StoreWithoutQuotes,
help='Optional config section when reading or ' +
'writing a config key. Defaults to [settings].')
parser.add_argument('--config-read', dest='config_read',
metavar='KEY', action=StoreWithoutQuotes,
help='Prints value for the given config key, then ' +
'exits.')
parser.add_argument('--config-write', dest='config_write',
nargs=2, metavar=('KEY', 'VALUE'),
action=StoreWithoutQuotes,
help='Writes value to a config key, then exits. ' +
'Expects two arguments, key and value.')
parser.add_argument('--verbose', dest='verbose', action='store_true',
help='Turns on debug messages in log file.')
parser.add_argument('--version', action='version', version=__version__)
# parse command line arguments
args = parser.parse_args()
# parse ~/.wakatime.cfg file
configs = parseConfigFile(args.config)
if args.config_read:
section = args.config_section or 'settings'
key = args.config_read
print(configs.get(section, key))
raise SystemExit(SUCCESS)
if args.config_write:
section = args.config_section or 'settings'
key = args.config_write[0]
val = args.config_write[1]
if not configs.has_section(section):
configs.add_section(section)
configs.set(section, key, val)
with open(args.config or getConfigFile(), 'w', encoding='utf-8') as fh:
configs.write(fh)
raise SystemExit(SUCCESS)
# use current unix epoch timestamp by default
if not args.timestamp:
args.timestamp = time.time()
# update args from configs
if not args.hostname:
if configs.has_option('settings', 'hostname'):
args.hostname = configs.get('settings', 'hostname')
if not args.key:
default_key = None
if configs.has_option('settings', 'api_key'):
default_key = configs.get('settings', 'api_key')
elif configs.has_option('settings', 'apikey'):
default_key = configs.get('settings', 'apikey')
if default_key:
args.key = default_key
else:
try:
parser.error('Missing api key. Find your api key from wakatime.com/settings/api-key.')
except SystemExit:
raise SystemExit(AUTH_ERROR)
is_valid = not not re.match(r'^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$', args.key, re.I)
if not is_valid:
try:
parser.error('Invalid api key. Find your api key from wakatime.com/settings/api-key.')
except SystemExit:
raise SystemExit(AUTH_ERROR)
if not args.entity:
if args.file:
args.entity = args.file
elif (not args.sync_offline_activity or args.sync_offline_activity == 'none') and not args.today:
parser.error('argument --entity is required')
if not args.sync_offline_activity:
args.sync_offline_activity = DEFAULT_SYNC_OFFLINE_ACTIVITY
if args.sync_offline_activity == 'none':
args.sync_offline_activity = 0
try:
args.sync_offline_activity = int(args.sync_offline_activity)
if args.sync_offline_activity < 0:
raise Exception('Error')
except:
parser.error('argument --sync-offline-activity must be "none" or an integer number')
if not args.language and args.alternate_language:
args.language = args.alternate_language
if not args.exclude:
args.exclude = []
if configs.has_option('settings', 'ignore'):
try:
for pattern in configs.get('settings', 'ignore').split("\n"):
if pattern.strip() != '':
args.exclude.append(pattern)
except TypeError: # pragma: nocover
pass
if configs.has_option('settings', 'exclude'):
try:
for pattern in configs.get('settings', 'exclude').split("\n"):
if pattern.strip() != '':
args.exclude.append(pattern)
except TypeError: # pragma: nocover
pass
if not args.include_only_with_project_file and configs.has_option('settings', 'include_only_with_project_file'):
args.include_only_with_project_file = configs.get('settings', 'include_only_with_project_file') == 'true'
if not args.include:
args.include = []
if configs.has_option('settings', 'include'):
try:
for pattern in configs.get('settings', 'include').split("\n"):
if pattern.strip() != '':
args.include.append(pattern)
except TypeError: # pragma: nocover
pass
if not args.exclude_unknown_project and configs.has_option('settings', 'exclude_unknown_project'):
args.exclude_unknown_project = configs.getboolean('settings', 'exclude_unknown_project')
_boolean_or_list('hide_file_names', args, configs, alternative_names=['hide_filenames', 'hidefilenames'])
_boolean_or_list('hide_project_names', args, configs, alternative_names=['hide_projectnames', 'hideprojectnames'])
_boolean_or_list('hide_branch_names', args, configs, alternative_names=['hide_branchnames', 'hidebranchnames'], default=None)
if args.offline_deprecated:
args.offline = False
if args.offline and configs.has_option('settings', 'offline'):
args.offline = configs.getboolean('settings', 'offline')
if not args.proxy and configs.has_option('settings', 'proxy'):
args.proxy = configs.get('settings', 'proxy')
if args.proxy:
pattern = r'^((https?|socks5)://)?([^:@]+(:([^:@])+)?@)?[^:]+(:\d+)?$'
if '\\' in args.proxy:
pattern = r'^.*\\.+$'
is_valid = not not re.match(pattern, args.proxy, re.I)
if not is_valid:
parser.error('Invalid proxy. Must be in format ' +
'https://user:pass@host:port or ' +
'socks5://user:pass@host:port or ' +
'domain\\user:pass.')
if configs.has_option('settings', 'no_ssl_verify'):
args.nosslverify = configs.getboolean('settings', 'no_ssl_verify')
if configs.has_option('settings', 'ssl_certs_file'):
args.ssl_certs_file = configs.get('settings', 'ssl_certs_file')
if not args.verbose and configs.has_option('settings', 'verbose'):
args.verbose = configs.getboolean('settings', 'verbose')
if not args.verbose and configs.has_option('settings', 'debug'):
args.verbose = configs.getboolean('settings', 'debug')
if not args.log_file and args.logfile:
args.log_file = args.logfile
if not args.log_file and configs.has_option('settings', 'log_file'):
args.log_file = configs.get('settings', 'log_file')
if not args.log_file and os.environ.get('WAKATIME_HOME'):
home = os.environ.get('WAKATIME_HOME')
args.log_file = os.path.join(os.path.expanduser(home), '.wakatime.log')
if not args.api_url and args.apiurl:
args.api_url = args.apiurl
if not args.api_url and configs.has_option('settings', 'api_url'):
args.api_url = configs.get('settings', 'api_url')
if not args.timeout and configs.has_option('settings', 'timeout'):
try:
args.timeout = int(configs.get('settings', 'timeout'))
except ValueError:
print(traceback.format_exc())
return args, configs
def _boolean_or_list(config_name, args, configs, alternative_names=[], default=[]):
"""Get a boolean or list of regexes from args and configs."""
# when argument flag present, set to wildcard regex
for key in alternative_names + [config_name]:
if hasattr(args, key) and getattr(args, key):
setattr(args, config_name, ['.*'])
return
setattr(args, config_name, default)
option = None
alternative_names.insert(0, config_name)
for key in alternative_names:
if configs.has_option('settings', key):
option = configs.get('settings', key)
break
if option is not None:
if option.strip().lower() == 'true':
setattr(args, config_name, ['.*'])
elif option.strip().lower() == 'false':
setattr(args, config_name, [])
else:
for pattern in option.split("\n"):
if pattern.strip() != '':
if not getattr(args, config_name):
setattr(args, config_name, [])
getattr(args, config_name).append(pattern)

View File

@ -9,12 +9,18 @@
:license: BSD, see LICENSE for more details.
"""
import codecs
import os
import platform
import subprocess
import sys
is_py2 = (sys.version_info[0] == 2)
is_py26 = (sys.version_info[0] == 2 and sys.version_info[1] == 6)
is_py3 = (sys.version_info[0] == 3)
is_win = platform.system() == 'Windows'
if is_py2: # pragma: nocover
@ -31,7 +37,7 @@ if is_py2: # pragma: nocover
try:
return unicode(text)
except:
return text
return text.decode('utf-8', 'replace')
open = codecs.open
basestring = basestring
@ -52,7 +58,7 @@ elif is_py3: # pragma: nocover
try:
return str(text)
except:
return text
return text.decode('utf-8', 'replace')
open = open
basestring = (str, bytes)
@ -82,7 +88,7 @@ except ImportError: # pragma: nocover
if name.startswith('.'):
if not package:
raise TypeError("relative imports require the 'package' "
+ "argument")
"argument")
level = 0
for character in name:
if character != '.':
@ -91,3 +97,27 @@ except ImportError: # pragma: nocover
name = _resolve_name(name[level:], package, level)
__import__(name)
return sys.modules[name]
try:
from .packages import simplejson as json
except (ImportError, SyntaxError): # pragma: nocover
import json
class Popen(subprocess.Popen):
"""Patched Popen to prevent opening cmd window on Windows platform."""
def __init__(self, *args, **kwargs):
startupinfo = kwargs.get('startupinfo')
if is_win or True:
try:
startupinfo = startupinfo or subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
except AttributeError:
pass
kwargs['startupinfo'] = startupinfo
if 'env' not in kwargs:
kwargs['env'] = os.environ.copy()
kwargs['env']['LANG'] = 'en-US' if is_win else 'en_US.UTF-8'
subprocess.Popen.__init__(self, *args, **kwargs)

View File

@ -0,0 +1,64 @@
# -*- coding: utf-8 -*-
"""
wakatime.configs
~~~~~~~~~~~~~~~~
Config file parser.
:copyright: (c) 2016 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from __future__ import print_function
import os
import traceback
from .compat import open
from .constants import CONFIG_FILE_PARSE_ERROR
try:
import configparser
except ImportError: # pragma: nocover
from .packages import configparser
def getConfigFile():
"""Returns the config file location.
If $WAKATIME_HOME env varialbe is defined, returns
$WAKATIME_HOME/.wakatime.cfg, otherwise ~/.wakatime.cfg.
"""
fileName = '.wakatime.cfg'
home = os.environ.get('WAKATIME_HOME')
if home:
return os.path.join(os.path.expanduser(home), fileName)
return os.path.join(os.path.expanduser('~'), fileName)
def parseConfigFile(configFile=None):
"""Returns a configparser.SafeConfigParser instance with configs
read from the config file. Default location of the config file is
at ~/.wakatime.cfg.
"""
# get config file location from ENV
if not configFile:
configFile = getConfigFile()
configs = configparser.ConfigParser(delimiters=('='), strict=False)
try:
with open(configFile, 'r', encoding='utf-8') as fh:
try:
configs.read_file(fh)
except configparser.Error:
print(traceback.format_exc())
raise SystemExit(CONFIG_FILE_PARSE_ERROR)
except IOError:
pass
return configs

View File

@ -9,10 +9,48 @@
:license: BSD, see LICENSE for more details.
"""
""" Success
Exit code used when a heartbeat was sent successfully.
"""
SUCCESS = 0
""" Api Error
Exit code used when the WakaTime API returned an error.
"""
API_ERROR = 102
""" Config File Parse Error
Exit code used when the ~/.wakatime.cfg config file could not be parsed.
"""
CONFIG_FILE_PARSE_ERROR = 103
""" Auth Error
Exit code used when our api key is invalid.
"""
AUTH_ERROR = 104
""" Unknown Error
Exit code used when there was an unhandled exception.
"""
UNKNOWN_ERROR = 105
MALFORMED_HEARTBEAT_ERROR = 106
""" Connection Error
Exit code used when there was proxy or other problem connecting to the WakaTime
API servers.
"""
CONNECTION_ERROR = 107
""" Max file size supporting line number count stats.
Files larger than this in bytes will not have a line count stat for performance.
Default is 2MB.
"""
MAX_FILE_SIZE_SUPPORTED = 2000000
""" Default limit of number of offline heartbeats to sync before exiting."""
DEFAULT_SYNC_OFFLINE_ACTIVITY = 100
""" Number of heartbeats per api request.
Even when sending more heartbeats, this is the number of heartbeats sent per
individual https request to the WakaTime API.
"""
HEARTBEATS_PER_REQUEST = 25

View File

@ -12,7 +12,6 @@
import logging
import re
import sys
import traceback
from ..compat import u, open, import_module
from ..exceptions import NotYetImplemented
@ -68,7 +67,7 @@ class TokenParser(object):
pass
try:
with open(self.source_file, 'r', encoding=sys.getfilesystemencoding()) as fh:
return self.lexer.get_tokens_unprocessed(fh.read(512000))
return self.lexer.get_tokens_unprocessed(fh.read(512000)) # pragma: nocover
except:
pass
return []
@ -107,8 +106,8 @@ class DependencyParser(object):
self.lexer = lexer
if self.lexer:
module_name = self.lexer.__module__.rsplit('.', 1)[-1]
class_name = self.lexer.__class__.__name__.replace('Lexer', 'Parser', 1)
module_name = self.root_lexer.__module__.rsplit('.', 1)[-1]
class_name = self.root_lexer.__class__.__name__.replace('Lexer', 'Parser', 1)
else:
module_name = 'unknown'
class_name = 'UnknownParser'
@ -118,13 +117,23 @@ class DependencyParser(object):
try:
self.parser = getattr(module, class_name)
except AttributeError:
log.debug('Module {0} is missing class {1}'.format(module.__name__, class_name))
log.debug('Parsing dependencies not supported for {0}.{1}'.format(module_name, class_name))
except ImportError:
log.debug(traceback.format_exc())
log.debug('Parsing dependencies not supported for {0}.{1}'.format(module_name, class_name))
@property
def root_lexer(self):
if hasattr(self.lexer, 'root_lexer'):
return self.lexer.root_lexer
return self.lexer
def parse(self):
if self.parser:
plugin = self.parser(self.source_file, lexer=self.lexer)
dependencies = plugin.parse()
return list(set(dependencies))
def filter_dependencies(dep):
return dep and len(dep) <= 200
return list(filter(filter_dependencies, set(dependencies)))
return []

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.c_cpp
~~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.c_cpp
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from C++ code.
@ -12,34 +12,6 @@
from . import TokenParser
class CppParser(TokenParser):
exclude = [
r'^stdio\.h$',
r'^stdlib\.h$',
r'^string\.h$',
r'^time\.h$',
]
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Preproc':
self._process_preproc(token, content)
else:
self._process_other(token, content)
def _process_preproc(self, token, content):
if content.strip().startswith('include ') or content.strip().startswith("include\t"):
content = content.replace('include', '', 1).strip().strip('"').strip('<').strip('>').strip()
self.append(content)
def _process_other(self, token, content):
pass
class CParser(TokenParser):
exclude = [
r'^stdio\.h$',
@ -47,6 +19,7 @@ class CParser(TokenParser):
r'^string\.h$',
r'^time\.h$',
]
state = None
def parse(self):
for index, token, content in self.tokens:
@ -54,15 +27,25 @@ class CParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Preproc':
if self.partial(token) == 'Preproc' or self.partial(token) == 'PreprocFile':
self._process_preproc(token, content)
else:
self._process_other(token, content)
def _process_preproc(self, token, content):
if content.strip().startswith('include ') or content.strip().startswith("include\t"):
content = content.replace('include', '', 1).strip().strip('"').strip('<').strip('>').strip()
self.append(content)
if self.state == 'include':
if content != '\n' and content != '#':
content = content.strip().strip('"').strip('<').strip('>').strip()
self.append(content, truncate=True, separator='/')
self.state = None
elif content.strip().startswith('include'):
self.state = 'include'
else:
self.state = None
def _process_other(self, token, content):
pass
class CppParser(CParser):
pass

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.data
~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.data
~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from data files.

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.dotnet
~~~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.dotnet
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from .NET code.

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
"""
wakatime.dependencies.elm
~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Elm code.
:copyright: (c) 2018 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from . import TokenParser
class ElmParser(TokenParser):
state = None
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Namespace':
self._process_namespace(token, content)
elif self.partial(token) == 'Text':
self._process_text(token, content)
elif self.partial(token) == 'Class':
self._process_class(token, content)
else:
self._process_other(token, content)
def _process_namespace(self, token, content):
self.state = content.strip()
def _process_class(self, token, content):
if self.state == 'import':
self.append(self._format(content))
def _process_text(self, token, content):
pass
def _process_other(self, token, content):
self.state = None
def _format(self, content):
return content.strip().split('.')[0].strip()

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.go
~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.go
~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Go code.

View File

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
"""
wakatime.dependencies.haskell
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Haskell code.
:copyright: (c) 2018 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from . import TokenParser
class HaskellParser(TokenParser):
state = None
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Reserved':
self._process_reserved(token, content)
elif self.partial(token) == 'Namespace':
self._process_namespace(token, content)
elif self.partial(token) == 'Keyword':
self._process_keyword(token, content)
elif self.partial(token) == 'Text':
self._process_text(token, content)
else:
self._process_other(token, content)
def _process_reserved(self, token, content):
self.state = content.strip()
def _process_namespace(self, token, content):
if self.state == 'import':
self.append(self._format(content))
def _process_keyword(self, token, content):
if self.state != 'import' or content.strip() != 'qualified':
self.state = None
def _process_text(self, token, content):
pass
def _process_other(self, token, content):
self.state = None
def _format(self, content):
return content.strip().split('.')[0].strip()

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""
wakatime.dependencies.haxe
~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Haxe code.
:copyright: (c) 2018 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from . import TokenParser
class HaxeParser(TokenParser):
exclude = [
r'^haxe$',
]
state = None
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Namespace':
self._process_namespace(token, content)
elif self.partial(token) == 'Text':
self._process_text(token, content)
else:
self._process_other(token, content)
def _process_namespace(self, token, content):
if self.state == 'import':
self.append(self._format(content))
self.state = None
else:
self.state = content
def _process_text(self, token, content):
pass
def _process_other(self, token, content):
self.state = None
def _format(self, content):
return content.strip()

View File

@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.templates
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.html
~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Templates.
Parse dependencies from HTML.
:copyright: (c) 2014 Alan Hamlett.
:license: BSD, see LICENSE for more details.
@ -20,7 +20,7 @@ KEYWORDS = [
'_',
'$',
'angular',
'assert', # probably mocha
'assert', # probably mocha
'backbone',
'batman',
'c3',
@ -29,13 +29,13 @@ KEYWORDS = [
'chai',
'chaplin',
'd3',
'define', # probably require
'describe', # mocha or jasmine
'define', # probably require
'describe', # mocha or jasmine
'eco',
'ember',
'espresso',
'expect', # probably jasmine
'exports', # probably npm
'expect', # probably jasmine
'exports', # probably npm
'express',
'gulp',
'handlebars',
@ -43,8 +43,8 @@ KEYWORDS = [
'jasmine',
'jquery',
'jstz',
'ko', # probably knockout
'm', # probably mithril
'ko', # probably knockout
'm', # probably mithril
'marionette',
'meteor',
'moment',
@ -56,7 +56,7 @@ KEYWORDS = [
'qunit',
'react',
'reactive',
'require', # probably the commonjs spec
'require', # probably the commonjs spec
'ripple',
'rivets',
'socketio',
@ -69,8 +69,9 @@ KEYWORDS = [
]
class HtmlDjangoParser(TokenParser):
class HtmlParser(TokenParser):
tags = []
opening_tag = False
getting_attrs = False
current_attr = None
current_attr_value = None
@ -81,7 +82,9 @@ class HtmlDjangoParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if u(token) == 'Token.Name.Tag':
if u(token) == 'Token.Punctuation':
self._process_punctuation(token, content)
elif u(token) == 'Token.Name.Tag':
self._process_tag(token, content)
elif u(token) == 'Token.Literal.String':
self._process_string(token, content)
@ -92,26 +95,30 @@ class HtmlDjangoParser(TokenParser):
def current_tag(self):
return None if len(self.tags) == 0 else self.tags[0]
def _process_tag(self, token, content):
def _process_punctuation(self, token, content):
if content.startswith('</') or content.startswith('/'):
try:
self.tags.pop(0)
except IndexError:
# ignore errors from malformed markup
pass
self.opening_tag = False
self.getting_attrs = False
elif content.startswith('<'):
self.opening_tag = True
elif content.startswith('>'):
self.opening_tag = False
self.getting_attrs = False
def _process_tag(self, token, content):
if self.opening_tag:
self.tags.insert(0, content.replace('<', '', 1).strip().lower())
self.getting_attrs = True
elif content.startswith('>'):
self.getting_attrs = False
self.current_attr = None
def _process_attribute(self, token, content):
if self.getting_attrs:
self.current_attr = content.lower().strip('=')
else:
self.current_attr = None
self.current_attr_value = None
def _process_string(self, token, content):
@ -134,65 +141,3 @@ class HtmlDjangoParser(TokenParser):
elif content.startswith('"') or content.startswith("'"):
if self.current_attr_value is None:
self.current_attr_value = content
else:
self.current_attr_value += content
class VelocityHtmlParser(HtmlDjangoParser):
pass
class MyghtyHtmlParser(HtmlDjangoParser):
pass
class MasonParser(HtmlDjangoParser):
pass
class MakoHtmlParser(HtmlDjangoParser):
pass
class CheetahHtmlParser(HtmlDjangoParser):
pass
class HtmlGenshiParser(HtmlDjangoParser):
pass
class RhtmlParser(HtmlDjangoParser):
pass
class HtmlPhpParser(HtmlDjangoParser):
pass
class HtmlSmartyParser(HtmlDjangoParser):
pass
class EvoqueHtmlParser(HtmlDjangoParser):
pass
class ColdfusionHtmlParser(HtmlDjangoParser):
pass
class LassoHtmlParser(HtmlDjangoParser):
pass
class HandlebarsHtmlParser(HtmlDjangoParser):
pass
class YamlJinjaParser(HtmlDjangoParser):
pass
class TwigHtmlParser(HtmlDjangoParser):
pass

View File

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""
wakatime.dependencies.javascript
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from JavaScript code.
:copyright: (c) 2018 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
import re
from . import TokenParser
class JavascriptParser(TokenParser):
state = None
extension = re.compile(r'\.\w{1,4}$')
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Reserved':
self._process_reserved(token, content)
elif self.partial(token) == 'Single':
self._process_string(token, content)
elif self.partial(token) == 'Punctuation':
self._process_punctuation(token, content)
else:
self._process_other(token, content)
def _process_reserved(self, token, content):
if self.state is None:
self.state = content
def _process_string(self, token, content):
if self.state == 'import':
self.append(self._format_module(content))
self.state = None
def _process_punctuation(self, token, content):
if content == ';':
self.state = None
def _process_other(self, token, content):
pass
def _format_module(self, content):
content = content.strip().strip('"').strip("'").strip()
content = content.split('/')[-1].split('\\')[-1]
content = self.extension.sub('', content, count=1)
return content
class TypeScriptParser(JavascriptParser):
pass

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.java
~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.java
~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Java code.
@ -43,7 +43,7 @@ class JavaParser(TokenParser):
self._process_other(token, content)
def _process_namespace(self, token, content):
if u(content) == u('import'):
if u(content).split() and u(content).split()[0] == u('import'):
self.state = 'import'
elif self.state == 'import':
@ -94,3 +94,89 @@ class JavaParser(TokenParser):
def _process_other(self, token, content):
pass
class KotlinParser(TokenParser):
state = None
exclude = [
r'^java\.',
]
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Keyword':
self._process_keyword(token, content)
elif self.partial(token) == 'Text':
self._process_text(token, content)
elif self.partial(token) == 'Namespace':
self._process_namespace(token, content)
else:
self._process_other(token, content)
def _process_keyword(self, token, content):
self.state = content
def _process_text(self, token, content):
pass
def _process_namespace(self, token, content):
if self.state == 'import':
self.append(self._format(content))
self.state = None
def _process_other(self, token, content):
self.state = None
def _format(self, content):
content = content.split(u('.'))
if content[-1] == u('*'):
content = content[:len(content) - 1]
if len(content) == 0:
return None
if len(content) == 1:
return content[0]
return u('.').join(content[:2])
class ScalaParser(TokenParser):
state = None
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Keyword':
self._process_keyword(token, content)
elif self.partial(token) == 'Text':
self._process_text(token, content)
elif self.partial(token) == 'Namespace':
self._process_namespace(token, content)
else:
self._process_other(token, content)
def _process_keyword(self, token, content):
self.state = content
def _process_text(self, token, content):
pass
def _process_namespace(self, token, content):
if self.state == 'import':
self.append(self._format(content))
self.state = None
def _process_other(self, token, content):
self.state = None
def _format(self, content):
return content.strip().lstrip('__root__').strip('_').strip('.')

View File

@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
"""
wakatime.dependencies.objective
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Objective-C and Swift code.
:copyright: (c) 2018 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
import re
from . import TokenParser
class SwiftParser(TokenParser):
state = None
exclude = [
r'^foundation$',
]
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Declaration':
self._process_declaration(token, content)
elif self.partial(token) == 'Class':
self._process_class(token, content)
else:
self._process_other(token, content)
def _process_declaration(self, token, content):
if self.state is None:
self.state = content
def _process_class(self, token, content):
if self.state == 'import':
self.append(content)
self.state = None
def _process_other(self, token, content):
pass
class ObjectiveCParser(TokenParser):
state = None
extension = re.compile(r'\.[mh]$')
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Preproc':
self._process_preproc(token, content)
else:
self._process_other(token, content)
def _process_preproc(self, token, content):
if self.state:
self._process_import(token, content)
self.state = content
def _process_import(self, token, content):
if self.state == '#' and content.startswith('import '):
self.append(self._format(content))
self.state = None
def _process_other(self, token, content):
pass
def _format(self, content):
content = content.strip().lstrip('import ').strip()
content = content.strip('"').strip("'").strip()
content = content.strip('<').strip('>').strip()
content = content.split('/')[0]
content = self.extension.sub('', content, count=1)
return content

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.php
~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.php
~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from PHP code.
@ -16,6 +16,10 @@ from ..compat import u
class PhpParser(TokenParser):
state = None
parens = 0
exclude = [
r'^app$',
r'app\.php$',
]
def parse(self):
for index, token, content in self.tokens:

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.python
~~~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.python
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Python code.
@ -18,7 +18,9 @@ class PythonParser(TokenParser):
nonpackage = False
exclude = [
r'^os$',
r'^sys$',
r'^sys\.',
r'^__future__$',
]
def parse(self):
@ -48,9 +50,7 @@ class PythonParser(TokenParser):
self._process_import(token, content)
def _process_operator(self, token, content):
if self.state is not None:
if content == '.':
self.nonpackage = True
pass
def _process_punctuation(self, token, content):
if content == '(':
@ -73,8 +73,6 @@ class PythonParser(TokenParser):
if self.state == 'from':
self.append(content, truncate=True, truncate_to=1)
self.state = 'from-2'
elif self.state == 'from-2' and content != 'import':
self.append(content, truncate=True, truncate_to=1)
elif self.state == 'import':
self.append(content, truncate=True, truncate_to=1)
self.state = 'import-2'

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""
wakatime.dependencies.rust
~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from Rust code.
:copyright: (c) 2018 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from . import TokenParser
class RustParser(TokenParser):
state = None
def parse(self):
for index, token, content in self.tokens:
self._process_token(token, content)
return self.dependencies
def _process_token(self, token, content):
if self.partial(token) == 'Keyword':
self._process_keyword(token, content)
elif self.partial(token) == 'Whitespace':
self._process_whitespace(token, content)
elif self.partial(token) == 'Name':
self._process_name(token, content)
else:
self._process_other(token, content)
def _process_keyword(self, token, content):
if self.state == 'extern' and content == 'crate':
self.state = 'extern crate'
else:
self.state = content
def _process_whitespace(self, token, content):
pass
def _process_name(self, token, content):
if self.state == 'extern crate':
self.append(content)
self.state = None
def _process_other(self, token, content):
self.state = None

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages.unknown
~~~~~~~~~~~~~~~~~~~~~~~~~~
wakatime.dependencies.unknown
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Parse dependencies from files of unknown language.

View File

@ -12,3 +12,8 @@
class NotYetImplemented(Exception):
"""This method needs to be implemented."""
class SkipHeartbeat(Exception):
"""Raised to prevent the current heartbeat from being sent."""
pass

View File

@ -0,0 +1,388 @@
# -*- coding: utf-8 -*-
"""
wakatime.heartbeat
~~~~~~~~~~~~~~~~~~
:copyright: (c) 2017 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
import os
import logging
import re
from subprocess import PIPE
from .compat import u, json, is_win, Popen
from .exceptions import SkipHeartbeat
from .project import get_project_info
from .stats import get_file_stats
from .utils import get_user_agent, should_exclude, format_file_path, find_project_file
log = logging.getLogger('WakaTime')
class Heartbeat(object):
"""Heartbeat data for sending to API or storing in offline cache."""
skip = False
args = None
configs = None
time = None
entity = None
type = None
category = None
is_write = None
project = None
branch = None
language = None
dependencies = None
lines = None
lineno = None
cursorpos = None
user_agent = None
_sensitive_when_hiding_filename = (
'dependencies',
'lines',
'lineno',
'cursorpos',
)
_sensitive_when_hiding_branch = (
'branch',
)
def __init__(self, data, args, configs, _clone=None):
if not data:
self.skip = u('Skipping because heartbeat data is missing.')
return
self.args = args
self.configs = configs
self.entity = data.get('entity')
self.time = data.get('time', data.get('timestamp'))
self.is_write = data.get('is_write')
self.user_agent = data.get('user_agent') or get_user_agent(args.plugin)
self.type = data.get('type', data.get('entity_type'))
if self.type not in ['file', 'domain', 'app']:
self.type = 'file'
self.category = data.get('category')
allowed_categories = [
'coding',
'building',
'indexing',
'debugging',
'running tests',
'manual testing',
'writing tests',
'browsing',
'code reviewing',
'designing',
]
if self.category not in allowed_categories:
self.category = None
if not _clone:
exclude = self._excluded_by_pattern()
if exclude:
self.skip = u('Skipping because matches exclude pattern: {pattern}').format(
pattern=u(exclude),
)
return
if self.type == 'file':
self.entity = format_file_path(self.entity)
self._format_local_file()
if not self._file_exists():
self.skip = u('File does not exist; ignoring this heartbeat.')
return
if self._excluded_by_missing_project_file():
self.skip = u('Skipping because missing .wakatime-project file in parent path.')
return
if args.local_file and not os.path.isfile(args.local_file):
args.local_file = None
project, branch = get_project_info(configs, self, data)
self.project = project
self.branch = branch
if self._excluded_by_unknown_project():
self.skip = u('Skipping because project unknown.')
return
try:
stats = get_file_stats(self.entity,
entity_type=self.type,
lineno=data.get('lineno'),
cursorpos=data.get('cursorpos'),
plugin=args.plugin,
language=data.get('language'),
local_file=args.local_file)
except SkipHeartbeat as ex:
self.skip = u(ex) or 'Skipping'
return
else:
self.project = data.get('project')
self.branch = data.get('branch')
stats = data
for key in ['language', 'dependencies', 'lines', 'lineno', 'cursorpos']:
if stats.get(key) is not None:
setattr(self, key, stats[key])
def update(self, attrs):
"""Return a copy of the current Heartbeat with updated attributes."""
data = self.dict()
data.update(attrs)
heartbeat = Heartbeat(data, self.args, self.configs, _clone=True)
return heartbeat
def sanitize(self):
"""Removes sensitive data including file names and dependencies.
Returns a Heartbeat.
"""
if self.entity is None:
return self
if self.type != 'file':
return self
if self._should_obfuscate_filename():
self._sanitize_metadata(keys=self._sensitive_when_hiding_filename)
if self._should_obfuscate_branch(default=True):
self._sanitize_metadata(keys=self._sensitive_when_hiding_branch)
extension = u(os.path.splitext(self.entity)[1])
self.entity = u('HIDDEN{0}').format(extension)
elif self.should_obfuscate_project():
self._sanitize_metadata(keys=self._sensitive_when_hiding_filename)
if self._should_obfuscate_branch(default=True):
self._sanitize_metadata(keys=self._sensitive_when_hiding_branch)
elif self._should_obfuscate_branch():
self._sanitize_metadata(keys=self._sensitive_when_hiding_branch)
return self
def json(self):
return json.dumps(self.dict())
def dict(self):
return {
'time': self.time,
'entity': self._unicode(self.entity),
'type': self.type,
'category': self.category,
'is_write': self.is_write,
'project': self._unicode(self.project),
'branch': self._unicode(self.branch),
'language': self._unicode(self.language),
'dependencies': self._unicode_list(self.dependencies),
'lines': self.lines,
'lineno': self.lineno,
'cursorpos': self.cursorpos,
'user_agent': self._unicode(self.user_agent),
}
def items(self):
return self.dict().items()
def get_id(self):
return u('{time}-{type}-{category}-{project}-{branch}-{entity}-{is_write}').format(
time=self.time,
type=self.type,
category=self.category,
project=self._unicode(self.project),
branch=self._unicode(self.branch),
entity=self._unicode(self.entity),
is_write=self.is_write,
)
def should_obfuscate_project(self):
"""Returns True if hide_project_names is true or the entity file path
matches one in the list of obfuscated project paths."""
for pattern in self.args.hide_project_names:
try:
compiled = re.compile(pattern, re.IGNORECASE)
if compiled.search(self.entity):
return True
except re.error as ex:
log.warning(u('Regex error ({msg}) for hide_project_names pattern: {pattern}').format(
msg=u(ex),
pattern=u(pattern),
))
return False
def _should_obfuscate_filename(self):
"""Returns True if hide_file_names is true or the entity file path
matches one in the list of obfuscated file paths."""
for pattern in self.args.hide_file_names:
try:
compiled = re.compile(pattern, re.IGNORECASE)
if compiled.search(self.entity):
return True
except re.error as ex:
log.warning(u('Regex error ({msg}) for hide_file_names pattern: {pattern}').format(
msg=u(ex),
pattern=u(pattern),
))
return False
def _should_obfuscate_branch(self, default=False):
"""Returns True if hide_file_names is true or the entity file path
matches one in the list of obfuscated file paths."""
# when project names or file names are hidden and hide_branch_names is
# not set, we default to hiding branch names along with file/project.
if default and self.args.hide_branch_names is None:
return True
if not self.branch or not self.args.hide_branch_names:
return False
for pattern in self.args.hide_branch_names:
try:
compiled = re.compile(pattern, re.IGNORECASE)
if compiled.search(self.entity) or compiled.search(self.branch):
return True
except re.error as ex:
log.warning(u('Regex error ({msg}) for hide_branch_names pattern: {pattern}').format(
msg=u(ex),
pattern=u(pattern),
))
return False
def _unicode(self, value):
if value is None:
return None
return u(value)
def _unicode_list(self, values):
if values is None:
return None
return [self._unicode(value) for value in values]
def _file_exists(self):
return (self.entity and os.path.isfile(self.entity) or
self.args.local_file and os.path.isfile(self.args.local_file))
def _format_local_file(self):
"""When args.local_file empty on Windows, tries to map args.entity to a
unc path.
Updates args.local_file in-place without returning anything.
"""
if self.type != 'file':
return
if not self.entity:
return
if not is_win:
return
if self._file_exists():
return
self.args.local_file = self._to_unc_path(self.entity)
def _to_unc_path(self, filepath):
drive, rest = self._splitdrive(filepath)
if not drive:
return filepath
stdout = None
try:
stdout, stderr = Popen(['net', 'use'], stdout=PIPE, stderr=PIPE).communicate()
except OSError:
pass
else:
if stdout:
cols = None
for line in stdout.strip().splitlines()[1:]:
line = u(line)
if not line.strip():
continue
if not cols:
cols = self._unc_columns(line)
continue
start, end = cols.get('local', (0, 0))
if not start and not end:
break
local = line[start:end].strip().split(':')[0].upper()
if not local.isalpha():
continue
if local == drive:
start, end = cols.get('remote', (0, 0))
if not start and not end:
break
remote = line[start:end].strip()
return remote + rest
return filepath
def _unc_columns(self, line):
cols = {}
current_col = u('')
newcol = False
start, end = 0, 0
for char in line:
if char.isalpha():
if newcol:
cols[current_col.strip().lower()] = (start, end)
current_col = u('')
start = end
newcol = False
current_col += u(char)
else:
newcol = True
end += 1
if start != end and current_col:
cols[current_col.strip().lower()] = (start, -1)
return cols
def _splitdrive(self, filepath):
if filepath[1:2] != ':' or not filepath[0].isalpha():
return None, filepath
return filepath[0].upper(), filepath[2:]
def _excluded_by_pattern(self):
return should_exclude(self.entity, self.args.include, self.args.exclude)
def _excluded_by_unknown_project(self):
if self.project:
return False
return self.args.exclude_unknown_project
def _excluded_by_missing_project_file(self):
if not self.args.include_only_with_project_file:
return False
return find_project_file(self.entity) is None
def _sanitize_metadata(self, keys=[]):
for key in keys:
setattr(self, key, None)
def __repr__(self):
return self.json()
def __bool__(self):
return not self.skip
def __nonzero__(self):
return self.__bool__()
def __getitem__(self, key):
return self.dict()[key]

View File

@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
"""
wakatime.language_priorities
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Overwrite pygments Lexer.priority attribute for specific languages.
:copyright: (c) 2017 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
LANGUAGES = {
'f#': 0.01,
'perl': 0.01,
'perl6': 0.01,
'typescript': 0.01,
}

View File

@ -1,80 +1,81 @@
{
"ActionScript": "ActionScript",
"ApacheConf": "ApacheConf",
"AppleScript": "AppleScript",
"ASP": "ASP",
"Assembly": "Assembly",
"Awk": "Awk",
"Bash": "Bash",
"Basic": "Basic",
"BrightScript": "BrightScript",
"C": "C",
"C#": "C#",
"C++": "C++",
"Clojure": "Clojure",
"Cocoa": "Cocoa",
"CoffeeScript": "CoffeeScript",
"ColdFusion": "ColdFusion",
"Common Lisp": "Common Lisp",
"CSHTML": "CSHTML",
"CSS": "CSS",
"Dart": "Dart",
"Delphi": "Delphi",
"Elixir": "Elixir",
"Elm": "Elm",
"Emacs Lisp": "Emacs Lisp",
"Erlang": "Erlang",
"F#": "F#",
"Fortran": "Fortran",
"Go": "Go",
"Gous": "Gosu",
"Groovy": "Groovy",
"Haml": "Haml",
"HaXe": "HaXe",
"Haskell": "Haskell",
"HTML": "HTML",
"INI": "INI",
"Jade": "Jade",
"Java": "Java",
"JavaScript": "JavaScript",
"JSON": "JSON",
"JSX": "JSX",
"Kotlin": "Kotlin",
"LESS": "LESS",
"Lua": "Lua",
"Markdown": "Markdown",
"Matlab": "Matlab",
"Mustache": "Mustache",
"OCaml": "OCaml",
"Objective-C": "Objective-C",
"Objective-C++": "Objective-C++",
"Objective-J": "Objective-J",
"Perl": "Perl",
"PHP": "PHP",
"PowerShell": "PowerShell",
"Prolog": "Prolog",
"Puppet": "Puppet",
"Python": "Python",
"R": "R",
"reStructuredText": "reStructuredText",
"Ruby": "Ruby",
"Rust": "Rust",
"Sass": "Sass",
"Scala": "Scala",
"Scheme": "Scheme",
"SCSS": "SCSS",
"Shell": "Shell",
"Slim": "Slim",
"Smalltalk": "Smalltalk",
"SQL": "SQL",
"Swift": "Swift",
"Text": "Text",
"Turing": "Turing",
"Twig": "Twig",
"TypeScript": "TypeScript",
"VB.net": "VB.net",
"VimL": "VimL",
"XAML": "XAML",
"XML": "XML",
"YAML": "YAML"
"actionscript": "ActionScript",
"apacheconf": "ApacheConf",
"applescript": "AppleScript",
"asp": "ASP",
"assembly": "Assembly",
"awk": "Awk",
"bash": "Bash",
"basic": "Basic",
"brightscript": "BrightScript",
"c": "C",
"c#": "C#",
"c++": "C++",
"clojure": "Clojure",
"cocoa": "Cocoa",
"coffeescript": "CoffeeScript",
"coldfusion": "ColdFusion",
"common lisp": "Common Lisp",
"cshtml": "CSHTML",
"css": "CSS",
"dart": "Dart",
"delphi": "Delphi",
"elixir": "Elixir",
"elm": "Elm",
"emacs lisp": "Emacs Lisp",
"erlang": "Erlang",
"f#": "F#",
"fortran": "Fortran",
"go": "Go",
"gosu": "Gosu",
"groovy": "Groovy",
"haml": "Haml",
"haskell": "Haskell",
"haxe": "Haxe",
"html": "HTML",
"ini": "INI",
"jade": "Jade",
"java": "Java",
"javascript": "JavaScript",
"json": "JSON",
"jsx": "JSX",
"kotlin": "Kotlin",
"less": "LESS",
"lua": "Lua",
"markdown": "Markdown",
"matlab": "Matlab",
"mustache": "Mustache",
"objective-c": "Objective-C",
"objective-c++": "Objective-C++",
"objective-j": "Objective-J",
"ocaml": "OCaml",
"perl": "Perl",
"php": "PHP",
"powershell": "PowerShell",
"prolog": "Prolog",
"puppet": "Puppet",
"python": "Python",
"r": "R",
"restructuredtext": "reStructuredText",
"ruby": "Ruby",
"rust": "Rust",
"sass": "Sass",
"scala": "Scala",
"scheme": "Scheme",
"scss": "SCSS",
"shell": "Shell",
"slim": "Slim",
"smalltalk": "Smalltalk",
"sql": "SQL",
"swift": "Swift",
"text": "Text",
"turing": "Turing",
"twig": "Twig",
"typescript": "TypeScript",
"typoscript": "TypoScript",
"vb.net": "VB.net",
"viml": "VimL",
"xaml": "XAML",
"xml": "XML",
"yaml": "YAML"
}

View File

@ -25,20 +25,6 @@ except (ImportError, SyntaxError): # pragma: nocover
import json
class CustomEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, bytes): # pragma: nocover
obj = u(obj)
return json.dumps(obj)
try: # pragma: nocover
encoded = super(CustomEncoder, self).default(obj)
except UnicodeDecodeError: # pragma: nocover
obj = u(obj)
encoded = super(CustomEncoder, self).default(obj)
return encoded
class JsonFormatter(logging.Formatter):
def setup(self, timestamp, is_write, entity, version, plugin, verbose,
@ -55,32 +41,25 @@ class JsonFormatter(logging.Formatter):
data = OrderedDict([
('now', self.formatTime(record, self.datefmt)),
])
data['version'] = self.version
data['plugin'] = self.plugin
data['version'] = u(self.version)
if self.plugin:
data['plugin'] = u(self.plugin)
data['time'] = self.timestamp
if self.verbose:
data['caller'] = record.pathname
data['caller'] = u(record.pathname)
data['lineno'] = record.lineno
data['is_write'] = self.is_write
data['file'] = self.entity
if not self.is_write:
del data['is_write']
if self.is_write:
data['is_write'] = self.is_write
data['file'] = u(self.entity)
data['level'] = record.levelname
data['message'] = record.getMessage() if self.warnings else record.msg
if not self.plugin:
del data['plugin']
return CustomEncoder().encode(data)
data['message'] = u(record.getMessage() if self.warnings else record.msg)
return json.dumps(data)
def traceback_formatter(*args, **kwargs):
if 'level' in kwargs and (kwargs['level'].lower() == 'warn' or kwargs['level'].lower() == 'warning'):
logging.getLogger('WakaTime').warning(traceback.format_exc())
elif 'level' in kwargs and kwargs['level'].lower() == 'info':
logging.getLogger('WakaTime').info(traceback.format_exc())
elif 'level' in kwargs and kwargs['level'].lower() == 'debug':
logging.getLogger('WakaTime').debug(traceback.format_exc())
else:
logging.getLogger('WakaTime').error(traceback.format_exc())
def traceback(self, lvl=None):
logger = logging.getLogger('WakaTime')
if not lvl:
lvl = logger.getEffectiveLevel()
logger.log(lvl, traceback.format_exc())
def set_log_level(logger, args):
@ -96,7 +75,7 @@ def setup_logging(args, version):
for handler in logger.handlers:
logger.removeHandler(handler)
set_log_level(logger, args)
logfile = args.logfile
logfile = args.log_file
if not logfile:
logfile = '~/.wakatime.log'
handler = logging.FileHandler(os.path.expanduser(logfile))
@ -113,7 +92,7 @@ def setup_logging(args, version):
logger.addHandler(handler)
# add custom traceback logging method
logger.traceback = traceback_formatter
logger.traceback = formatter.traceback
warnings_formatter = JsonFormatter(datefmt='%Y/%m/%d %H:%M:%S %z')
warnings_formatter.setup(

View File

@ -3,7 +3,7 @@
wakatime.main
~~~~~~~~~~~~~
wakatime module entry point.
Module entry point.
:copyright: (c) 2013 Alan Hamlett.
:license: BSD, see LICENSE for more details.
@ -11,525 +11,96 @@
from __future__ import print_function
import base64
import logging
import os
import platform
import re
import sys
import time
import traceback
import socket
try:
import ConfigParser as configparser
except ImportError: # pragma: nocover
import configparser
pwd = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(pwd))
sys.path.insert(0, os.path.join(pwd, 'packages'))
from .compat import is_py26
if not is_py26:
sys.path.insert(0, os.path.join(pwd, 'packages', 'py27'))
from .__about__ import __version__
from .compat import u, open, is_py3
from .constants import (
API_ERROR,
AUTH_ERROR,
CONFIG_FILE_PARSE_ERROR,
SUCCESS,
UNKNOWN_ERROR,
MALFORMED_HEARTBEAT_ERROR,
)
from .api import send_heartbeats, get_time_today
from .arguments import parse_arguments
from .compat import u, json
from .constants import SUCCESS, UNKNOWN_ERROR, HEARTBEATS_PER_REQUEST
from .logger import setup_logging
from .offlinequeue import Queue
from .packages import argparse
from .packages import requests
from .packages.requests.exceptions import RequestException
from .project import get_project_info
from .session_cache import SessionCache
from .stats import get_file_stats
try:
from .packages import simplejson as json # pragma: nocover
except (ImportError, SyntaxError): # pragma: nocover
import json
from .packages import tzlocal
log = logging.getLogger('WakaTime')
class FileAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
try:
if os.path.isfile(values):
values = os.path.realpath(values)
except: # pragma: nocover
pass
setattr(namespace, self.dest, values)
def parseConfigFile(configFile=None):
"""Returns a configparser.SafeConfigParser instance with configs
read from the config file. Default location of the config file is
at ~/.wakatime.cfg.
"""
if not configFile:
configFile = os.path.join(os.path.expanduser('~'), '.wakatime.cfg')
configs = configparser.SafeConfigParser()
try:
with open(configFile, 'r', encoding='utf-8') as fh:
try:
configs.readfp(fh)
except configparser.Error:
print(traceback.format_exc())
return None
except IOError:
print(u('Error: Could not read from config file {0}').format(u(configFile)))
return configs
def parseArguments():
"""Parse command line arguments and configs from ~/.wakatime.cfg.
Command line arguments take precedence over config file settings.
Returns instances of ArgumentParser and SafeConfigParser.
"""
# define supported command line arguments
parser = argparse.ArgumentParser(
description='Common interface for the WakaTime api.')
parser.add_argument('--entity', dest='entity', metavar='FILE',
action=FileAction,
help='absolute path to file for the heartbeat; can also be a '+
'url, domain, or app when --entity-type is not file')
parser.add_argument('--file', dest='file', action=FileAction,
help=argparse.SUPPRESS)
parser.add_argument('--key', dest='key',
help='your wakatime api key; uses api_key from '+
'~/.wakatime.cfg by default')
parser.add_argument('--write', dest='is_write',
action='store_true',
help='when set, tells api this heartbeat was triggered from '+
'writing to a file')
parser.add_argument('--plugin', dest='plugin',
help='optional text editor plugin name and version '+
'for User-Agent header')
parser.add_argument('--time', dest='timestamp', metavar='time',
type=float,
help='optional floating-point unix epoch timestamp; '+
'uses current time by default')
parser.add_argument('--lineno', dest='lineno',
help='optional line number; current line being edited')
parser.add_argument('--cursorpos', dest='cursorpos',
help='optional cursor position in the current file')
parser.add_argument('--entity-type', dest='entity_type',
help='entity type for this heartbeat. can be one of "file", '+
'"domain", or "app"; defaults to file.')
parser.add_argument('--proxy', dest='proxy',
help='optional proxy configuration. Supports HTTPS '+
'and SOCKS proxies. For example: '+
'https://user:pass@host:port or '+
'socks5://user:pass@host:port')
parser.add_argument('--project', dest='project',
help='optional project name')
parser.add_argument('--alternate-project', dest='alternate_project',
help='optional alternate project name; auto-discovered project '+
'takes priority')
parser.add_argument('--alternate-language', dest='alternate_language',
help='optional alternate language name; auto-detected language'+
'takes priority')
parser.add_argument('--hostname', dest='hostname', help='hostname of '+
'current machine.')
parser.add_argument('--disableoffline', dest='offline',
action='store_false',
help='disables offline time logging instead of queuing logged time')
parser.add_argument('--hidefilenames', dest='hidefilenames',
action='store_true',
help='obfuscate file names; will not send file names to api')
parser.add_argument('--exclude', dest='exclude', action='append',
help='filename patterns to exclude from logging; POSIX regex '+
'syntax; can be used more than once')
parser.add_argument('--include', dest='include', action='append',
help='filename patterns to log; when used in combination with '+
'--exclude, files matching include will still be logged; '+
'POSIX regex syntax; can be used more than once')
parser.add_argument('--ignore', dest='ignore', action='append',
help=argparse.SUPPRESS)
parser.add_argument('--extra-heartbeats', dest='extra_heartbeats',
action='store_true',
help='reads extra heartbeats from STDIN as a JSON array until EOF')
parser.add_argument('--logfile', dest='logfile',
help='defaults to ~/.wakatime.log')
parser.add_argument('--apiurl', dest='api_url',
help='heartbeats api url; for debugging with a local server')
parser.add_argument('--timeout', dest='timeout', type=int,
help='number of seconds to wait when sending heartbeats to api; '+
'defaults to 60 seconds')
parser.add_argument('--config', dest='config',
help='defaults to ~/.wakatime.cfg')
parser.add_argument('--verbose', dest='verbose', action='store_true',
help='turns on debug messages in log file')
parser.add_argument('--version', action='version', version=__version__)
# parse command line arguments
args = parser.parse_args()
# use current unix epoch timestamp by default
if not args.timestamp:
args.timestamp = time.time()
# parse ~/.wakatime.cfg file
configs = parseConfigFile(args.config)
if configs is None:
return args, configs
# update args from configs
if not args.key:
default_key = None
if configs.has_option('settings', 'api_key'):
default_key = configs.get('settings', 'api_key')
elif configs.has_option('settings', 'apikey'):
default_key = configs.get('settings', 'apikey')
if default_key:
args.key = default_key
else:
parser.error('Missing api key')
if not args.entity:
if args.file:
args.entity = args.file
else:
parser.error('argument --entity is required')
if not args.exclude:
args.exclude = []
if configs.has_option('settings', 'ignore'):
try:
for pattern in configs.get('settings', 'ignore').split("\n"):
if pattern.strip() != '':
args.exclude.append(pattern)
except TypeError: # pragma: nocover
pass
if configs.has_option('settings', 'exclude'):
try:
for pattern in configs.get('settings', 'exclude').split("\n"):
if pattern.strip() != '':
args.exclude.append(pattern)
except TypeError: # pragma: nocover
pass
if not args.include:
args.include = []
if configs.has_option('settings', 'include'):
try:
for pattern in configs.get('settings', 'include').split("\n"):
if pattern.strip() != '':
args.include.append(pattern)
except TypeError: # pragma: nocover
pass
if args.offline and configs.has_option('settings', 'offline'):
args.offline = configs.getboolean('settings', 'offline')
if not args.hidefilenames and configs.has_option('settings', 'hidefilenames'):
args.hidefilenames = configs.getboolean('settings', 'hidefilenames')
if not args.proxy and configs.has_option('settings', 'proxy'):
args.proxy = configs.get('settings', 'proxy')
if not args.verbose and configs.has_option('settings', 'verbose'):
args.verbose = configs.getboolean('settings', 'verbose')
if not args.verbose and configs.has_option('settings', 'debug'):
args.verbose = configs.getboolean('settings', 'debug')
if not args.logfile and configs.has_option('settings', 'logfile'):
args.logfile = configs.get('settings', 'logfile')
if not args.api_url and configs.has_option('settings', 'api_url'):
args.api_url = configs.get('settings', 'api_url')
if not args.timeout and configs.has_option('settings', 'timeout'):
try:
args.timeout = int(configs.get('settings', 'timeout'))
except ValueError:
print(traceback.format_exc())
return args, configs
def should_exclude(entity, include, exclude):
if entity is not None and entity.strip() != '':
try:
for pattern in include:
try:
compiled = re.compile(pattern, re.IGNORECASE)
if compiled.search(entity):
return False
except re.error as ex:
log.warning(u('Regex error ({msg}) for include pattern: {pattern}').format(
msg=u(ex),
pattern=u(pattern),
))
except TypeError: # pragma: nocover
pass
try:
for pattern in exclude:
try:
compiled = re.compile(pattern, re.IGNORECASE)
if compiled.search(entity):
return pattern
except re.error as ex:
log.warning(u('Regex error ({msg}) for exclude pattern: {pattern}').format(
msg=u(ex),
pattern=u(pattern),
))
except TypeError: # pragma: nocover
pass
return False
def get_user_agent(plugin):
ver = sys.version_info
python_version = '%d.%d.%d.%s.%d' % (ver[0], ver[1], ver[2], ver[3], ver[4])
user_agent = u('wakatime/{ver} ({platform}) Python{py_ver}').format(
ver=u(__version__),
platform=u(platform.platform()),
py_ver=python_version,
)
if plugin:
user_agent = u('{user_agent} {plugin}').format(
user_agent=user_agent,
plugin=u(plugin),
)
else:
user_agent = u('{user_agent} Unknown/0').format(
user_agent=user_agent,
)
return user_agent
def send_heartbeat(project=None, branch=None, hostname=None, stats={}, key=None,
entity=None, timestamp=None, is_write=None, plugin=None,
offline=None, entity_type='file', hidefilenames=None,
proxy=None, api_url=None, timeout=None, **kwargs):
"""Sends heartbeat as POST request to WakaTime api server.
Returns `SUCCESS` when heartbeat was sent, otherwise returns an
error code constant.
"""
if not api_url:
api_url = 'https://api.wakatime.com/api/v1/heartbeats'
if not timeout:
timeout = 60
log.debug('Sending heartbeat to api at %s' % api_url)
data = {
'time': timestamp,
'entity': entity,
'type': entity_type,
}
if hidefilenames and entity is not None and entity_type == 'file':
extension = u(os.path.splitext(data['entity'])[1])
data['entity'] = u('HIDDEN{0}').format(extension)
if stats.get('lines'):
data['lines'] = stats['lines']
if stats.get('language'):
data['language'] = stats['language']
if stats.get('dependencies'):
data['dependencies'] = stats['dependencies']
if stats.get('lineno'):
data['lineno'] = stats['lineno']
if stats.get('cursorpos'):
data['cursorpos'] = stats['cursorpos']
if is_write:
data['is_write'] = is_write
if project:
data['project'] = project
if branch:
data['branch'] = branch
log.debug(data)
# setup api request
request_body = json.dumps(data)
api_key = u(base64.b64encode(str.encode(key) if is_py3 else key))
auth = u('Basic {api_key}').format(api_key=api_key)
headers = {
'User-Agent': get_user_agent(plugin),
'Content-Type': 'application/json',
'Accept': 'application/json',
'Authorization': auth,
}
if hostname:
headers['X-Machine-Name'] = u(hostname).encode('utf-8')
proxies = {}
if proxy:
proxies['https'] = proxy
# add Olson timezone to request
try:
tz = tzlocal.get_localzone()
except:
tz = None
if tz:
headers['TimeZone'] = u(tz.zone).encode('utf-8')
session_cache = SessionCache()
session = session_cache.get()
# log time to api
response = None
try:
response = session.post(api_url, data=request_body, headers=headers,
proxies=proxies, timeout=timeout)
except RequestException:
exception_data = {
sys.exc_info()[0].__name__: u(sys.exc_info()[1]),
}
if log.isEnabledFor(logging.DEBUG):
exception_data['traceback'] = traceback.format_exc()
if offline:
queue = Queue()
queue.push(data, json.dumps(stats), plugin)
if log.isEnabledFor(logging.DEBUG):
log.warn(exception_data)
else:
log.error(exception_data)
else:
code = response.status_code if response is not None else None
content = response.text if response is not None else None
if code == requests.codes.created or code == requests.codes.accepted:
log.debug({
'response_code': code,
})
session_cache.save(session)
return SUCCESS
if offline:
if code != 400:
queue = Queue()
queue.push(data, json.dumps(stats), plugin)
if code == 401:
log.error({
'response_code': code,
'response_content': content,
})
session_cache.delete()
return AUTH_ERROR
elif log.isEnabledFor(logging.DEBUG):
log.warn({
'response_code': code,
'response_content': content,
})
else:
log.error({
'response_code': code,
'response_content': content,
})
else:
log.error({
'response_code': code,
'response_content': content,
})
session_cache.delete()
return API_ERROR
def sync_offline_heartbeats(args, hostname):
"""Sends all heartbeats which were cached in the offline Queue."""
queue = Queue()
while True:
heartbeat = queue.pop()
if heartbeat is None:
break
status = send_heartbeat(
project=heartbeat['project'],
entity=heartbeat['entity'],
timestamp=heartbeat['time'],
branch=heartbeat['branch'],
hostname=hostname,
stats=json.loads(heartbeat['stats']),
key=args.key,
is_write=heartbeat['is_write'],
plugin=heartbeat['plugin'],
offline=args.offline,
hidefilenames=args.hidefilenames,
entity_type=heartbeat['type'],
proxy=args.proxy,
api_url=args.api_url,
timeout=args.timeout,
)
if status != SUCCESS:
if status == AUTH_ERROR:
return AUTH_ERROR
break
return SUCCESS
def process_heartbeat(args, configs, hostname, heartbeat):
exclude = should_exclude(heartbeat['entity'], args.include, args.exclude)
if exclude is not False:
log.debug(u('Skipping because matches exclude pattern: {pattern}').format(
pattern=u(exclude),
))
return SUCCESS
if heartbeat.get('entity_type') not in ['file', 'domain', 'app']:
heartbeat['entity_type'] = 'file'
if heartbeat['entity_type'] != 'file' or os.path.isfile(heartbeat['entity']):
stats = get_file_stats(heartbeat['entity'],
entity_type=heartbeat['entity_type'],
lineno=heartbeat.get('lineno'),
cursorpos=heartbeat.get('cursorpos'),
plugin=args.plugin,
alternate_language=heartbeat.get('alternate_language'))
project = heartbeat.get('project') or heartbeat.get('alternate_project')
branch = None
if heartbeat['entity_type'] == 'file':
project, branch = get_project_info(configs, heartbeat)
heartbeat['project'] = project
heartbeat['branch'] = branch
heartbeat['stats'] = stats
heartbeat['hostname'] = hostname
heartbeat['timeout'] = args.timeout
heartbeat['key'] = args.key
heartbeat['plugin'] = args.plugin
heartbeat['offline'] = args.offline
heartbeat['hidefilenames'] = args.hidefilenames
heartbeat['proxy'] = args.proxy
heartbeat['api_url'] = args.api_url
return send_heartbeat(**heartbeat)
else:
log.debug('File does not exist; ignoring this heartbeat.')
return SUCCESS
from .heartbeat import Heartbeat
from .offlinequeue import Queue
def execute(argv=None):
if argv:
sys.argv = ['wakatime'] + argv
args, configs = parseArguments()
if configs is None:
return CONFIG_FILE_PARSE_ERROR
try:
args, configs = parse_arguments()
except SystemExit as ex:
return ex.code
setup_logging(args, __version__)
if args.today:
text, retval = get_time_today(args)
if text:
print(text)
return retval
try:
heartbeats = []
hostname = args.hostname or socket.gethostname()
heartbeat = vars(args)
retval = process_heartbeat(args, configs, hostname, heartbeat)
hb = Heartbeat(vars(args), args, configs)
if hb:
heartbeats.append(hb)
elif args.entity:
log.debug(hb.skip)
if args.extra_heartbeats:
try:
for heartbeat in json.loads(sys.stdin.readline()):
retval = process_heartbeat(args, configs, hostname, heartbeat)
except json.JSONDecodeError:
retval = MALFORMED_HEARTBEAT_ERROR
for extra_data in json.loads(sys.stdin.readline()):
hb = Heartbeat(extra_data, args, configs)
if hb:
heartbeats.append(hb)
else:
log.debug(hb.skip)
except json.JSONDecodeError as ex:
log.warning(u('Malformed extra heartbeats json: {msg}').format(
msg=u(ex),
))
retval = SUCCESS
while heartbeats:
retval = send_heartbeats(heartbeats[:HEARTBEATS_PER_REQUEST], args, configs)
heartbeats = heartbeats[HEARTBEATS_PER_REQUEST:]
if retval != SUCCESS:
break
if heartbeats:
Queue(args, configs).push_many(heartbeats)
if retval == SUCCESS:
retval = sync_offline_heartbeats(args, hostname)
queue = Queue(args, configs)
for offline_heartbeats in queue.pop_many(args.sync_offline_activity):
time.sleep(1)
retval = send_heartbeats(offline_heartbeats, args, configs)
if retval != SUCCESS:
break
return retval
except:
log.traceback()
log.traceback(logging.ERROR)
print(traceback.format_exc())
return UNKNOWN_ERROR

View File

@ -12,77 +12,74 @@
import logging
import os
import traceback
from time import sleep
from .compat import json
from .constants import DEFAULT_SYNC_OFFLINE_ACTIVITY, HEARTBEATS_PER_REQUEST
from .heartbeat import Heartbeat
try:
import sqlite3
HAS_SQL = True
except ImportError: # pragma: nocover
HAS_SQL = False
from .compat import u
log = logging.getLogger('WakaTime')
class Queue(object):
db_file = os.path.join(os.path.expanduser('~'), '.wakatime.db')
table_name = 'heartbeat_1'
db_file = '.wakatime.db'
table_name = 'heartbeat_2'
def get_db_file(self):
return self.db_file
args = None
configs = None
def __init__(self, args, configs):
self.args = args
self.configs = configs
def connect(self):
conn = sqlite3.connect(self.get_db_file())
conn = sqlite3.connect(self._get_db_file(), isolation_level=None)
c = conn.cursor()
c.execute('''CREATE TABLE IF NOT EXISTS {0} (
entity text,
type text,
time real,
project text,
branch text,
is_write integer,
stats text,
misc text,
plugin text)
id text,
heartbeat text)
'''.format(self.table_name))
return (conn, c)
def push(self, data, stats, plugin, misc=None):
if not HAS_SQL: # pragma: nocover
def push(self, heartbeat):
if not HAS_SQL:
return
try:
conn, c = self.connect()
heartbeat = {
'entity': u(data.get('entity')),
'type': u(data.get('type')),
'time': data.get('time'),
'project': u(data.get('project')),
'branch': u(data.get('branch')),
'is_write': 1 if data.get('is_write') else 0,
'stats': u(stats),
'misc': u(misc),
'plugin': u(plugin),
data = {
'id': heartbeat.get_id(),
'heartbeat': heartbeat.json(),
}
c.execute('INSERT INTO {0} VALUES (:entity,:type,:time,:project,:branch,:is_write,:stats,:misc,:plugin)'.format(self.table_name), heartbeat)
c.execute('INSERT INTO {0} VALUES (:id,:heartbeat)'.format(self.table_name), data)
conn.commit()
conn.close()
except sqlite3.Error:
log.error(traceback.format_exc())
log.traceback()
try:
conn.close()
except: # pragma: nocover
pass
def pop(self):
if not HAS_SQL: # pragma: nocover
if not HAS_SQL:
return None
tries = 3
wait = 0.1
heartbeat = None
try:
conn, c = self.connect()
except sqlite3.Error:
log.debug(traceback.format_exc())
log.traceback(logging.DEBUG)
return None
heartbeat = None
loop = True
while loop and tries > -1:
try:
@ -90,40 +87,47 @@ class Queue(object):
c.execute('SELECT * FROM {0} LIMIT 1'.format(self.table_name))
row = c.fetchone()
if row is not None:
values = []
clauses = []
index = 0
for row_name in ['entity', 'type', 'time', 'project', 'branch', 'is_write']:
if row[index] is not None:
clauses.append('{0}=?'.format(row_name))
values.append(row[index])
else: # pragma: nocover
clauses.append('{0} IS NULL'.format(row_name))
index += 1
if len(values) > 0:
c.execute('DELETE FROM {0} WHERE {1}'.format(self.table_name, ' AND '.join(clauses)), values)
else: # pragma: nocover
c.execute('DELETE FROM {0} WHERE {1}'.format(self.table_name, ' AND '.join(clauses)))
id = row[0]
heartbeat = Heartbeat(json.loads(row[1]), self.args, self.configs, _clone=True)
c.execute('DELETE FROM {0} WHERE id=?'.format(self.table_name), [id])
conn.commit()
if row is not None:
heartbeat = {
'entity': row[0],
'type': row[1],
'time': row[2],
'project': row[3],
'branch': row[4],
'is_write': True if row[5] is 1 else False,
'stats': row[6],
'misc': row[7],
'plugin': row[8],
}
loop = False
except sqlite3.Error: # pragma: nocover
log.debug(traceback.format_exc())
except sqlite3.Error:
log.traceback(logging.DEBUG)
sleep(wait)
tries -= 1
try:
conn.close()
except sqlite3.Error: # pragma: nocover
log.debug(traceback.format_exc())
except sqlite3.Error:
log.traceback(logging.DEBUG)
return heartbeat
def push_many(self, heartbeats):
for heartbeat in heartbeats:
self.push(heartbeat)
def pop_many(self, limit=None):
if limit is None:
limit = DEFAULT_SYNC_OFFLINE_ACTIVITY
heartbeats = []
count = 0
while count < limit:
heartbeat = self.pop()
if not heartbeat:
break
heartbeats.append(heartbeat)
count += 1
if count % HEARTBEATS_PER_REQUEST == 0:
yield heartbeats
heartbeats = []
if heartbeats:
yield heartbeats
def _get_db_file(self):
home = '~'
if os.environ.get('WAKATIME_HOME'):
home = os.environ.get('WAKATIME_HOME')
return os.path.join(os.path.expanduser(home), '.wakatime.db')

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
# Copyright (C) AB Strakt
# See LICENSE for details.
"""
pyOpenSSL - A simple wrapper around the OpenSSL library
"""
from OpenSSL import crypto, SSL
from OpenSSL.version import (
__author__, __copyright__, __email__, __license__, __summary__, __title__,
__uri__, __version__,
)
__all__ = [
"SSL", "crypto",
"__author__", "__copyright__", "__email__", "__license__", "__summary__",
"__title__", "__uri__", "__version__",
]

View File

@ -0,0 +1,161 @@
import sys
import warnings
from six import PY2, binary_type, text_type
from cryptography.hazmat.bindings.openssl.binding import Binding
binding = Binding()
binding.init_static_locks()
ffi = binding.ffi
lib = binding.lib
# This is a special CFFI allocator that does not bother to zero its memory
# after allocation. This has vastly better performance on large allocations and
# so should be used whenever we don't need the memory zeroed out.
no_zero_allocator = ffi.new_allocator(should_clear_after_alloc=False)
def text(charp):
"""
Get a native string type representing of the given CFFI ``char*`` object.
:param charp: A C-style string represented using CFFI.
:return: :class:`str`
"""
if not charp:
return ""
return native(ffi.string(charp))
def exception_from_error_queue(exception_type):
"""
Convert an OpenSSL library failure into a Python exception.
When a call to the native OpenSSL library fails, this is usually signalled
by the return value, and an error code is stored in an error queue
associated with the current thread. The err library provides functions to
obtain these error codes and textual error messages.
"""
errors = []
while True:
error = lib.ERR_get_error()
if error == 0:
break
errors.append((
text(lib.ERR_lib_error_string(error)),
text(lib.ERR_func_error_string(error)),
text(lib.ERR_reason_error_string(error))))
raise exception_type(errors)
def make_assert(error):
"""
Create an assert function that uses :func:`exception_from_error_queue` to
raise an exception wrapped by *error*.
"""
def openssl_assert(ok):
"""
If *ok* is not True, retrieve the error from OpenSSL and raise it.
"""
if ok is not True:
exception_from_error_queue(error)
return openssl_assert
def native(s):
"""
Convert :py:class:`bytes` or :py:class:`unicode` to the native
:py:class:`str` type, using UTF-8 encoding if conversion is necessary.
:raise UnicodeError: The input string is not UTF-8 decodeable.
:raise TypeError: The input is neither :py:class:`bytes` nor
:py:class:`unicode`.
"""
if not isinstance(s, (binary_type, text_type)):
raise TypeError("%r is neither bytes nor unicode" % s)
if PY2:
if isinstance(s, text_type):
return s.encode("utf-8")
else:
if isinstance(s, binary_type):
return s.decode("utf-8")
return s
def path_string(s):
"""
Convert a Python string to a :py:class:`bytes` string identifying the same
path and which can be passed into an OpenSSL API accepting a filename.
:param s: An instance of :py:class:`bytes` or :py:class:`unicode`.
:return: An instance of :py:class:`bytes`.
"""
if isinstance(s, binary_type):
return s
elif isinstance(s, text_type):
return s.encode(sys.getfilesystemencoding())
else:
raise TypeError("Path must be represented as bytes or unicode string")
if PY2:
def byte_string(s):
return s
else:
def byte_string(s):
return s.encode("charmap")
# A marker object to observe whether some optional arguments are passed any
# value or not.
UNSPECIFIED = object()
_TEXT_WARNING = (
text_type.__name__ + " for {0} is no longer accepted, use bytes"
)
def text_to_bytes_and_warn(label, obj):
"""
If ``obj`` is text, emit a warning that it should be bytes instead and try
to convert it to bytes automatically.
:param str label: The name of the parameter from which ``obj`` was taken
(so a developer can easily find the source of the problem and correct
it).
:return: If ``obj`` is the text string type, a ``bytes`` object giving the
UTF-8 encoding of that text is returned. Otherwise, ``obj`` itself is
returned.
"""
if isinstance(obj, text_type):
warnings.warn(
_TEXT_WARNING.format(label),
category=DeprecationWarning,
stacklevel=3
)
return obj.encode('utf-8')
return obj
try:
# newer versions of cffi free the buffer deterministically
with ffi.from_buffer(b""):
pass
from_buffer = ffi.from_buffer
except AttributeError:
# cffi < 0.12 frees the buffer with refcounting gc
from contextlib import contextmanager
@contextmanager
def from_buffer(*args):
yield ffi.from_buffer(*args)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,42 @@
from __future__ import print_function
import ssl
import sys
import OpenSSL.SSL
import cffi
import cryptography
from . import version
_env_info = u"""\
pyOpenSSL: {pyopenssl}
cryptography: {cryptography}
cffi: {cffi}
cryptography's compiled against OpenSSL: {crypto_openssl_compile}
cryptography's linked OpenSSL: {crypto_openssl_link}
Pythons's OpenSSL: {python_openssl}
Python executable: {python}
Python version: {python_version}
Platform: {platform}
sys.path: {sys_path}""".format(
pyopenssl=version.__version__,
crypto_openssl_compile=OpenSSL._util.ffi.string(
OpenSSL._util.lib.OPENSSL_VERSION_TEXT,
).decode("ascii"),
crypto_openssl_link=OpenSSL.SSL.SSLeay_version(
OpenSSL.SSL.SSLEAY_VERSION
).decode("ascii"),
python_openssl=getattr(ssl, "OPENSSL_VERSION", "n/a"),
cryptography=cryptography.__version__,
cffi=cffi.__version__,
python=sys.executable,
python_version=sys.version,
platform=sys.platform,
sys_path=sys.path,
)
if __name__ == "__main__":
print(_env_info)

View File

@ -0,0 +1,40 @@
"""
PRNG management routines, thin wrappers.
"""
from OpenSSL._util import lib as _lib
def add(buffer, entropy):
"""
Mix bytes from *string* into the PRNG state.
The *entropy* argument is (the lower bound of) an estimate of how much
randomness is contained in *string*, measured in bytes.
For more information, see e.g. :rfc:`1750`.
This function is only relevant if you are forking Python processes and
need to reseed the CSPRNG after fork.
:param buffer: Buffer with random data.
:param entropy: The entropy (in bytes) measurement of the buffer.
:return: :obj:`None`
"""
if not isinstance(buffer, bytes):
raise TypeError("buffer must be a byte string")
if not isinstance(entropy, int):
raise TypeError("entropy must be an integer")
_lib.RAND_add(buffer, len(buffer), entropy)
def status():
"""
Check whether the PRNG has been seeded with enough data.
:return: 1 if the PRNG is seeded enough, 0 otherwise.
"""
return _lib.RAND_status()

View File

@ -0,0 +1,31 @@
import warnings
from threading import RLock as _RLock
from OpenSSL import SSL as _ssl
warnings.warn(
"OpenSSL.tsafe is deprecated and will be removed",
DeprecationWarning, stacklevel=3
)
class Connection:
def __init__(self, *args):
self._ssl_conn = _ssl.Connection(*args)
self._lock = _RLock()
for f in ('get_context', 'pending', 'send', 'write', 'recv', 'read',
'renegotiate', 'bind', 'listen', 'connect', 'accept',
'setblocking', 'fileno', 'shutdown', 'close', 'get_cipher_list',
'getpeername', 'getsockname', 'getsockopt', 'setsockopt',
'makefile', 'get_app_data', 'set_app_data', 'state_string',
'sock_shutdown', 'get_peer_certificate', 'get_peer_cert_chain',
'want_read', 'want_write', 'set_connect_state',
'set_accept_state', 'connect_ex', 'sendall'):
exec("""def %s(self, *args):
self._lock.acquire()
try:
return self._ssl_conn.%s(*args)
finally:
self._lock.release()\n""" % (f, f))

View File

@ -0,0 +1,22 @@
# Copyright (C) AB Strakt
# Copyright (C) Jean-Paul Calderone
# See LICENSE for details.
"""
pyOpenSSL - A simple wrapper around the OpenSSL library
"""
__all__ = [
"__author__", "__copyright__", "__email__", "__license__", "__summary__",
"__title__", "__uri__", "__version__",
]
__version__ = "19.1.0"
__title__ = "pyOpenSSL"
__uri__ = "https://pyopenssl.org/"
__summary__ = "Python wrapper module around the OpenSSL library"
__author__ = "The pyOpenSSL developers"
__email__ = "cryptography-dev@python.org"
__license__ = "Apache License, Version 2.0"
__copyright__ = "Copyright 2001-2017 {0}".format(__author__)

View File

@ -1,14 +0,0 @@
import os
import sys
from ..compat import is_py2
if is_py2:
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'py2'))
else:
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'py3'))
import tzlocal
from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename
from pygments.modeline import get_filetype_from_buffer
from pygments.util import ClassNotFound

View File

@ -0,0 +1,3 @@
from .core import where
__version__ = "2019.03.09"

View File

@ -0,0 +1,2 @@
from . import where
print(where())

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem.
"""
import os
def where():
f = os.path.dirname(__file__)
return os.path.join(f, 'cacert.pem')

View File

@ -15,18 +15,25 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
__version__ = "2.3.0"
from sys import version_info
from .compat import PY2, PY3
from .universaldetector import UniversalDetector
from .version import __version__, VERSION
def detect(aBuf):
if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
(version_info >= (3, 0) and not isinstance(aBuf, bytes))):
raise ValueError('Expected a bytes object, not a unicode object')
def detect(byte_str):
"""
Detect the encoding of the given byte string.
from . import universaldetector
u = universaldetector.UniversalDetector()
u.reset()
u.feed(aBuf)
u.close()
return u.result
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
raise TypeError('Expected object of type bytes or bytearray, got: '
'{0}'.format(type(byte_str)))
else:
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector.feed(byte_str)
return detector.close()

View File

@ -0,0 +1,386 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# Big5 frequency table
# by Taiwan's Mandarin Promotion Council
# <http://www.edu.tw:81/mandr/>
#
# 128 --> 0.42261
# 256 --> 0.57851
# 512 --> 0.74851
# 1024 --> 0.89384
# 2048 --> 0.97583
#
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
# Random Distribution Ration = 512/(5401-512)=0.105
#
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
#Char to FreqOrder table
BIG5_TABLE_SIZE = 5376
BIG5_CHAR_TO_FREQ_ORDER = (
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
)

View File

@ -28,15 +28,20 @@
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
from .mbcssm import Big5SMModel
from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(Big5SMModel)
self._mDistributionAnalyzer = Big5DistributionAnalysis()
super(Big5Prober, self).__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
def get_charset_name(self):
@property
def charset_name(self):
return "Big5"
@property
def language(self):
return "Chinese"

View File

@ -25,82 +25,84 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
GB2312_TYPICAL_DISTRIBUTION_RATIO)
from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
BIG5_TYPICAL_DISTRIBUTION_RATIO)
from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
JIS_TYPICAL_DISTRIBUTION_RATIO)
from .compat import wrap_ord
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
class CharDistributionAnalysis:
class CharDistributionAnalysis(object):
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
def __init__(self):
# Mapping table to get frequency order from char order (get from
# GetOrder())
self._mCharToFreqOrder = None
self._mTableSize = None # Size of above table
self._char_to_freq_order = None
self._table_size = None # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
self._mTypicalDistributionRatio = None
self.typical_distribution_ratio = None
self._done = None
self._total_chars = None
self._freq_chars = None
self.reset()
def reset(self):
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
self._mDone = False
self._mTotalChars = 0 # Total characters encountered
self._done = False
self._total_chars = 0 # Total characters encountered
# The number of characters whose frequency order is less than 512
self._mFreqChars = 0
self._freq_chars = 0
def feed(self, aBuf, aCharLen):
def feed(self, char, char_len):
"""feed a character with known length"""
if aCharLen == 2:
if char_len == 2:
# we only care about 2-bytes character in our distribution analysis
order = self.get_order(aBuf)
order = self.get_order(char)
else:
order = -1
if order >= 0:
self._mTotalChars += 1
self._total_chars += 1
# order is valid
if order < self._mTableSize:
if 512 > self._mCharToFreqOrder[order]:
self._mFreqChars += 1
if order < self._table_size:
if 512 > self._char_to_freq_order[order]:
self._freq_chars += 1
def get_confidence(self):
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
return SURE_NO
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
return self.SURE_NO
if self._mTotalChars != self._mFreqChars:
r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars)
* self._mTypicalDistributionRatio))
if r < SURE_YES:
if self._total_chars != self._freq_chars:
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
* self.typical_distribution_ratio))
if r < self.SURE_YES:
return r
# normalize confidence (we don't want to be 100% sure)
return SURE_YES
return self.SURE_YES
def got_enough_data(self):
# It is not necessary to receive all data to draw conclusion.
# For charset detection, certain amount of data is enough
return self._mTotalChars > ENOUGH_DATA_THRESHOLD
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
def get_order(self, aBuf):
def get_order(self, byte_str):
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
@ -110,55 +112,55 @@ class CharDistributionAnalysis:
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = EUCTWCharToFreqOrder
self._mTableSize = EUCTW_TABLE_SIZE
self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
super(EUCTWDistributionAnalysis, self).__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
def get_order(self, byte_str):
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = wrap_ord(aBuf[0])
first_char = byte_str[0]
if first_char >= 0xC4:
return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
else:
return -1
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = EUCKRCharToFreqOrder
self._mTableSize = EUCKR_TABLE_SIZE
self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
super(EUCKRDistributionAnalysis, self).__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
def get_order(self, byte_str):
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = wrap_ord(aBuf[0])
first_char = byte_str[0]
if first_char >= 0xB0:
return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
else:
return -1
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = GB2312CharToFreqOrder
self._mTableSize = GB2312_TABLE_SIZE
self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
super(GB2312DistributionAnalysis, self).__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
def get_order(self, byte_str):
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0xB0) and (second_char >= 0xA1):
return 94 * (first_char - 0xB0) + second_char - 0xA1
else:
@ -167,17 +169,17 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = Big5CharToFreqOrder
self._mTableSize = BIG5_TABLE_SIZE
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
super(Big5DistributionAnalysis, self).__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
def get_order(self, byte_str):
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
first_char, second_char = byte_str[0], byte_str[1]
if first_char >= 0xA4:
if second_char >= 0xA1:
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
@ -189,17 +191,17 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = JISCharToFreqOrder
self._mTableSize = JIS_TABLE_SIZE
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
super(SJISDistributionAnalysis, self).__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
def get_order(self, byte_str):
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
# no validation needed here. State machine has done that
first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1])
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0x81) and (first_char <= 0x9F):
order = 188 * (first_char - 0x81)
elif (first_char >= 0xE0) and (first_char <= 0xEF):
@ -214,18 +216,18 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = JISCharToFreqOrder
self._mTableSize = JIS_TABLE_SIZE
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
super(EUCJPDistributionAnalysis, self).__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, aBuf):
def get_order(self, byte_str):
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
char = wrap_ord(aBuf[0])
char = byte_str[0]
if char >= 0xA0:
return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
else:
return -1

View File

@ -0,0 +1,106 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import ProbingState
from .charsetprober import CharSetProber
class CharSetGroupProber(CharSetProber):
def __init__(self, lang_filter=None):
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
self._active_num = 0
self.probers = []
self._best_guess_prober = None
def reset(self):
super(CharSetGroupProber, self).reset()
self._active_num = 0
for prober in self.probers:
if prober:
prober.reset()
prober.active = True
self._active_num += 1
self._best_guess_prober = None
@property
def charset_name(self):
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.charset_name
@property
def language(self):
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.language
def feed(self, byte_str):
for prober in self.probers:
if not prober:
continue
if not prober.active:
continue
state = prober.feed(byte_str)
if not state:
continue
if state == ProbingState.FOUND_IT:
self._best_guess_prober = prober
return self.state
elif state == ProbingState.NOT_ME:
prober.active = False
self._active_num -= 1
if self._active_num <= 0:
self._state = ProbingState.NOT_ME
return self.state
return self.state
def get_confidence(self):
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
elif state == ProbingState.NOT_ME:
return 0.01
best_conf = 0.0
self._best_guess_prober = None
for prober in self.probers:
if not prober:
continue
if not prober.active:
self.logger.debug('%s not active', prober.charset_name)
continue
conf = prober.get_confidence()
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
if best_conf < conf:
best_conf = conf
self._best_guess_prober = prober
if not self._best_guess_prober:
return 0.0
return best_conf

View File

@ -0,0 +1,145 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import logging
import re
from .enums import ProbingState
class CharSetProber(object):
SHORTCUT_THRESHOLD = 0.95
def __init__(self, lang_filter=None):
self._state = None
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
def reset(self):
self._state = ProbingState.DETECTING
@property
def charset_name(self):
return None
def feed(self, buf):
pass
@property
def state(self):
return self._state
def get_confidence(self):
return 0.0
@staticmethod
def filter_high_byte_only(buf):
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
return buf
@staticmethod
def filter_international_words(buf):
"""
We define three types of bytes:
alphabet: english alphabets [a-zA-Z]
international: international characters [\x80-\xFF]
marker: everything else [^a-zA-Z\x80-\xFF]
The input buffer can be thought to contain a series of words delimited
by markers. This function works to filter all words that contain at
least one international character. All contiguous sequences of markers
are replaced by a single space ascii character.
This filter applies to all scripts which do not use English characters.
"""
filtered = bytearray()
# This regex expression filters out only words that have at-least one
# international character. The word may include one marker character at
# the end.
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
buf)
for word in words:
filtered.extend(word[:-1])
# If the last character in the word is a marker, replace it with a
# space as markers shouldn't affect our analysis (they are used
# similarly across all languages and may thus have similar
# frequencies).
last_char = word[-1:]
if not last_char.isalpha() and last_char < b'\x80':
last_char = b' '
filtered.extend(last_char)
return filtered
@staticmethod
def filter_with_english_letters(buf):
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
Also retains English alphabet and high byte characters immediately
before occurrences of >.
This filter can be applied to all scripts which contain both English
characters and extended ASCII characters, but is currently only used by
``Latin1Prober``.
"""
filtered = bytearray()
in_tag = False
prev = 0
for curr in range(len(buf)):
# Slice here to get bytes instead of an int with Python 3
buf_char = buf[curr:curr + 1]
# Check if we're coming out of or entering an HTML tag
if buf_char == b'>':
in_tag = False
elif buf_char == b'<':
in_tag = True
# If current character is not extended-ASCII and not alphabetic...
if buf_char < b'\x80' and not buf_char.isalpha():
# ...and we're not in a tag
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character
filtered.extend(buf[prev:curr])
# Output a space to delimit stretch we kept
filtered.extend(b' ')
prev = curr + 1
# If we're not in a tag...
if not in_tag:
# Keep everything after last non-extended-ASCII, non-alphabetic
# character
filtered.extend(buf[prev:])
return filtered

View File

@ -0,0 +1 @@

View File

@ -17,9 +17,9 @@ from __future__ import absolute_import, print_function, unicode_literals
import argparse
import sys
from io import open
from chardet import __version__
from chardet.compat import PY2
from chardet.universaldetector import UniversalDetector
@ -35,9 +35,15 @@ def description_of(lines, name='stdin'):
"""
u = UniversalDetector()
for line in lines:
line = bytearray(line)
u.feed(line)
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
if u.done:
break
u.close()
result = u.result
if PY2:
name = name.decode(sys.getfilesystemencoding(), 'ignore')
if result['encoding']:
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
result['confidence'])
@ -46,23 +52,22 @@ def description_of(lines, name='stdin'):
def main(argv=None):
'''
"""
Handles command line arguments and gets things started.
:param argv: List of arguments, as if specified on the command-line.
If None, ``sys.argv[1:]`` is used instead.
:type argv: list of str
'''
"""
# Get command line arguments
parser = argparse.ArgumentParser(
description="Takes one or more file paths and reports their detected \
encodings",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
conflict_handler='resolve')
encodings")
parser.add_argument('input',
help='File whose encoding we would like to determine.',
help='File whose encoding we would like to determine. \
(default: stdin)',
type=argparse.FileType('rb'), nargs='*',
default=[sys.stdin])
default=[sys.stdin if PY2 else sys.stdin.buffer])
parser.add_argument('--version', action='version',
version='%(prog)s {0}'.format(__version__))
args = parser.parse_args(argv)

View File

@ -0,0 +1,88 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import logging
from .enums import MachineState
class CodingStateMachine(object):
"""
A state machine to verify a byte sequence for a particular encoding. For
each byte the detector receives, it will feed that byte to every active
state machine available, one byte at a time. The state machine changes its
state based on its previous state and the byte it receives. There are 3
states in a state machine that are of interest to an auto-detector:
START state: This is the state to start with, or a legal byte sequence
(i.e. a valid code point) for character has been identified.
ME state: This indicates that the state machine identified a byte sequence
that is specific to the charset it is designed for and that
there is no other possible encoding which can contain this byte
sequence. This will to lead to an immediate positive answer for
the detector.
ERROR state: This indicates the state machine identified an illegal byte
sequence for that encoding. This will lead to an immediate
negative answer for this encoding. Detector will exclude this
encoding from consideration from here on.
"""
def __init__(self, sm):
self._model = sm
self._curr_byte_pos = 0
self._curr_char_len = 0
self._curr_state = None
self.logger = logging.getLogger(__name__)
self.reset()
def reset(self):
self._curr_state = MachineState.START
def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model['class_table'][c]
if self._curr_state == MachineState.START:
self._curr_byte_pos = 0
self._curr_char_len = self._model['char_len_table'][byte_class]
# from byte's class and state_table, we get its next state
curr_state = (self._curr_state * self._model['class_factor']
+ byte_class)
self._curr_state = self._model['state_table'][curr_state]
self._curr_byte_pos += 1
return self._curr_state
def get_current_charlen(self):
return self._curr_char_len
def get_coding_state_machine(self):
return self._model['name']
@property
def language(self):
return self._model['language']

View File

@ -1,6 +1,7 @@
######################## BEGIN LICENSE BLOCK ########################
# Contributor(s):
# Ian Cordasco - port to Python
# Dan Blanchard
# Ian Cordasco
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@ -22,13 +23,12 @@ import sys
if sys.version_info < (3, 0):
PY2 = True
PY3 = False
base_str = (str, unicode)
text_type = unicode
else:
PY2 = False
PY3 = True
base_str = (bytes, str)
def wrap_ord(a):
if sys.version_info < (3, 0) and isinstance(a, base_str):
return ord(a)
else:
return a
text_type = str

View File

@ -25,20 +25,25 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
from .mbcssm import CP949SMModel
from .codingstatemachine import CodingStateMachine
from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(CP949SMModel)
super(CP949Prober, self).__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
# not different.
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
def get_charset_name(self):
@property
def charset_name(self):
return "CP949"
@property
def language(self):
return "Korean"

View File

@ -0,0 +1,76 @@
"""
All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
class InputState(object):
"""
This enum represents the different states a universal detector can be in.
"""
PURE_ASCII = 0
ESC_ASCII = 1
HIGH_BYTE = 2
class LanguageFilter(object):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
KOREAN = 0x08
NON_CJK = 0x10
ALL = 0x1F
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
CJK = CHINESE | JAPANESE | KOREAN
class ProbingState(object):
"""
This enum represents the different states a prober can be in.
"""
DETECTING = 0
FOUND_IT = 1
NOT_ME = 2
class MachineState(object):
"""
This enum represents the different states a state machine can be in.
"""
START = 0
ERROR = 1
ITS_ME = 2
class SequenceLikelihood(object):
"""
This enum represents the likelihood of a character following the previous one.
"""
NEGATIVE = 0
UNLIKELY = 1
LIKELY = 2
POSITIVE = 3
@classmethod
def get_num_categories(cls):
""":returns: The number of likelihood categories in the enum."""
return 4
class CharacterCategory(object):
"""
This enum represents the different categories language models for
``SingleByteCharsetProber`` put characters into.
Anything less than CONTROL is considered a letter.
"""
UNDEFINED = 255
LINE_BREAK = 254
SYMBOL = 253
DIGIT = 252
CONTROL = 251

View File

@ -0,0 +1,101 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, ProbingState, MachineState
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
ISO2022KR_SM_MODEL)
class EscCharSetProber(CharSetProber):
"""
This CharSetProber uses a "code scheme" approach for detecting encodings,
whereby easily recognizable escape or shift sequences are relied on to
identify these encodings.
"""
def __init__(self, lang_filter=None):
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
if self.lang_filter & LanguageFilter.JAPANESE:
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
self.active_sm_count = None
self._detected_charset = None
self._detected_language = None
self._state = None
self.reset()
def reset(self):
super(EscCharSetProber, self).reset()
for coding_sm in self.coding_sm:
if not coding_sm:
continue
coding_sm.active = True
coding_sm.reset()
self.active_sm_count = len(self.coding_sm)
self._detected_charset = None
self._detected_language = None
@property
def charset_name(self):
return self._detected_charset
@property
def language(self):
return self._detected_language
def get_confidence(self):
if self._detected_charset:
return 0.99
else:
return 0.00
def feed(self, byte_str):
for c in byte_str:
for coding_sm in self.coding_sm:
if not coding_sm or not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
coding_sm.active = False
self.active_sm_count -= 1
if self.active_sm_count <= 0:
self._state = ProbingState.NOT_ME
return self.state
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
self._detected_charset = coding_sm.get_coding_state_machine()
self._detected_language = coding_sm.language
return self.state
return self.state

View File

@ -0,0 +1,246 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import MachineState
HZ_CLS = (
1,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,4,0,5,2,0, # 78 - 7f
1,1,1,1,1,1,1,1, # 80 - 87
1,1,1,1,1,1,1,1, # 88 - 8f
1,1,1,1,1,1,1,1, # 90 - 97
1,1,1,1,1,1,1,1, # 98 - 9f
1,1,1,1,1,1,1,1, # a0 - a7
1,1,1,1,1,1,1,1, # a8 - af
1,1,1,1,1,1,1,1, # b0 - b7
1,1,1,1,1,1,1,1, # b8 - bf
1,1,1,1,1,1,1,1, # c0 - c7
1,1,1,1,1,1,1,1, # c8 - cf
1,1,1,1,1,1,1,1, # d0 - d7
1,1,1,1,1,1,1,1, # d8 - df
1,1,1,1,1,1,1,1, # e0 - e7
1,1,1,1,1,1,1,1, # e8 - ef
1,1,1,1,1,1,1,1, # f0 - f7
1,1,1,1,1,1,1,1, # f8 - ff
)
HZ_ST = (
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
)
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
HZ_SM_MODEL = {'class_table': HZ_CLS,
'class_factor': 6,
'state_table': HZ_ST,
'char_len_table': HZ_CHAR_LEN_TABLE,
'name': "HZ-GB-2312",
'language': 'Chinese'}
ISO2022CN_CLS = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,4,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022CN_ST = (
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
)
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
'class_factor': 9,
'state_table': ISO2022CN_ST,
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
'name': "ISO-2022-CN",
'language': 'Chinese'}
ISO2022JP_CLS = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,2,2, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,7,0,0,0, # 20 - 27
3,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
6,0,4,0,8,0,0,0, # 40 - 47
0,9,5,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022JP_ST = (
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
)
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
'class_factor': 10,
'state_table': ISO2022JP_ST,
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
'name': "ISO-2022-JP",
'language': 'Japanese'}
ISO2022KR_CLS = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,3,0,0,0, # 20 - 27
0,4,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,5,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022KR_ST = (
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
)
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
'class_factor': 6,
'state_table': ISO2022KR_ST,
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
'name': "ISO-2022-KR",
'language': 'Korean'}

View File

@ -0,0 +1,92 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import ProbingState, MachineState
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCJPDistributionAnalysis
from .jpcntx import EUCJPContextAnalysis
from .mbcssm import EUCJP_SM_MODEL
class EUCJPProber(MultiByteCharSetProber):
def __init__(self):
super(EUCJPProber, self).__init__()
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
self.distribution_analyzer = EUCJPDistributionAnalysis()
self.context_analyzer = EUCJPContextAnalysis()
self.reset()
def reset(self):
super(EUCJPProber, self).reset()
self.context_analyzer.reset()
@property
def charset_name(self):
return "EUC-JP"
@property
def language(self):
return "Japanese"
def feed(self, byte_str):
for i in range(len(byte_str)):
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
coding_state = self.coding_sm.next_state(byte_str[i])
if coding_state == MachineState.ERROR:
self.logger.debug('%s %s prober hit error at byte %s',
self.charset_name, self.language, i)
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
self._last_char[1] = byte_str[0]
self.context_analyzer.feed(self._last_char, char_len)
self.distribution_analyzer.feed(self._last_char, char_len)
else:
self.context_analyzer.feed(byte_str[i - 1:i + 1],
char_len)
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
if (self.context_analyzer.got_enough_data() and
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
self._state = ProbingState.FOUND_IT
return self.state
def get_confidence(self):
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)

View File

@ -0,0 +1,195 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# Sampling from about 20M text materials include literature and computer technology
# 128 --> 0.79
# 256 --> 0.92
# 512 --> 0.986
# 1024 --> 0.99944
# 2048 --> 0.99999
#
# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
# Random Distribution Ration = 512 / (2350-512) = 0.279.
#
# Typical Distribution Ratio
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
EUCKR_TABLE_SIZE = 2352
# Char to FreqOrder table ,
EUCKR_CHAR_TO_FREQ_ORDER = (
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841,
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910,
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610,
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939,
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934,
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7,
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250,
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824,
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745,
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61,
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032,
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075,
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274,
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721,
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463,
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285,
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10,
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350,
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247,
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259,
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262,
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273,
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117,
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312,
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229,
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315,
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170,
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485,
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494,
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544,
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562,
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431,
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406,
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
)

View File

@ -28,15 +28,20 @@
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCKRDistributionAnalysis
from .mbcssm import EUCKRSMModel
from .mbcssm import EUCKR_SM_MODEL
class EUCKRProber(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(EUCKRSMModel)
self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
super(EUCKRProber, self).__init__()
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
def get_charset_name(self):
@property
def charset_name(self):
return "EUC-KR"
@property
def language(self):
return "Korean"

View File

@ -44,385 +44,344 @@
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
# Char to FreqOrder table ,
EUCTW_TABLE_SIZE = 8102
EUCTW_TABLE_SIZE = 5376
EUCTWCharToFreqOrder = (
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
#Everything below is of no interest for detection purpose
2515,1613,4582,8119,3312,3866,2516,8120,4058,8121,1637,4059,2466,4583,3867,8122, # 8118
2493,3016,3734,8123,8124,2192,8125,8126,2162,8127,8128,8129,8130,8131,8132,8133, # 8134
8134,8135,8136,8137,8138,8139,8140,8141,8142,8143,8144,8145,8146,8147,8148,8149, # 8150
8150,8151,8152,8153,8154,8155,8156,8157,8158,8159,8160,8161,8162,8163,8164,8165, # 8166
8166,8167,8168,8169,8170,8171,8172,8173,8174,8175,8176,8177,8178,8179,8180,8181, # 8182
8182,8183,8184,8185,8186,8187,8188,8189,8190,8191,8192,8193,8194,8195,8196,8197, # 8198
8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8208,8209,8210,8211,8212,8213, # 8214
8214,8215,8216,8217,8218,8219,8220,8221,8222,8223,8224,8225,8226,8227,8228,8229, # 8230
8230,8231,8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8242,8243,8244,8245, # 8246
8246,8247,8248,8249,8250,8251,8252,8253,8254,8255,8256,8257,8258,8259,8260,8261, # 8262
8262,8263,8264,8265,8266,8267,8268,8269,8270,8271,8272,8273,8274,8275,8276,8277, # 8278
8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8292,8293, # 8294
8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,8304,8305,8306,8307,8308,8309, # 8310
8310,8311,8312,8313,8314,8315,8316,8317,8318,8319,8320,8321,8322,8323,8324,8325, # 8326
8326,8327,8328,8329,8330,8331,8332,8333,8334,8335,8336,8337,8338,8339,8340,8341, # 8342
8342,8343,8344,8345,8346,8347,8348,8349,8350,8351,8352,8353,8354,8355,8356,8357, # 8358
8358,8359,8360,8361,8362,8363,8364,8365,8366,8367,8368,8369,8370,8371,8372,8373, # 8374
8374,8375,8376,8377,8378,8379,8380,8381,8382,8383,8384,8385,8386,8387,8388,8389, # 8390
8390,8391,8392,8393,8394,8395,8396,8397,8398,8399,8400,8401,8402,8403,8404,8405, # 8406
8406,8407,8408,8409,8410,8411,8412,8413,8414,8415,8416,8417,8418,8419,8420,8421, # 8422
8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8433,8434,8435,8436,8437, # 8438
8438,8439,8440,8441,8442,8443,8444,8445,8446,8447,8448,8449,8450,8451,8452,8453, # 8454
8454,8455,8456,8457,8458,8459,8460,8461,8462,8463,8464,8465,8466,8467,8468,8469, # 8470
8470,8471,8472,8473,8474,8475,8476,8477,8478,8479,8480,8481,8482,8483,8484,8485, # 8486
8486,8487,8488,8489,8490,8491,8492,8493,8494,8495,8496,8497,8498,8499,8500,8501, # 8502
8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8517, # 8518
8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533, # 8534
8534,8535,8536,8537,8538,8539,8540,8541,8542,8543,8544,8545,8546,8547,8548,8549, # 8550
8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,8560,8561,8562,8563,8564,8565, # 8566
8566,8567,8568,8569,8570,8571,8572,8573,8574,8575,8576,8577,8578,8579,8580,8581, # 8582
8582,8583,8584,8585,8586,8587,8588,8589,8590,8591,8592,8593,8594,8595,8596,8597, # 8598
8598,8599,8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613, # 8614
8614,8615,8616,8617,8618,8619,8620,8621,8622,8623,8624,8625,8626,8627,8628,8629, # 8630
8630,8631,8632,8633,8634,8635,8636,8637,8638,8639,8640,8641,8642,8643,8644,8645, # 8646
8646,8647,8648,8649,8650,8651,8652,8653,8654,8655,8656,8657,8658,8659,8660,8661, # 8662
8662,8663,8664,8665,8666,8667,8668,8669,8670,8671,8672,8673,8674,8675,8676,8677, # 8678
8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694
8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741) # 8742
EUCTW_CHAR_TO_FREQ_ORDER = (
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
)
# flake8: noqa

View File

@ -13,12 +13,12 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@ -28,14 +28,19 @@
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCTWDistributionAnalysis
from .mbcssm import EUCTWSMModel
from .mbcssm import EUCTW_SM_MODEL
class EUCTWProber(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(EUCTWSMModel)
self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
super(EUCTWProber, self).__init__()
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
self.distribution_analyzer = EUCTWDistributionAnalysis()
self.reset()
def get_charset_name(self):
@property
def charset_name(self):
return "EUC-TW"
@property
def language(self):
return "Taiwan"

View File

@ -43,7 +43,7 @@ GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
GB2312_TABLE_SIZE = 3760
GB2312CharToFreqOrder = (
GB2312_CHAR_TO_FREQ_ORDER = (
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
@ -278,195 +278,6 @@ GB2312CharToFreqOrder = (
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, # last 512
#Everything below is of no interest for detection purpose
5508,6484,3900,3414,3974,4441,4024,3537,4037,5628,5099,3633,6485,3148,6486,3636,
5509,3257,5510,5973,5445,5872,4941,4403,3174,4627,5873,6276,2286,4230,5446,5874,
5122,6102,6103,4162,5447,5123,5323,4849,6277,3980,3851,5066,4246,5774,5067,6278,
3001,2807,5695,3346,5775,5974,5158,5448,6487,5975,5976,5776,3598,6279,5696,4806,
4211,4154,6280,6488,6489,6490,6281,4212,5037,3374,4171,6491,4562,4807,4722,4827,
5977,6104,4532,4079,5159,5324,5160,4404,3858,5359,5875,3975,4288,4610,3486,4512,
5325,3893,5360,6282,6283,5560,2522,4231,5978,5186,5449,2569,3878,6284,5401,3578,
4415,6285,4656,5124,5979,2506,4247,4449,3219,3417,4334,4969,4329,6492,4576,4828,
4172,4416,4829,5402,6286,3927,3852,5361,4369,4830,4477,4867,5876,4173,6493,6105,
4657,6287,6106,5877,5450,6494,4155,4868,5451,3700,5629,4384,6288,6289,5878,3189,
4881,6107,6290,6495,4513,6496,4692,4515,4723,5100,3356,6497,6291,3810,4080,5561,
3570,4430,5980,6498,4355,5697,6499,4724,6108,6109,3764,4050,5038,5879,4093,3226,
6292,5068,5217,4693,3342,5630,3504,4831,4377,4466,4309,5698,4431,5777,6293,5778,
4272,3706,6110,5326,3752,4676,5327,4273,5403,4767,5631,6500,5699,5880,3475,5039,
6294,5562,5125,4348,4301,4482,4068,5126,4593,5700,3380,3462,5981,5563,3824,5404,
4970,5511,3825,4738,6295,6501,5452,4516,6111,5881,5564,6502,6296,5982,6503,4213,
4163,3454,6504,6112,4009,4450,6113,4658,6297,6114,3035,6505,6115,3995,4904,4739,
4563,4942,4110,5040,3661,3928,5362,3674,6506,5292,3612,4791,5565,4149,5983,5328,
5259,5021,4725,4577,4564,4517,4364,6298,5405,4578,5260,4594,4156,4157,5453,3592,
3491,6507,5127,5512,4709,4922,5984,5701,4726,4289,6508,4015,6116,5128,4628,3424,
4241,5779,6299,4905,6509,6510,5454,5702,5780,6300,4365,4923,3971,6511,5161,3270,
3158,5985,4100, 867,5129,5703,6117,5363,3695,3301,5513,4467,6118,6512,5455,4232,
4242,4629,6513,3959,4478,6514,5514,5329,5986,4850,5162,5566,3846,4694,6119,5456,
4869,5781,3779,6301,5704,5987,5515,4710,6302,5882,6120,4392,5364,5705,6515,6121,
6516,6517,3736,5988,5457,5989,4695,2457,5883,4551,5782,6303,6304,6305,5130,4971,
6122,5163,6123,4870,3263,5365,3150,4871,6518,6306,5783,5069,5706,3513,3498,4409,
5330,5632,5366,5458,5459,3991,5990,4502,3324,5991,5784,3696,4518,5633,4119,6519,
4630,5634,4417,5707,4832,5992,3418,6124,5993,5567,4768,5218,6520,4595,3458,5367,
6125,5635,6126,4202,6521,4740,4924,6307,3981,4069,4385,6308,3883,2675,4051,3834,
4302,4483,5568,5994,4972,4101,5368,6309,5164,5884,3922,6127,6522,6523,5261,5460,
5187,4164,5219,3538,5516,4111,3524,5995,6310,6311,5369,3181,3386,2484,5188,3464,
5569,3627,5708,6524,5406,5165,4677,4492,6312,4872,4851,5885,4468,5996,6313,5709,
5710,6128,2470,5886,6314,5293,4882,5785,3325,5461,5101,6129,5711,5786,6525,4906,
6526,6527,4418,5887,5712,4808,2907,3701,5713,5888,6528,3765,5636,5331,6529,6530,
3593,5889,3637,4943,3692,5714,5787,4925,6315,6130,5462,4405,6131,6132,6316,5262,
6531,6532,5715,3859,5716,5070,4696,5102,3929,5788,3987,4792,5997,6533,6534,3920,
4809,5000,5998,6535,2974,5370,6317,5189,5263,5717,3826,6536,3953,5001,4883,3190,
5463,5890,4973,5999,4741,6133,6134,3607,5570,6000,4711,3362,3630,4552,5041,6318,
6001,2950,2953,5637,4646,5371,4944,6002,2044,4120,3429,6319,6537,5103,4833,6538,
6539,4884,4647,3884,6003,6004,4758,3835,5220,5789,4565,5407,6540,6135,5294,4697,
4852,6320,6321,3206,4907,6541,6322,4945,6542,6136,6543,6323,6005,4631,3519,6544,
5891,6545,5464,3784,5221,6546,5571,4659,6547,6324,6137,5190,6548,3853,6549,4016,
4834,3954,6138,5332,3827,4017,3210,3546,4469,5408,5718,3505,4648,5790,5131,5638,
5791,5465,4727,4318,6325,6326,5792,4553,4010,4698,3439,4974,3638,4335,3085,6006,
5104,5042,5166,5892,5572,6327,4356,4519,5222,5573,5333,5793,5043,6550,5639,5071,
4503,6328,6139,6551,6140,3914,3901,5372,6007,5640,4728,4793,3976,3836,4885,6552,
4127,6553,4451,4102,5002,6554,3686,5105,6555,5191,5072,5295,4611,5794,5296,6556,
5893,5264,5894,4975,5466,5265,4699,4976,4370,4056,3492,5044,4886,6557,5795,4432,
4769,4357,5467,3940,4660,4290,6141,4484,4770,4661,3992,6329,4025,4662,5022,4632,
4835,4070,5297,4663,4596,5574,5132,5409,5895,6142,4504,5192,4664,5796,5896,3885,
5575,5797,5023,4810,5798,3732,5223,4712,5298,4084,5334,5468,6143,4052,4053,4336,
4977,4794,6558,5335,4908,5576,5224,4233,5024,4128,5469,5225,4873,6008,5045,4729,
4742,4633,3675,4597,6559,5897,5133,5577,5003,5641,5719,6330,6560,3017,2382,3854,
4406,4811,6331,4393,3964,4946,6561,2420,3722,6562,4926,4378,3247,1736,4442,6332,
5134,6333,5226,3996,2918,5470,4319,4003,4598,4743,4744,4485,3785,3902,5167,5004,
5373,4394,5898,6144,4874,1793,3997,6334,4085,4214,5106,5642,4909,5799,6009,4419,
4189,3330,5899,4165,4420,5299,5720,5227,3347,6145,4081,6335,2876,3930,6146,3293,
3786,3910,3998,5900,5300,5578,2840,6563,5901,5579,6147,3531,5374,6564,6565,5580,
4759,5375,6566,6148,3559,5643,6336,6010,5517,6337,6338,5721,5902,3873,6011,6339,
6567,5518,3868,3649,5722,6568,4771,4947,6569,6149,4812,6570,2853,5471,6340,6341,
5644,4795,6342,6012,5723,6343,5724,6013,4349,6344,3160,6150,5193,4599,4514,4493,
5168,4320,6345,4927,3666,4745,5169,5903,5005,4928,6346,5725,6014,4730,4203,5046,
4948,3395,5170,6015,4150,6016,5726,5519,6347,5047,3550,6151,6348,4197,4310,5904,
6571,5581,2965,6152,4978,3960,4291,5135,6572,5301,5727,4129,4026,5905,4853,5728,
5472,6153,6349,4533,2700,4505,5336,4678,3583,5073,2994,4486,3043,4554,5520,6350,
6017,5800,4487,6351,3931,4103,5376,6352,4011,4321,4311,4190,5136,6018,3988,3233,
4350,5906,5645,4198,6573,5107,3432,4191,3435,5582,6574,4139,5410,6353,5411,3944,
5583,5074,3198,6575,6354,4358,6576,5302,4600,5584,5194,5412,6577,6578,5585,5413,
5303,4248,5414,3879,4433,6579,4479,5025,4854,5415,6355,4760,4772,3683,2978,4700,
3797,4452,3965,3932,3721,4910,5801,6580,5195,3551,5907,3221,3471,3029,6019,3999,
5908,5909,5266,5267,3444,3023,3828,3170,4796,5646,4979,4259,6356,5647,5337,3694,
6357,5648,5338,4520,4322,5802,3031,3759,4071,6020,5586,4836,4386,5048,6581,3571,
4679,4174,4949,6154,4813,3787,3402,3822,3958,3215,3552,5268,4387,3933,4950,4359,
6021,5910,5075,3579,6358,4234,4566,5521,6359,3613,5049,6022,5911,3375,3702,3178,
4911,5339,4521,6582,6583,4395,3087,3811,5377,6023,6360,6155,4027,5171,5649,4421,
4249,2804,6584,2270,6585,4000,4235,3045,6156,5137,5729,4140,4312,3886,6361,4330,
6157,4215,6158,3500,3676,4929,4331,3713,4930,5912,4265,3776,3368,5587,4470,4855,
3038,4980,3631,6159,6160,4132,4680,6161,6362,3923,4379,5588,4255,6586,4121,6587,
6363,4649,6364,3288,4773,4774,6162,6024,6365,3543,6588,4274,3107,3737,5050,5803,
4797,4522,5589,5051,5730,3714,4887,5378,4001,4523,6163,5026,5522,4701,4175,2791,
3760,6589,5473,4224,4133,3847,4814,4815,4775,3259,5416,6590,2738,6164,6025,5304,
3733,5076,5650,4816,5590,6591,6165,6592,3934,5269,6593,3396,5340,6594,5804,3445,
3602,4042,4488,5731,5732,3525,5591,4601,5196,6166,6026,5172,3642,4612,3202,4506,
4798,6366,3818,5108,4303,5138,5139,4776,3332,4304,2915,3415,4434,5077,5109,4856,
2879,5305,4817,6595,5913,3104,3144,3903,4634,5341,3133,5110,5651,5805,6167,4057,
5592,2945,4371,5593,6596,3474,4182,6367,6597,6168,4507,4279,6598,2822,6599,4777,
4713,5594,3829,6169,3887,5417,6170,3653,5474,6368,4216,2971,5228,3790,4579,6369,
5733,6600,6601,4951,4746,4555,6602,5418,5475,6027,3400,4665,5806,6171,4799,6028,
5052,6172,3343,4800,4747,5006,6370,4556,4217,5476,4396,5229,5379,5477,3839,5914,
5652,5807,4714,3068,4635,5808,6173,5342,4192,5078,5419,5523,5734,6174,4557,6175,
4602,6371,6176,6603,5809,6372,5735,4260,3869,5111,5230,6029,5112,6177,3126,4681,
5524,5915,2706,3563,4748,3130,6178,4018,5525,6604,6605,5478,4012,4837,6606,4534,
4193,5810,4857,3615,5479,6030,4082,3697,3539,4086,5270,3662,4508,4931,5916,4912,
5811,5027,3888,6607,4397,3527,3302,3798,2775,2921,2637,3966,4122,4388,4028,4054,
1633,4858,5079,3024,5007,3982,3412,5736,6608,3426,3236,5595,3030,6179,3427,3336,
3279,3110,6373,3874,3039,5080,5917,5140,4489,3119,6374,5812,3405,4494,6031,4666,
4141,6180,4166,6032,5813,4981,6609,5081,4422,4982,4112,3915,5653,3296,3983,6375,
4266,4410,5654,6610,6181,3436,5082,6611,5380,6033,3819,5596,4535,5231,5306,5113,
6612,4952,5918,4275,3113,6613,6376,6182,6183,5814,3073,4731,4838,5008,3831,6614,
4888,3090,3848,4280,5526,5232,3014,5655,5009,5737,5420,5527,6615,5815,5343,5173,
5381,4818,6616,3151,4953,6617,5738,2796,3204,4360,2989,4281,5739,5174,5421,5197,
3132,5141,3849,5142,5528,5083,3799,3904,4839,5480,2880,4495,3448,6377,6184,5271,
5919,3771,3193,6034,6035,5920,5010,6036,5597,6037,6378,6038,3106,5422,6618,5423,
5424,4142,6619,4889,5084,4890,4313,5740,6620,3437,5175,5307,5816,4199,5198,5529,
5817,5199,5656,4913,5028,5344,3850,6185,2955,5272,5011,5818,4567,4580,5029,5921,
3616,5233,6621,6622,6186,4176,6039,6379,6380,3352,5200,5273,2908,5598,5234,3837,
5308,6623,6624,5819,4496,4323,5309,5201,6625,6626,4983,3194,3838,4167,5530,5922,
5274,6381,6382,3860,3861,5599,3333,4292,4509,6383,3553,5481,5820,5531,4778,6187,
3955,3956,4324,4389,4218,3945,4325,3397,2681,5923,4779,5085,4019,5482,4891,5382,
5383,6040,4682,3425,5275,4094,6627,5310,3015,5483,5657,4398,5924,3168,4819,6628,
5925,6629,5532,4932,4613,6041,6630,4636,6384,4780,4204,5658,4423,5821,3989,4683,
5822,6385,4954,6631,5345,6188,5425,5012,5384,3894,6386,4490,4104,6632,5741,5053,
6633,5823,5926,5659,5660,5927,6634,5235,5742,5824,4840,4933,4820,6387,4859,5928,
4955,6388,4143,3584,5825,5346,5013,6635,5661,6389,5014,5484,5743,4337,5176,5662,
6390,2836,6391,3268,6392,6636,6042,5236,6637,4158,6638,5744,5663,4471,5347,3663,
4123,5143,4293,3895,6639,6640,5311,5929,5826,3800,6189,6393,6190,5664,5348,3554,
3594,4749,4603,6641,5385,4801,6043,5827,4183,6642,5312,5426,4761,6394,5665,6191,
4715,2669,6643,6644,5533,3185,5427,5086,5930,5931,5386,6192,6044,6645,4781,4013,
5745,4282,4435,5534,4390,4267,6045,5746,4984,6046,2743,6193,3501,4087,5485,5932,
5428,4184,4095,5747,4061,5054,3058,3862,5933,5600,6646,5144,3618,6395,3131,5055,
5313,6396,4650,4956,3855,6194,3896,5202,4985,4029,4225,6195,6647,5828,5486,5829,
3589,3002,6648,6397,4782,5276,6649,6196,6650,4105,3803,4043,5237,5830,6398,4096,
3643,6399,3528,6651,4453,3315,4637,6652,3984,6197,5535,3182,3339,6653,3096,2660,
6400,6654,3449,5934,4250,4236,6047,6401,5831,6655,5487,3753,4062,5832,6198,6199,
6656,3766,6657,3403,4667,6048,6658,4338,2897,5833,3880,2797,3780,4326,6659,5748,
5015,6660,5387,4351,5601,4411,6661,3654,4424,5935,4339,4072,5277,4568,5536,6402,
6662,5238,6663,5349,5203,6200,5204,6201,5145,4536,5016,5056,4762,5834,4399,4957,
6202,6403,5666,5749,6664,4340,6665,5936,5177,5667,6666,6667,3459,4668,6404,6668,
6669,4543,6203,6670,4276,6405,4480,5537,6671,4614,5205,5668,6672,3348,2193,4763,
6406,6204,5937,5602,4177,5669,3419,6673,4020,6205,4443,4569,5388,3715,3639,6407,
6049,4058,6206,6674,5938,4544,6050,4185,4294,4841,4651,4615,5488,6207,6408,6051,
5178,3241,3509,5835,6208,4958,5836,4341,5489,5278,6209,2823,5538,5350,5206,5429,
6675,4638,4875,4073,3516,4684,4914,4860,5939,5603,5389,6052,5057,3237,5490,3791,
6676,6409,6677,4821,4915,4106,5351,5058,4243,5539,4244,5604,4842,4916,5239,3028,
3716,5837,5114,5605,5390,5940,5430,6210,4332,6678,5540,4732,3667,3840,6053,4305,
3408,5670,5541,6410,2744,5240,5750,6679,3234,5606,6680,5607,5671,3608,4283,4159,
4400,5352,4783,6681,6411,6682,4491,4802,6211,6412,5941,6413,6414,5542,5751,6683,
4669,3734,5942,6684,6415,5943,5059,3328,4670,4144,4268,6685,6686,6687,6688,4372,
3603,6689,5944,5491,4373,3440,6416,5543,4784,4822,5608,3792,4616,5838,5672,3514,
5391,6417,4892,6690,4639,6691,6054,5673,5839,6055,6692,6056,5392,6212,4038,5544,
5674,4497,6057,6693,5840,4284,5675,4021,4545,5609,6418,4454,6419,6213,4113,4472,
5314,3738,5087,5279,4074,5610,4959,4063,3179,4750,6058,6420,6214,3476,4498,4716,
5431,4960,4685,6215,5241,6694,6421,6216,6695,5841,5945,6422,3748,5946,5179,3905,
5752,5545,5947,4374,6217,4455,6423,4412,6218,4803,5353,6696,3832,5280,6219,4327,
4702,6220,6221,6059,4652,5432,6424,3749,4751,6425,5753,4986,5393,4917,5948,5030,
5754,4861,4733,6426,4703,6697,6222,4671,5949,4546,4961,5180,6223,5031,3316,5281,
6698,4862,4295,4934,5207,3644,6427,5842,5950,6428,6429,4570,5843,5282,6430,6224,
5088,3239,6060,6699,5844,5755,6061,6431,2701,5546,6432,5115,5676,4039,3993,3327,
4752,4425,5315,6433,3941,6434,5677,4617,4604,3074,4581,6225,5433,6435,6226,6062,
4823,5756,5116,6227,3717,5678,4717,5845,6436,5679,5846,6063,5847,6064,3977,3354,
6437,3863,5117,6228,5547,5394,4499,4524,6229,4605,6230,4306,4500,6700,5951,6065,
3693,5952,5089,4366,4918,6701,6231,5548,6232,6702,6438,4704,5434,6703,6704,5953,
4168,6705,5680,3420,6706,5242,4407,6066,3812,5757,5090,5954,4672,4525,3481,5681,
4618,5395,5354,5316,5955,6439,4962,6707,4526,6440,3465,4673,6067,6441,5682,6708,
5435,5492,5758,5683,4619,4571,4674,4804,4893,4686,5493,4753,6233,6068,4269,6442,
6234,5032,4705,5146,5243,5208,5848,6235,6443,4963,5033,4640,4226,6236,5849,3387,
6444,6445,4436,4437,5850,4843,5494,4785,4894,6709,4361,6710,5091,5956,3331,6237,
4987,5549,6069,6711,4342,3517,4473,5317,6070,6712,6071,4706,6446,5017,5355,6713,
6714,4988,5436,6447,4734,5759,6715,4735,4547,4456,4754,6448,5851,6449,6450,3547,
5852,5318,6451,6452,5092,4205,6716,6238,4620,4219,5611,6239,6072,4481,5760,5957,
5958,4059,6240,6453,4227,4537,6241,5761,4030,4186,5244,5209,3761,4457,4876,3337,
5495,5181,6242,5959,5319,5612,5684,5853,3493,5854,6073,4169,5613,5147,4895,6074,
5210,6717,5182,6718,3830,6243,2798,3841,6075,6244,5855,5614,3604,4606,5496,5685,
5118,5356,6719,6454,5960,5357,5961,6720,4145,3935,4621,5119,5962,4261,6721,6455,
4786,5963,4375,4582,6245,6246,6247,6076,5437,4877,5856,3376,4380,6248,4160,6722,
5148,6456,5211,6457,6723,4718,6458,6724,6249,5358,4044,3297,6459,6250,5857,5615,
5497,5245,6460,5498,6725,6251,6252,5550,3793,5499,2959,5396,6461,6462,4572,5093,
5500,5964,3806,4146,6463,4426,5762,5858,6077,6253,4755,3967,4220,5965,6254,4989,
5501,6464,4352,6726,6078,4764,2290,5246,3906,5438,5283,3767,4964,2861,5763,5094,
6255,6256,4622,5616,5859,5860,4707,6727,4285,4708,4824,5617,6257,5551,4787,5212,
4965,4935,4687,6465,6728,6466,5686,6079,3494,4413,2995,5247,5966,5618,6729,5967,
5764,5765,5687,5502,6730,6731,6080,5397,6467,4990,6258,6732,4538,5060,5619,6733,
4719,5688,5439,5018,5149,5284,5503,6734,6081,4607,6259,5120,3645,5861,4583,6260,
4584,4675,5620,4098,5440,6261,4863,2379,3306,4585,5552,5689,4586,5285,6735,4864,
6736,5286,6082,6737,4623,3010,4788,4381,4558,5621,4587,4896,3698,3161,5248,4353,
4045,6262,3754,5183,4588,6738,6263,6739,6740,5622,3936,6741,6468,6742,6264,5095,
6469,4991,5968,6743,4992,6744,6083,4897,6745,4256,5766,4307,3108,3968,4444,5287,
3889,4343,6084,4510,6085,4559,6086,4898,5969,6746,5623,5061,4919,5249,5250,5504,
5441,6265,5320,4878,3242,5862,5251,3428,6087,6747,4237,5624,5442,6266,5553,4539,
6748,2585,3533,5398,4262,6088,5150,4736,4438,6089,6267,5505,4966,6749,6268,6750,
6269,5288,5554,3650,6090,6091,4624,6092,5690,6751,5863,4270,5691,4277,5555,5864,
6752,5692,4720,4865,6470,5151,4688,4825,6753,3094,6754,6471,3235,4653,6755,5213,
5399,6756,3201,4589,5865,4967,6472,5866,6473,5019,3016,6757,5321,4756,3957,4573,
6093,4993,5767,4721,6474,6758,5625,6759,4458,6475,6270,6760,5556,4994,5214,5252,
6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970,
3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703,
5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767)
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
)
# flake8: noqa

View File

@ -13,12 +13,12 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@ -28,14 +28,19 @@
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import GB2312DistributionAnalysis
from .mbcssm import GB2312SMModel
from .mbcssm import GB2312_SM_MODEL
class GB2312Prober(MultiByteCharSetProber):
def __init__(self):
MultiByteCharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(GB2312SMModel)
self._mDistributionAnalyzer = GB2312DistributionAnalysis()
super(GB2312Prober, self).__init__()
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
self.distribution_analyzer = GB2312DistributionAnalysis()
self.reset()
def get_charset_name(self):
@property
def charset_name(self):
return "GB2312"
@property
def language(self):
return "Chinese"

View File

@ -26,8 +26,7 @@
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .constants import eNotMe, eDetecting
from .compat import wrap_ord
from .enums import ProbingState
# This prober doesn't actually recognize a language or a charset.
# It is a helper prober for the use of the Hebrew model probers
@ -126,56 +125,59 @@ from .compat import wrap_ord
# model probers scores. The answer is returned in the form of the name of the
# charset identified, either "windows-1255" or "ISO-8859-8".
# windows-1255 / ISO-8859-8 code points of interest
FINAL_KAF = 0xea
NORMAL_KAF = 0xeb
FINAL_MEM = 0xed
NORMAL_MEM = 0xee
FINAL_NUN = 0xef
NORMAL_NUN = 0xf0
FINAL_PE = 0xf3
NORMAL_PE = 0xf4
FINAL_TSADI = 0xf5
NORMAL_TSADI = 0xf6
# Minimum Visual vs Logical final letter score difference.
# If the difference is below this, don't rely solely on the final letter score
# distance.
MIN_FINAL_CHAR_DISTANCE = 5
# Minimum Visual vs Logical model score difference.
# If the difference is below this, don't rely at all on the model score
# distance.
MIN_MODEL_DISTANCE = 0.01
VISUAL_HEBREW_NAME = "ISO-8859-8"
LOGICAL_HEBREW_NAME = "windows-1255"
class HebrewProber(CharSetProber):
# windows-1255 / ISO-8859-8 code points of interest
FINAL_KAF = 0xea
NORMAL_KAF = 0xeb
FINAL_MEM = 0xed
NORMAL_MEM = 0xee
FINAL_NUN = 0xef
NORMAL_NUN = 0xf0
FINAL_PE = 0xf3
NORMAL_PE = 0xf4
FINAL_TSADI = 0xf5
NORMAL_TSADI = 0xf6
# Minimum Visual vs Logical final letter score difference.
# If the difference is below this, don't rely solely on the final letter score
# distance.
MIN_FINAL_CHAR_DISTANCE = 5
# Minimum Visual vs Logical model score difference.
# If the difference is below this, don't rely at all on the model score
# distance.
MIN_MODEL_DISTANCE = 0.01
VISUAL_HEBREW_NAME = "ISO-8859-8"
LOGICAL_HEBREW_NAME = "windows-1255"
def __init__(self):
CharSetProber.__init__(self)
self._mLogicalProber = None
self._mVisualProber = None
super(HebrewProber, self).__init__()
self._final_char_logical_score = None
self._final_char_visual_score = None
self._prev = None
self._before_prev = None
self._logical_prober = None
self._visual_prober = None
self.reset()
def reset(self):
self._mFinalCharLogicalScore = 0
self._mFinalCharVisualScore = 0
self._final_char_logical_score = 0
self._final_char_visual_score = 0
# The two last characters seen in the previous buffer,
# mPrev and mBeforePrev are initialized to space in order to simulate
# a word delimiter at the beginning of the data
self._mPrev = ' '
self._mBeforePrev = ' '
self._prev = ' '
self._before_prev = ' '
# These probers are owned by the group prober.
def set_model_probers(self, logicalProber, visualProber):
self._mLogicalProber = logicalProber
self._mVisualProber = visualProber
self._logical_prober = logicalProber
self._visual_prober = visualProber
def is_final(self, c):
return wrap_ord(c) in [FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE,
FINAL_TSADI]
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
self.FINAL_PE, self.FINAL_TSADI]
def is_non_final(self, c):
# The normal Tsadi is not a good Non-Final letter due to words like
@ -188,9 +190,10 @@ class HebrewProber(CharSetProber):
# for example legally end with a Non-Final Pe or Kaf. However, the
# benefit of these letters as Non-Final letters outweighs the damage
# since these words are quite rare.
return wrap_ord(c) in [NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, NORMAL_PE]
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
self.NORMAL_NUN, self.NORMAL_PE]
def feed(self, aBuf):
def feed(self, byte_str):
# Final letter analysis for logical-visual decision.
# Look for evidence that the received buffer is either logical Hebrew
# or visual Hebrew.
@ -217,67 +220,73 @@ class HebrewProber(CharSetProber):
# We automatically filter out all 7-bit characters (replace them with
# spaces) so the word boundary detection works properly. [MAP]
if self.get_state() == eNotMe:
if self.state == ProbingState.NOT_ME:
# Both model probers say it's not them. No reason to continue.
return eNotMe
return ProbingState.NOT_ME
aBuf = self.filter_high_bit_only(aBuf)
byte_str = self.filter_high_byte_only(byte_str)
for cur in aBuf:
for cur in byte_str:
if cur == ' ':
# We stand on a space - a word just ended
if self._mBeforePrev != ' ':
# next-to-last char was not a space so self._mPrev is not a
if self._before_prev != ' ':
# next-to-last char was not a space so self._prev is not a
# 1 letter word
if self.is_final(self._mPrev):
if self.is_final(self._prev):
# case (1) [-2:not space][-1:final letter][cur:space]
self._mFinalCharLogicalScore += 1
elif self.is_non_final(self._mPrev):
self._final_char_logical_score += 1
elif self.is_non_final(self._prev):
# case (2) [-2:not space][-1:Non-Final letter][
# cur:space]
self._mFinalCharVisualScore += 1
self._final_char_visual_score += 1
else:
# Not standing on a space
if ((self._mBeforePrev == ' ') and
(self.is_final(self._mPrev)) and (cur != ' ')):
if ((self._before_prev == ' ') and
(self.is_final(self._prev)) and (cur != ' ')):
# case (3) [-2:space][-1:final letter][cur:not space]
self._mFinalCharVisualScore += 1
self._mBeforePrev = self._mPrev
self._mPrev = cur
self._final_char_visual_score += 1
self._before_prev = self._prev
self._prev = cur
# Forever detecting, till the end or until both model probers return
# eNotMe (handled above)
return eDetecting
# ProbingState.NOT_ME (handled above)
return ProbingState.DETECTING
def get_charset_name(self):
@property
def charset_name(self):
# Make the decision: is it Logical or Visual?
# If the final letter score distance is dominant enough, rely on it.
finalsub = self._mFinalCharLogicalScore - self._mFinalCharVisualScore
if finalsub >= MIN_FINAL_CHAR_DISTANCE:
return LOGICAL_HEBREW_NAME
if finalsub <= -MIN_FINAL_CHAR_DISTANCE:
return VISUAL_HEBREW_NAME
finalsub = self._final_char_logical_score - self._final_char_visual_score
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
return self.LOGICAL_HEBREW_NAME
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
return self.VISUAL_HEBREW_NAME
# It's not dominant enough, try to rely on the model scores instead.
modelsub = (self._mLogicalProber.get_confidence()
- self._mVisualProber.get_confidence())
if modelsub > MIN_MODEL_DISTANCE:
return LOGICAL_HEBREW_NAME
if modelsub < -MIN_MODEL_DISTANCE:
return VISUAL_HEBREW_NAME
modelsub = (self._logical_prober.get_confidence()
- self._visual_prober.get_confidence())
if modelsub > self.MIN_MODEL_DISTANCE:
return self.LOGICAL_HEBREW_NAME
if modelsub < -self.MIN_MODEL_DISTANCE:
return self.VISUAL_HEBREW_NAME
# Still no good, back to final letter distance, maybe it'll save the
# day.
if finalsub < 0.0:
return VISUAL_HEBREW_NAME
return self.VISUAL_HEBREW_NAME
# (finalsub > 0 - Logical) or (don't know what to do) default to
# Logical.
return LOGICAL_HEBREW_NAME
return self.LOGICAL_HEBREW_NAME
def get_state(self):
@property
def language(self):
return 'Hebrew'
@property
def state(self):
# Remain active as long as any of the model probers are active.
if (self._mLogicalProber.get_state() == eNotMe) and \
(self._mVisualProber.get_state() == eNotMe):
return eNotMe
return eDetecting
if (self._logical_prober.state == ProbingState.NOT_ME) and \
(self._visual_prober.state == ProbingState.NOT_ME):
return ProbingState.NOT_ME
return ProbingState.DETECTING

View File

@ -46,7 +46,7 @@ JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
# Char to FreqOrder table ,
JIS_TABLE_SIZE = 4368
JISCharToFreqOrder = (
JIS_CHAR_TO_FREQ_ORDER = (
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
@ -320,250 +320,6 @@ JISCharToFreqOrder = (
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
#Everything below is of no interest for detection purpose
2138,2122,3730,2888,1995,1820,1044,6190,6191,6192,6193,6194,6195,6196,6197,6198, # 4384
6199,6200,6201,6202,6203,6204,6205,4670,6206,6207,6208,6209,6210,6211,6212,6213, # 4400
6214,6215,6216,6217,6218,6219,6220,6221,6222,6223,6224,6225,6226,6227,6228,6229, # 4416
6230,6231,6232,6233,6234,6235,6236,6237,3187,6238,6239,3969,6240,6241,6242,6243, # 4432
6244,4671,6245,6246,4672,6247,6248,4133,6249,6250,4364,6251,2923,2556,2613,4673, # 4448
4365,3970,6252,6253,6254,6255,4674,6256,6257,6258,2768,2353,4366,4675,4676,3188, # 4464
4367,3463,6259,4134,4677,4678,6260,2267,6261,3842,3332,4368,3543,6262,6263,6264, # 4480
3013,1954,1928,4135,4679,6265,6266,2478,3091,6267,4680,4369,6268,6269,1699,6270, # 4496
3544,4136,4681,6271,4137,6272,4370,2804,6273,6274,2593,3971,3972,4682,6275,2236, # 4512
4683,6276,6277,4684,6278,6279,4138,3973,4685,6280,6281,3258,6282,6283,6284,6285, # 4528
3974,4686,2841,3975,6286,6287,3545,6288,6289,4139,4687,4140,6290,4141,6291,4142, # 4544
6292,6293,3333,6294,6295,6296,4371,6297,3399,6298,6299,4372,3976,6300,6301,6302, # 4560
4373,6303,6304,3843,3731,6305,4688,4374,6306,6307,3259,2294,6308,3732,2530,4143, # 4576
6309,4689,6310,6311,6312,3048,6313,6314,4690,3733,2237,6315,6316,2282,3334,6317, # 4592
6318,3844,6319,6320,4691,6321,3400,4692,6322,4693,6323,3049,6324,4375,6325,3977, # 4608
6326,6327,6328,3546,6329,4694,3335,6330,4695,4696,6331,6332,6333,6334,4376,3978, # 4624
6335,4697,3979,4144,6336,3980,4698,6337,6338,6339,6340,6341,4699,4700,4701,6342, # 4640
6343,4702,6344,6345,4703,6346,6347,4704,6348,4705,4706,3135,6349,4707,6350,4708, # 4656
6351,4377,6352,4709,3734,4145,6353,2506,4710,3189,6354,3050,4711,3981,6355,3547, # 4672
3014,4146,4378,3735,2651,3845,3260,3136,2224,1986,6356,3401,6357,4712,2594,3627, # 4688
3137,2573,3736,3982,4713,3628,4714,4715,2682,3629,4716,6358,3630,4379,3631,6359, # 4704
6360,6361,3983,6362,6363,6364,6365,4147,3846,4717,6366,6367,3737,2842,6368,4718, # 4720
2628,6369,3261,6370,2386,6371,6372,3738,3984,4719,3464,4720,3402,6373,2924,3336, # 4736
4148,2866,6374,2805,3262,4380,2704,2069,2531,3138,2806,2984,6375,2769,6376,4721, # 4752
4722,3403,6377,6378,3548,6379,6380,2705,3092,1979,4149,2629,3337,2889,6381,3338, # 4768
4150,2557,3339,4381,6382,3190,3263,3739,6383,4151,4723,4152,2558,2574,3404,3191, # 4784
6384,6385,4153,6386,4724,4382,6387,6388,4383,6389,6390,4154,6391,4725,3985,6392, # 4800
3847,4155,6393,6394,6395,6396,6397,3465,6398,4384,6399,6400,6401,6402,6403,6404, # 4816
4156,6405,6406,6407,6408,2123,6409,6410,2326,3192,4726,6411,6412,6413,6414,4385, # 4832
4157,6415,6416,4158,6417,3093,3848,6418,3986,6419,6420,3849,6421,6422,6423,4159, # 4848
6424,6425,4160,6426,3740,6427,6428,6429,6430,3987,6431,4727,6432,2238,6433,6434, # 4864
4386,3988,6435,6436,3632,6437,6438,2843,6439,6440,6441,6442,3633,6443,2958,6444, # 4880
6445,3466,6446,2364,4387,3850,6447,4388,2959,3340,6448,3851,6449,4728,6450,6451, # 4896
3264,4729,6452,3193,6453,4389,4390,2706,3341,4730,6454,3139,6455,3194,6456,3051, # 4912
2124,3852,1602,4391,4161,3853,1158,3854,4162,3989,4392,3990,4731,4732,4393,2040, # 4928
4163,4394,3265,6457,2807,3467,3855,6458,6459,6460,3991,3468,4733,4734,6461,3140, # 4944
2960,6462,4735,6463,6464,6465,6466,4736,4737,4738,4739,6467,6468,4164,2403,3856, # 4960
6469,6470,2770,2844,6471,4740,6472,6473,6474,6475,6476,6477,6478,3195,6479,4741, # 4976
4395,6480,2867,6481,4742,2808,6482,2493,4165,6483,6484,6485,6486,2295,4743,6487, # 4992
6488,6489,3634,6490,6491,6492,6493,6494,6495,6496,2985,4744,6497,6498,4745,6499, # 5008
6500,2925,3141,4166,6501,6502,4746,6503,6504,4747,6505,6506,6507,2890,6508,6509, # 5024
6510,6511,6512,6513,6514,6515,6516,6517,6518,6519,3469,4167,6520,6521,6522,4748, # 5040
4396,3741,4397,4749,4398,3342,2125,4750,6523,4751,4752,4753,3052,6524,2961,4168, # 5056
6525,4754,6526,4755,4399,2926,4169,6527,3857,6528,4400,4170,6529,4171,6530,6531, # 5072
2595,6532,6533,6534,6535,3635,6536,6537,6538,6539,6540,6541,6542,4756,6543,6544, # 5088
6545,6546,6547,6548,4401,6549,6550,6551,6552,4402,3405,4757,4403,6553,6554,6555, # 5104
4172,3742,6556,6557,6558,3992,3636,6559,6560,3053,2726,6561,3549,4173,3054,4404, # 5120
6562,6563,3993,4405,3266,3550,2809,4406,6564,6565,6566,4758,4759,6567,3743,6568, # 5136
4760,3744,4761,3470,6569,6570,6571,4407,6572,3745,4174,6573,4175,2810,4176,3196, # 5152
4762,6574,4177,6575,6576,2494,2891,3551,6577,6578,3471,6579,4408,6580,3015,3197, # 5168
6581,3343,2532,3994,3858,6582,3094,3406,4409,6583,2892,4178,4763,4410,3016,4411, # 5184
6584,3995,3142,3017,2683,6585,4179,6586,6587,4764,4412,6588,6589,4413,6590,2986, # 5200
6591,2962,3552,6592,2963,3472,6593,6594,4180,4765,6595,6596,2225,3267,4414,6597, # 5216
3407,3637,4766,6598,6599,3198,6600,4415,6601,3859,3199,6602,3473,4767,2811,4416, # 5232
1856,3268,3200,2575,3996,3997,3201,4417,6603,3095,2927,6604,3143,6605,2268,6606, # 5248
3998,3860,3096,2771,6607,6608,3638,2495,4768,6609,3861,6610,3269,2745,4769,4181, # 5264
3553,6611,2845,3270,6612,6613,6614,3862,6615,6616,4770,4771,6617,3474,3999,4418, # 5280
4419,6618,3639,3344,6619,4772,4182,6620,2126,6621,6622,6623,4420,4773,6624,3018, # 5296
6625,4774,3554,6626,4183,2025,3746,6627,4184,2707,6628,4421,4422,3097,1775,4185, # 5312
3555,6629,6630,2868,6631,6632,4423,6633,6634,4424,2414,2533,2928,6635,4186,2387, # 5328
6636,4775,6637,4187,6638,1891,4425,3202,3203,6639,6640,4776,6641,3345,6642,6643, # 5344
3640,6644,3475,3346,3641,4000,6645,3144,6646,3098,2812,4188,3642,3204,6647,3863, # 5360
3476,6648,3864,6649,4426,4001,6650,6651,6652,2576,6653,4189,4777,6654,6655,6656, # 5376
2846,6657,3477,3205,4002,6658,4003,6659,3347,2252,6660,6661,6662,4778,6663,6664, # 5392
6665,6666,6667,6668,6669,4779,4780,2048,6670,3478,3099,6671,3556,3747,4004,6672, # 5408
6673,6674,3145,4005,3748,6675,6676,6677,6678,6679,3408,6680,6681,6682,6683,3206, # 5424
3207,6684,6685,4781,4427,6686,4782,4783,4784,6687,6688,6689,4190,6690,6691,3479, # 5440
6692,2746,6693,4428,6694,6695,6696,6697,6698,6699,4785,6700,6701,3208,2727,6702, # 5456
3146,6703,6704,3409,2196,6705,4429,6706,6707,6708,2534,1996,6709,6710,6711,2747, # 5472
6712,6713,6714,4786,3643,6715,4430,4431,6716,3557,6717,4432,4433,6718,6719,6720, # 5488
6721,3749,6722,4006,4787,6723,6724,3644,4788,4434,6725,6726,4789,2772,6727,6728, # 5504
6729,6730,6731,2708,3865,2813,4435,6732,6733,4790,4791,3480,6734,6735,6736,6737, # 5520
4436,3348,6738,3410,4007,6739,6740,4008,6741,6742,4792,3411,4191,6743,6744,6745, # 5536
6746,6747,3866,6748,3750,6749,6750,6751,6752,6753,6754,6755,3867,6756,4009,6757, # 5552
4793,4794,6758,2814,2987,6759,6760,6761,4437,6762,6763,6764,6765,3645,6766,6767, # 5568
3481,4192,6768,3751,6769,6770,2174,6771,3868,3752,6772,6773,6774,4193,4795,4438, # 5584
3558,4796,4439,6775,4797,6776,6777,4798,6778,4799,3559,4800,6779,6780,6781,3482, # 5600
6782,2893,6783,6784,4194,4801,4010,6785,6786,4440,6787,4011,6788,6789,6790,6791, # 5616
6792,6793,4802,6794,6795,6796,4012,6797,6798,6799,6800,3349,4803,3483,6801,4804, # 5632
4195,6802,4013,6803,6804,4196,6805,4014,4015,6806,2847,3271,2848,6807,3484,6808, # 5648
6809,6810,4441,6811,4442,4197,4443,3272,4805,6812,3412,4016,1579,6813,6814,4017, # 5664
6815,3869,6816,2964,6817,4806,6818,6819,4018,3646,6820,6821,4807,4019,4020,6822, # 5680
6823,3560,6824,6825,4021,4444,6826,4198,6827,6828,4445,6829,6830,4199,4808,6831, # 5696
6832,6833,3870,3019,2458,6834,3753,3413,3350,6835,4809,3871,4810,3561,4446,6836, # 5712
6837,4447,4811,4812,6838,2459,4448,6839,4449,6840,6841,4022,3872,6842,4813,4814, # 5728
6843,6844,4815,4200,4201,4202,6845,4023,6846,6847,4450,3562,3873,6848,6849,4816, # 5744
4817,6850,4451,4818,2139,6851,3563,6852,6853,3351,6854,6855,3352,4024,2709,3414, # 5760
4203,4452,6856,4204,6857,6858,3874,3875,6859,6860,4819,6861,6862,6863,6864,4453, # 5776
3647,6865,6866,4820,6867,6868,6869,6870,4454,6871,2869,6872,6873,4821,6874,3754, # 5792
6875,4822,4205,6876,6877,6878,3648,4206,4455,6879,4823,6880,4824,3876,6881,3055, # 5808
4207,6882,3415,6883,6884,6885,4208,4209,6886,4210,3353,6887,3354,3564,3209,3485, # 5824
2652,6888,2728,6889,3210,3755,6890,4025,4456,6891,4825,6892,6893,6894,6895,4211, # 5840
6896,6897,6898,4826,6899,6900,4212,6901,4827,6902,2773,3565,6903,4828,6904,6905, # 5856
6906,6907,3649,3650,6908,2849,3566,6909,3567,3100,6910,6911,6912,6913,6914,6915, # 5872
4026,6916,3355,4829,3056,4457,3756,6917,3651,6918,4213,3652,2870,6919,4458,6920, # 5888
2438,6921,6922,3757,2774,4830,6923,3356,4831,4832,6924,4833,4459,3653,2507,6925, # 5904
4834,2535,6926,6927,3273,4027,3147,6928,3568,6929,6930,6931,4460,6932,3877,4461, # 5920
2729,3654,6933,6934,6935,6936,2175,4835,2630,4214,4028,4462,4836,4215,6937,3148, # 5936
4216,4463,4837,4838,4217,6938,6939,2850,4839,6940,4464,6941,6942,6943,4840,6944, # 5952
4218,3274,4465,6945,6946,2710,6947,4841,4466,6948,6949,2894,6950,6951,4842,6952, # 5968
4219,3057,2871,6953,6954,6955,6956,4467,6957,2711,6958,6959,6960,3275,3101,4843, # 5984
6961,3357,3569,6962,4844,6963,6964,4468,4845,3570,6965,3102,4846,3758,6966,4847, # 6000
3878,4848,4849,4029,6967,2929,3879,4850,4851,6968,6969,1733,6970,4220,6971,6972, # 6016
6973,6974,6975,6976,4852,6977,6978,6979,6980,6981,6982,3759,6983,6984,6985,3486, # 6032
3487,6986,3488,3416,6987,6988,6989,6990,6991,6992,6993,6994,6995,6996,6997,4853, # 6048
6998,6999,4030,7000,7001,3211,7002,7003,4221,7004,7005,3571,4031,7006,3572,7007, # 6064
2614,4854,2577,7008,7009,2965,3655,3656,4855,2775,3489,3880,4222,4856,3881,4032, # 6080
3882,3657,2730,3490,4857,7010,3149,7011,4469,4858,2496,3491,4859,2283,7012,7013, # 6096
7014,2365,4860,4470,7015,7016,3760,7017,7018,4223,1917,7019,7020,7021,4471,7022, # 6112
2776,4472,7023,7024,7025,7026,4033,7027,3573,4224,4861,4034,4862,7028,7029,1929, # 6128
3883,4035,7030,4473,3058,7031,2536,3761,3884,7032,4036,7033,2966,2895,1968,4474, # 6144
3276,4225,3417,3492,4226,2105,7034,7035,1754,2596,3762,4227,4863,4475,3763,4864, # 6160
3764,2615,2777,3103,3765,3658,3418,4865,2296,3766,2815,7036,7037,7038,3574,2872, # 6176
3277,4476,7039,4037,4477,7040,7041,4038,7042,7043,7044,7045,7046,7047,2537,7048, # 6192
7049,7050,7051,7052,7053,7054,4478,7055,7056,3767,3659,4228,3575,7057,7058,4229, # 6208
7059,7060,7061,3660,7062,3212,7063,3885,4039,2460,7064,7065,7066,7067,7068,7069, # 6224
7070,7071,7072,7073,7074,4866,3768,4867,7075,7076,7077,7078,4868,3358,3278,2653, # 6240
7079,7080,4479,3886,7081,7082,4869,7083,7084,7085,7086,7087,7088,2538,7089,7090, # 6256
7091,4040,3150,3769,4870,4041,2896,3359,4230,2930,7092,3279,7093,2967,4480,3213, # 6272
4481,3661,7094,7095,7096,7097,7098,7099,7100,7101,7102,2461,3770,7103,7104,4231, # 6288
3151,7105,7106,7107,4042,3662,7108,7109,4871,3663,4872,4043,3059,7110,7111,7112, # 6304
3493,2988,7113,4873,7114,7115,7116,3771,4874,7117,7118,4232,4875,7119,3576,2336, # 6320
4876,7120,4233,3419,4044,4877,4878,4482,4483,4879,4484,4234,7121,3772,4880,1045, # 6336
3280,3664,4881,4882,7122,7123,7124,7125,4883,7126,2778,7127,4485,4486,7128,4884, # 6352
3214,3887,7129,7130,3215,7131,4885,4045,7132,7133,4046,7134,7135,7136,7137,7138, # 6368
7139,7140,7141,7142,7143,4235,7144,4886,7145,7146,7147,4887,7148,7149,7150,4487, # 6384
4047,4488,7151,7152,4888,4048,2989,3888,7153,3665,7154,4049,7155,7156,7157,7158, # 6400
7159,7160,2931,4889,4890,4489,7161,2631,3889,4236,2779,7162,7163,4891,7164,3060, # 6416
7165,1672,4892,7166,4893,4237,3281,4894,7167,7168,3666,7169,3494,7170,7171,4050, # 6432
7172,7173,3104,3360,3420,4490,4051,2684,4052,7174,4053,7175,7176,7177,2253,4054, # 6448
7178,7179,4895,7180,3152,3890,3153,4491,3216,7181,7182,7183,2968,4238,4492,4055, # 6464
7184,2990,7185,2479,7186,7187,4493,7188,7189,7190,7191,7192,4896,7193,4897,2969, # 6480
4494,4898,7194,3495,7195,7196,4899,4495,7197,3105,2731,7198,4900,7199,7200,7201, # 6496
4056,7202,3361,7203,7204,4496,4901,4902,7205,4497,7206,7207,2315,4903,7208,4904, # 6512
7209,4905,2851,7210,7211,3577,7212,3578,4906,7213,4057,3667,4907,7214,4058,2354, # 6528
3891,2376,3217,3773,7215,7216,7217,7218,7219,4498,7220,4908,3282,2685,7221,3496, # 6544
4909,2632,3154,4910,7222,2337,7223,4911,7224,7225,7226,4912,4913,3283,4239,4499, # 6560
7227,2816,7228,7229,7230,7231,7232,7233,7234,4914,4500,4501,7235,7236,7237,2686, # 6576
7238,4915,7239,2897,4502,7240,4503,7241,2516,7242,4504,3362,3218,7243,7244,7245, # 6592
4916,7246,7247,4505,3363,7248,7249,7250,7251,3774,4506,7252,7253,4917,7254,7255, # 6608
3284,2991,4918,4919,3219,3892,4920,3106,3497,4921,7256,7257,7258,4922,7259,4923, # 6624
3364,4507,4508,4059,7260,4240,3498,7261,7262,4924,7263,2992,3893,4060,3220,7264, # 6640
7265,7266,7267,7268,7269,4509,3775,7270,2817,7271,4061,4925,4510,3776,7272,4241, # 6656
4511,3285,7273,7274,3499,7275,7276,7277,4062,4512,4926,7278,3107,3894,7279,7280, # 6672
4927,7281,4513,7282,7283,3668,7284,7285,4242,4514,4243,7286,2058,4515,4928,4929, # 6688
4516,7287,3286,4244,7288,4517,7289,7290,7291,3669,7292,7293,4930,4931,4932,2355, # 6704
4933,7294,2633,4518,7295,4245,7296,7297,4519,7298,7299,4520,4521,4934,7300,4246, # 6720
4522,7301,7302,7303,3579,7304,4247,4935,7305,4936,7306,7307,7308,7309,3777,7310, # 6736
4523,7311,7312,7313,4248,3580,7314,4524,3778,4249,7315,3581,7316,3287,7317,3221, # 6752
7318,4937,7319,7320,7321,7322,7323,7324,4938,4939,7325,4525,7326,7327,7328,4063, # 6768
7329,7330,4940,7331,7332,4941,7333,4526,7334,3500,2780,1741,4942,2026,1742,7335, # 6784
7336,3582,4527,2388,7337,7338,7339,4528,7340,4250,4943,7341,7342,7343,4944,7344, # 6800
7345,7346,3020,7347,4945,7348,7349,7350,7351,3895,7352,3896,4064,3897,7353,7354, # 6816
7355,4251,7356,7357,3898,7358,3779,7359,3780,3288,7360,7361,4529,7362,4946,4530, # 6832
2027,7363,3899,4531,4947,3222,3583,7364,4948,7365,7366,7367,7368,4949,3501,4950, # 6848
3781,4951,4532,7369,2517,4952,4252,4953,3155,7370,4954,4955,4253,2518,4533,7371, # 6864
7372,2712,4254,7373,7374,7375,3670,4956,3671,7376,2389,3502,4065,7377,2338,7378, # 6880
7379,7380,7381,3061,7382,4957,7383,7384,7385,7386,4958,4534,7387,7388,2993,7389, # 6896
3062,7390,4959,7391,7392,7393,4960,3108,4961,7394,4535,7395,4962,3421,4536,7396, # 6912
4963,7397,4964,1857,7398,4965,7399,7400,2176,3584,4966,7401,7402,3422,4537,3900, # 6928
3585,7403,3782,7404,2852,7405,7406,7407,4538,3783,2654,3423,4967,4539,7408,3784, # 6944
3586,2853,4540,4541,7409,3901,7410,3902,7411,7412,3785,3109,2327,3903,7413,7414, # 6960
2970,4066,2932,7415,7416,7417,3904,3672,3424,7418,4542,4543,4544,7419,4968,7420, # 6976
7421,4255,7422,7423,7424,7425,7426,4067,7427,3673,3365,4545,7428,3110,2559,3674, # 6992
7429,7430,3156,7431,7432,3503,7433,3425,4546,7434,3063,2873,7435,3223,4969,4547, # 7008
4548,2898,4256,4068,7436,4069,3587,3786,2933,3787,4257,4970,4971,3788,7437,4972, # 7024
3064,7438,4549,7439,7440,7441,7442,7443,4973,3905,7444,2874,7445,7446,7447,7448, # 7040
3021,7449,4550,3906,3588,4974,7450,7451,3789,3675,7452,2578,7453,4070,7454,7455, # 7056
7456,4258,3676,7457,4975,7458,4976,4259,3790,3504,2634,4977,3677,4551,4260,7459, # 7072
7460,7461,7462,3907,4261,4978,7463,7464,7465,7466,4979,4980,7467,7468,2213,4262, # 7088
7469,7470,7471,3678,4981,7472,2439,7473,4263,3224,3289,7474,3908,2415,4982,7475, # 7104
4264,7476,4983,2655,7477,7478,2732,4552,2854,2875,7479,7480,4265,7481,4553,4984, # 7120
7482,7483,4266,7484,3679,3366,3680,2818,2781,2782,3367,3589,4554,3065,7485,4071, # 7136
2899,7486,7487,3157,2462,4072,4555,4073,4985,4986,3111,4267,2687,3368,4556,4074, # 7152
3791,4268,7488,3909,2783,7489,2656,1962,3158,4557,4987,1963,3159,3160,7490,3112, # 7168
4988,4989,3022,4990,4991,3792,2855,7491,7492,2971,4558,7493,7494,4992,7495,7496, # 7184
7497,7498,4993,7499,3426,4559,4994,7500,3681,4560,4269,4270,3910,7501,4075,4995, # 7200
4271,7502,7503,4076,7504,4996,7505,3225,4997,4272,4077,2819,3023,7506,7507,2733, # 7216
4561,7508,4562,7509,3369,3793,7510,3590,2508,7511,7512,4273,3113,2994,2616,7513, # 7232
7514,7515,7516,7517,7518,2820,3911,4078,2748,7519,7520,4563,4998,7521,7522,7523, # 7248
7524,4999,4274,7525,4564,3682,2239,4079,4565,7526,7527,7528,7529,5000,7530,7531, # 7264
5001,4275,3794,7532,7533,7534,3066,5002,4566,3161,7535,7536,4080,7537,3162,7538, # 7280
7539,4567,7540,7541,7542,7543,7544,7545,5003,7546,4568,7547,7548,7549,7550,7551, # 7296
7552,7553,7554,7555,7556,5004,7557,7558,7559,5005,7560,3795,7561,4569,7562,7563, # 7312
7564,2821,3796,4276,4277,4081,7565,2876,7566,5006,7567,7568,2900,7569,3797,3912, # 7328
7570,7571,7572,4278,7573,7574,7575,5007,7576,7577,5008,7578,7579,4279,2934,7580, # 7344
7581,5009,7582,4570,7583,4280,7584,7585,7586,4571,4572,3913,7587,4573,3505,7588, # 7360
5010,7589,7590,7591,7592,3798,4574,7593,7594,5011,7595,4281,7596,7597,7598,4282, # 7376
5012,7599,7600,5013,3163,7601,5014,7602,3914,7603,7604,2734,4575,4576,4577,7605, # 7392
7606,7607,7608,7609,3506,5015,4578,7610,4082,7611,2822,2901,2579,3683,3024,4579, # 7408
3507,7612,4580,7613,3226,3799,5016,7614,7615,7616,7617,7618,7619,7620,2995,3290, # 7424
7621,4083,7622,5017,7623,7624,7625,7626,7627,4581,3915,7628,3291,7629,5018,7630, # 7440
7631,7632,7633,4084,7634,7635,3427,3800,7636,7637,4582,7638,5019,4583,5020,7639, # 7456
3916,7640,3801,5021,4584,4283,7641,7642,3428,3591,2269,7643,2617,7644,4585,3592, # 7472
7645,4586,2902,7646,7647,3227,5022,7648,4587,7649,4284,7650,7651,7652,4588,2284, # 7488
7653,5023,7654,7655,7656,4589,5024,3802,7657,7658,5025,3508,4590,7659,7660,7661, # 7504
1969,5026,7662,7663,3684,1821,2688,7664,2028,2509,4285,7665,2823,1841,7666,2689, # 7520
3114,7667,3917,4085,2160,5027,5028,2972,7668,5029,7669,7670,7671,3593,4086,7672, # 7536
4591,4087,5030,3803,7673,7674,7675,7676,7677,7678,7679,4286,2366,4592,4593,3067, # 7552
2328,7680,7681,4594,3594,3918,2029,4287,7682,5031,3919,3370,4288,4595,2856,7683, # 7568
3509,7684,7685,5032,5033,7686,7687,3804,2784,7688,7689,7690,7691,3371,7692,7693, # 7584
2877,5034,7694,7695,3920,4289,4088,7696,7697,7698,5035,7699,5036,4290,5037,5038, # 7600
5039,7700,7701,7702,5040,5041,3228,7703,1760,7704,5042,3229,4596,2106,4089,7705, # 7616
4597,2824,5043,2107,3372,7706,4291,4090,5044,7707,4091,7708,5045,3025,3805,4598, # 7632
4292,4293,4294,3373,7709,4599,7710,5046,7711,7712,5047,5048,3806,7713,7714,7715, # 7648
5049,7716,7717,7718,7719,4600,5050,7720,7721,7722,5051,7723,4295,3429,7724,7725, # 7664
7726,7727,3921,7728,3292,5052,4092,7729,7730,7731,7732,7733,7734,7735,5053,5054, # 7680
7736,7737,7738,7739,3922,3685,7740,7741,7742,7743,2635,5055,7744,5056,4601,7745, # 7696
7746,2560,7747,7748,7749,7750,3923,7751,7752,7753,7754,7755,4296,2903,7756,7757, # 7712
7758,7759,7760,3924,7761,5057,4297,7762,7763,5058,4298,7764,4093,7765,7766,5059, # 7728
3925,7767,7768,7769,7770,7771,7772,7773,7774,7775,7776,3595,7777,4299,5060,4094, # 7744
7778,3293,5061,7779,7780,4300,7781,7782,4602,7783,3596,7784,7785,3430,2367,7786, # 7760
3164,5062,5063,4301,7787,7788,4095,5064,5065,7789,3374,3115,7790,7791,7792,7793, # 7776
7794,7795,7796,3597,4603,7797,7798,3686,3116,3807,5066,7799,7800,5067,7801,7802, # 7792
4604,4302,5068,4303,4096,7803,7804,3294,7805,7806,5069,4605,2690,7807,3026,7808, # 7808
7809,7810,7811,7812,7813,7814,7815,7816,7817,7818,7819,7820,7821,7822,7823,7824, # 7824
7825,7826,7827,7828,7829,7830,7831,7832,7833,7834,7835,7836,7837,7838,7839,7840, # 7840
7841,7842,7843,7844,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854,7855,7856, # 7856
7857,7858,7859,7860,7861,7862,7863,7864,7865,7866,7867,7868,7869,7870,7871,7872, # 7872
7873,7874,7875,7876,7877,7878,7879,7880,7881,7882,7883,7884,7885,7886,7887,7888, # 7888
7889,7890,7891,7892,7893,7894,7895,7896,7897,7898,7899,7900,7901,7902,7903,7904, # 7904
7905,7906,7907,7908,7909,7910,7911,7912,7913,7914,7915,7916,7917,7918,7919,7920, # 7920
7921,7922,7923,7924,3926,7925,7926,7927,7928,7929,7930,7931,7932,7933,7934,7935, # 7936
7936,7937,7938,7939,7940,7941,7942,7943,7944,7945,7946,7947,7948,7949,7950,7951, # 7952
7952,7953,7954,7955,7956,7957,7958,7959,7960,7961,7962,7963,7964,7965,7966,7967, # 7968
7968,7969,7970,7971,7972,7973,7974,7975,7976,7977,7978,7979,7980,7981,7982,7983, # 7984
7984,7985,7986,7987,7988,7989,7990,7991,7992,7993,7994,7995,7996,7997,7998,7999, # 8000
8000,8001,8002,8003,8004,8005,8006,8007,8008,8009,8010,8011,8012,8013,8014,8015, # 8016
8016,8017,8018,8019,8020,8021,8022,8023,8024,8025,8026,8027,8028,8029,8030,8031, # 8032
8032,8033,8034,8035,8036,8037,8038,8039,8040,8041,8042,8043,8044,8045,8046,8047, # 8048
8048,8049,8050,8051,8052,8053,8054,8055,8056,8057,8058,8059,8060,8061,8062,8063, # 8064
8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079, # 8080
8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095, # 8096
8096,8097,8098,8099,8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111, # 8112
8112,8113,8114,8115,8116,8117,8118,8119,8120,8121,8122,8123,8124,8125,8126,8127, # 8128
8128,8129,8130,8131,8132,8133,8134,8135,8136,8137,8138,8139,8140,8141,8142,8143, # 8144
8144,8145,8146,8147,8148,8149,8150,8151,8152,8153,8154,8155,8156,8157,8158,8159, # 8160
8160,8161,8162,8163,8164,8165,8166,8167,8168,8169,8170,8171,8172,8173,8174,8175, # 8176
8176,8177,8178,8179,8180,8181,8182,8183,8184,8185,8186,8187,8188,8189,8190,8191, # 8192
8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207, # 8208
8208,8209,8210,8211,8212,8213,8214,8215,8216,8217,8218,8219,8220,8221,8222,8223, # 8224
8224,8225,8226,8227,8228,8229,8230,8231,8232,8233,8234,8235,8236,8237,8238,8239, # 8240
8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, # 8256
8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271) # 8272
)
# flake8: noqa

View File

@ -25,13 +25,6 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .compat import wrap_ord
NUM_OF_CATEGORY = 6
DONT_KNOW = -1
ENOUGH_REL_THRESHOLD = 100
MAX_REL_THRESHOLD = 1000
MINIMUM_DATA_THRESHOLD = 4
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
jp2CharContext = (
@ -120,24 +113,35 @@ jp2CharContext = (
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
)
class JapaneseContextAnalysis:
class JapaneseContextAnalysis(object):
NUM_OF_CATEGORY = 6
DONT_KNOW = -1
ENOUGH_REL_THRESHOLD = 100
MAX_REL_THRESHOLD = 1000
MINIMUM_DATA_THRESHOLD = 4
def __init__(self):
self._total_rel = None
self._rel_sample = None
self._need_to_skip_char_num = None
self._last_char_order = None
self._done = None
self.reset()
def reset(self):
self._mTotalRel = 0 # total sequence received
# category counters, each interger counts sequence in its category
self._mRelSample = [0] * NUM_OF_CATEGORY
self._total_rel = 0 # total sequence received
# category counters, each integer counts sequence in its category
self._rel_sample = [0] * self.NUM_OF_CATEGORY
# if last byte in current buffer is not the last byte of a character,
# we need to know how many bytes to skip in next buffer
self._mNeedToSkipCharNum = 0
self._mLastCharOrder = -1 # The order of previous char
self._need_to_skip_char_num = 0
self._last_char_order = -1 # The order of previous char
# If this flag is set to True, detection is done and conclusion has
# been made
self._mDone = False
self._done = False
def feed(self, aBuf, aLen):
if self._mDone:
def feed(self, byte_str, num_bytes):
if self._done:
return
# The buffer we got is byte oriented, and a character may span in more than one
@ -147,81 +151,83 @@ class JapaneseContextAnalysis:
# well and analyse the character once it is complete, but since a
# character will not make much difference, by simply skipping
# this character will simply our logic and improve performance.
i = self._mNeedToSkipCharNum
while i < aLen:
order, charLen = self.get_order(aBuf[i:i + 2])
i += charLen
if i > aLen:
self._mNeedToSkipCharNum = i - aLen
self._mLastCharOrder = -1
i = self._need_to_skip_char_num
while i < num_bytes:
order, char_len = self.get_order(byte_str[i:i + 2])
i += char_len
if i > num_bytes:
self._need_to_skip_char_num = i - num_bytes
self._last_char_order = -1
else:
if (order != -1) and (self._mLastCharOrder != -1):
self._mTotalRel += 1
if self._mTotalRel > MAX_REL_THRESHOLD:
self._mDone = True
if (order != -1) and (self._last_char_order != -1):
self._total_rel += 1
if self._total_rel > self.MAX_REL_THRESHOLD:
self._done = True
break
self._mRelSample[jp2CharContext[self._mLastCharOrder][order]] += 1
self._mLastCharOrder = order
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
self._last_char_order = order
def got_enough_data(self):
return self._mTotalRel > ENOUGH_REL_THRESHOLD
return self._total_rel > self.ENOUGH_REL_THRESHOLD
def get_confidence(self):
# This is just one way to calculate confidence. It works well for me.
if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
return (self._total_rel - self._rel_sample[0]) / self._total_rel
else:
return DONT_KNOW
return self.DONT_KNOW
def get_order(self, aBuf):
def get_order(self, byte_str):
return -1, 1
class SJISContextAnalysis(JapaneseContextAnalysis):
def __init__(self):
self.charset_name = "SHIFT_JIS"
super(SJISContextAnalysis, self).__init__()
self._charset_name = "SHIFT_JIS"
def get_charset_name(self):
return self.charset_name
@property
def charset_name(self):
return self._charset_name
def get_order(self, aBuf):
if not aBuf:
def get_order(self, byte_str):
if not byte_str:
return -1, 1
# find out current char's byte length
first_char = wrap_ord(aBuf[0])
if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
charLen = 2
first_char = byte_str[0]
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
char_len = 2
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
self.charset_name = "CP932"
self._charset_name = "CP932"
else:
charLen = 1
char_len = 1
# return its order if it is hiragana
if len(aBuf) > 1:
second_char = wrap_ord(aBuf[1])
if len(byte_str) > 1:
second_char = byte_str[1]
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
return second_char - 0x9F, charLen
return second_char - 0x9F, char_len
return -1, charLen
return -1, char_len
class EUCJPContextAnalysis(JapaneseContextAnalysis):
def get_order(self, aBuf):
if not aBuf:
def get_order(self, byte_str):
if not byte_str:
return -1, 1
# find out current char's byte length
first_char = wrap_ord(aBuf[0])
first_char = byte_str[0]
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
charLen = 2
char_len = 2
elif first_char == 0x8F:
charLen = 3
char_len = 3
else:
charLen = 1
char_len = 1
# return its order if it is hiragana
if len(aBuf) > 1:
second_char = wrap_ord(aBuf[1])
if len(byte_str) > 1:
second_char = byte_str[1]
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
return second_char - 0xA1, charLen
return second_char - 0xA1, char_len
return -1, char_len
return -1, charLen
# flake8: noqa

View File

@ -210,20 +210,19 @@ BulgarianLangModel = (
)
Latin5BulgarianModel = {
'charToOrderMap': Latin5_BulgarianCharToOrderMap,
'precedenceMatrix': BulgarianLangModel,
'mTypicalPositiveRatio': 0.969392,
'keepEnglishLetter': False,
'charsetName': "ISO-8859-5"
'char_to_order_map': Latin5_BulgarianCharToOrderMap,
'precedence_matrix': BulgarianLangModel,
'typical_positive_ratio': 0.969392,
'keep_english_letter': False,
'charset_name': "ISO-8859-5",
'language': 'Bulgairan',
}
Win1251BulgarianModel = {
'charToOrderMap': win1251BulgarianCharToOrderMap,
'precedenceMatrix': BulgarianLangModel,
'mTypicalPositiveRatio': 0.969392,
'keepEnglishLetter': False,
'charsetName': "windows-1251"
'char_to_order_map': win1251BulgarianCharToOrderMap,
'precedence_matrix': BulgarianLangModel,
'typical_positive_ratio': 0.969392,
'keep_english_letter': False,
'charset_name': "windows-1251",
'language': 'Bulgarian',
}
# flake8: noqa

View File

@ -27,7 +27,7 @@
# KOI8-R language model
# Character Mapping Table:
KOI8R_CharToOrderMap = (
KOI8R_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -46,7 +46,7 @@ KOI8R_CharToOrderMap = (
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, # f0
)
win1251_CharToOrderMap = (
win1251_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -65,7 +65,7 @@ win1251_CharToOrderMap = (
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
)
latin5_CharToOrderMap = (
latin5_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -84,7 +84,7 @@ latin5_CharToOrderMap = (
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
)
macCyrillic_CharToOrderMap = (
macCyrillic_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -103,7 +103,7 @@ macCyrillic_CharToOrderMap = (
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
)
IBM855_CharToOrderMap = (
IBM855_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -122,7 +122,7 @@ IBM855_CharToOrderMap = (
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
)
IBM866_CharToOrderMap = (
IBM866_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -279,51 +279,55 @@ RussianLangModel = (
)
Koi8rModel = {
'charToOrderMap': KOI8R_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "KOI8-R"
'char_to_order_map': KOI8R_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "KOI8-R",
'language': 'Russian',
}
Win1251CyrillicModel = {
'charToOrderMap': win1251_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "windows-1251"
'char_to_order_map': win1251_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "windows-1251",
'language': 'Russian',
}
Latin5CyrillicModel = {
'charToOrderMap': latin5_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "ISO-8859-5"
'char_to_order_map': latin5_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "ISO-8859-5",
'language': 'Russian',
}
MacCyrillicModel = {
'charToOrderMap': macCyrillic_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "MacCyrillic"
};
'char_to_order_map': macCyrillic_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "MacCyrillic",
'language': 'Russian',
}
Ibm866Model = {
'charToOrderMap': IBM866_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "IBM866"
'char_to_order_map': IBM866_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "IBM866",
'language': 'Russian',
}
Ibm855Model = {
'charToOrderMap': IBM855_CharToOrderMap,
'precedenceMatrix': RussianLangModel,
'mTypicalPositiveRatio': 0.976601,
'keepEnglishLetter': False,
'charsetName': "IBM855"
'char_to_order_map': IBM855_char_to_order_map,
'precedence_matrix': RussianLangModel,
'typical_positive_ratio': 0.976601,
'keep_english_letter': False,
'charset_name': "IBM855",
'language': 'Russian',
}
# flake8: noqa

View File

@ -31,7 +31,7 @@
# 252: 0 - 9
# Character Mapping Table:
Latin7_CharToOrderMap = (
Latin7_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -50,7 +50,7 @@ Latin7_CharToOrderMap = (
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
)
win1253_CharToOrderMap = (
win1253_char_to_order_map = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -207,19 +207,19 @@ GreekLangModel = (
)
Latin7GreekModel = {
'charToOrderMap': Latin7_CharToOrderMap,
'precedenceMatrix': GreekLangModel,
'mTypicalPositiveRatio': 0.982851,
'keepEnglishLetter': False,
'charsetName': "ISO-8859-7"
'char_to_order_map': Latin7_char_to_order_map,
'precedence_matrix': GreekLangModel,
'typical_positive_ratio': 0.982851,
'keep_english_letter': False,
'charset_name': "ISO-8859-7",
'language': 'Greek',
}
Win1253GreekModel = {
'charToOrderMap': win1253_CharToOrderMap,
'precedenceMatrix': GreekLangModel,
'mTypicalPositiveRatio': 0.982851,
'keepEnglishLetter': False,
'charsetName': "windows-1253"
'char_to_order_map': win1253_char_to_order_map,
'precedence_matrix': GreekLangModel,
'typical_positive_ratio': 0.982851,
'keep_english_letter': False,
'charset_name': "windows-1253",
'language': 'Greek',
}
# flake8: noqa

View File

@ -34,7 +34,7 @@
# Windows-1255 language model
# Character Mapping Table:
win1255_CharToOrderMap = (
WIN1255_CHAR_TO_ORDER_MAP = (
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
@ -59,7 +59,7 @@ win1255_CharToOrderMap = (
# first 1024 sequences: 1.5981%
# rest sequences: 0.087%
# negative sequences: 0.0015%
HebrewLangModel = (
HEBREW_LANG_MODEL = (
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
@ -191,11 +191,10 @@ HebrewLangModel = (
)
Win1255HebrewModel = {
'charToOrderMap': win1255_CharToOrderMap,
'precedenceMatrix': HebrewLangModel,
'mTypicalPositiveRatio': 0.984004,
'keepEnglishLetter': False,
'charsetName': "windows-1255"
'char_to_order_map': WIN1255_CHAR_TO_ORDER_MAP,
'precedence_matrix': HEBREW_LANG_MODEL,
'typical_positive_ratio': 0.984004,
'keep_english_letter': False,
'charset_name': "windows-1255",
'language': 'Hebrew',
}
# flake8: noqa

View File

@ -207,19 +207,19 @@ HungarianLangModel = (
)
Latin2HungarianModel = {
'charToOrderMap': Latin2_HungarianCharToOrderMap,
'precedenceMatrix': HungarianLangModel,
'mTypicalPositiveRatio': 0.947368,
'keepEnglishLetter': True,
'charsetName': "ISO-8859-2"
'char_to_order_map': Latin2_HungarianCharToOrderMap,
'precedence_matrix': HungarianLangModel,
'typical_positive_ratio': 0.947368,
'keep_english_letter': True,
'charset_name': "ISO-8859-2",
'language': 'Hungarian',
}
Win1250HungarianModel = {
'charToOrderMap': win1250HungarianCharToOrderMap,
'precedenceMatrix': HungarianLangModel,
'mTypicalPositiveRatio': 0.947368,
'keepEnglishLetter': True,
'charsetName': "windows-1250"
'char_to_order_map': win1250HungarianCharToOrderMap,
'precedence_matrix': HungarianLangModel,
'typical_positive_ratio': 0.947368,
'keep_english_letter': True,
'charset_name': "windows-1250",
'language': 'Hungarian',
}
# flake8: noqa

View File

@ -190,11 +190,10 @@ ThaiLangModel = (
)
TIS620ThaiModel = {
'charToOrderMap': TIS620CharToOrderMap,
'precedenceMatrix': ThaiLangModel,
'mTypicalPositiveRatio': 0.926386,
'keepEnglishLetter': False,
'charsetName': "TIS-620"
'char_to_order_map': TIS620CharToOrderMap,
'precedence_matrix': ThaiLangModel,
'typical_positive_ratio': 0.926386,
'keep_english_letter': False,
'charset_name': "TIS-620",
'language': 'Thai',
}
# flake8: noqa

View File

@ -0,0 +1,193 @@
# -*- coding: utf-8 -*-
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Özgür Baskın - Turkish Language Model
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# 255: Control characters that usually does not exist in any text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# Character Mapping Table:
Latin5_TurkishCharToOrderMap = (
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255, 23, 37, 47, 39, 29, 52, 36, 45, 53, 60, 16, 49, 20, 46, 42,
48, 69, 44, 35, 31, 51, 38, 62, 65, 43, 56,255,255,255,255,255,
255, 1, 21, 28, 12, 2, 18, 27, 25, 3, 24, 10, 5, 13, 4, 15,
26, 64, 7, 8, 9, 14, 32, 57, 58, 11, 22,255,255,255,255,255,
180,179,178,177,176,175,174,173,172,171,170,169,168,167,166,165,
164,163,162,161,160,159,101,158,157,156,155,154,153,152,151,106,
150,149,148,147,146,145,144,100,143,142,141,140,139,138,137,136,
94, 80, 93,135,105,134,133, 63,132,131,130,129,128,127,126,125,
124,104, 73, 99, 79, 85,123, 54,122, 98, 92,121,120, 91,103,119,
68,118,117, 97,116,115, 50, 90,114,113,112,111, 55, 41, 40, 86,
89, 70, 59, 78, 71, 82, 88, 33, 77, 66, 84, 83,110, 75, 61, 96,
30, 67,109, 74, 87,102, 34, 95, 81,108, 76, 72, 17, 6, 19,107,
)
TurkishLangModel = (
3,2,3,3,3,1,3,3,3,3,3,3,3,3,2,1,1,3,3,1,3,3,0,3,3,3,3,3,0,3,1,3,
3,2,1,0,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
3,2,2,3,3,0,3,3,3,3,3,3,3,2,3,1,0,3,3,1,3,3,0,3,3,3,3,3,0,3,0,3,
3,1,1,0,1,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,2,2,0,0,0,1,0,1,
3,3,2,3,3,0,3,3,3,3,3,3,3,2,3,1,1,3,3,0,3,3,1,2,3,3,3,3,0,3,0,3,
3,1,1,0,0,0,1,0,0,0,0,1,1,0,1,2,1,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,2,0,3,2,1,2,2,1,3,3,0,0,0,2,
2,2,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,
3,3,3,2,3,3,1,2,3,3,3,3,3,3,3,1,3,2,1,0,3,2,0,1,2,3,3,2,1,0,0,2,
2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,
1,0,1,3,3,1,3,3,3,3,3,3,3,1,2,0,0,2,3,0,2,3,0,0,2,2,2,3,0,3,0,1,
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,0,2,3,2,3,3,1,0,0,2,
3,2,0,0,1,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,2,0,0,1,
3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,0,3,3,0,0,2,1,0,0,2,3,2,2,0,0,0,2,
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,1,0,2,0,0,1,
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,0,1,3,2,1,1,3,2,3,2,1,0,0,2,
2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,
3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,0,3,2,2,0,2,3,0,0,2,2,2,2,0,0,0,2,
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
3,3,3,3,3,3,3,2,2,2,2,3,2,3,3,0,3,3,1,1,2,2,0,0,2,2,3,2,0,0,1,3,
0,3,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,
3,3,3,2,3,3,3,2,1,2,2,3,2,3,3,0,3,2,0,0,1,1,0,1,1,2,1,2,0,0,0,1,
0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,
3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,1,3,1,1,0,3,2,1,1,3,3,2,3,1,0,0,1,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,1,
3,2,2,3,3,0,3,3,3,3,3,3,3,2,2,1,0,3,3,1,3,3,0,1,3,3,2,3,0,3,0,3,
2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
2,2,2,3,3,0,3,3,3,3,3,3,3,3,3,0,0,3,2,0,3,3,0,3,2,3,3,3,0,3,1,3,
2,0,0,0,0,0,0,0,0,0,0,1,0,1,2,0,1,0,0,0,0,0,0,0,2,2,0,0,1,0,0,1,
3,3,3,1,2,3,3,1,0,0,1,0,0,3,3,2,3,0,0,2,0,0,2,0,2,0,0,0,2,0,2,0,
0,3,1,0,1,0,0,0,2,2,1,0,1,1,2,1,2,2,2,0,2,1,1,0,0,0,2,0,0,0,0,0,
1,2,1,3,3,0,3,3,3,3,3,2,3,0,0,0,0,2,3,0,2,3,1,0,2,3,1,3,0,3,0,2,
3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,1,3,3,2,2,3,2,2,0,1,2,3,0,1,2,1,0,1,0,0,0,1,0,2,2,0,0,0,1,
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,
3,3,3,1,3,3,1,1,3,3,1,1,3,3,1,0,2,1,2,0,2,1,0,0,1,1,2,1,0,0,0,2,
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,1,0,2,1,3,0,0,2,0,0,3,3,0,3,0,0,1,0,1,2,0,0,1,1,2,2,0,1,0,
0,1,2,1,1,0,1,0,1,1,1,1,1,0,1,1,1,2,2,1,2,0,1,0,0,0,0,0,0,1,0,0,
3,3,3,2,3,2,3,3,0,2,2,2,3,3,3,0,3,0,0,0,2,2,0,1,2,1,1,1,0,0,0,1,
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
3,3,3,3,3,3,2,1,2,2,3,3,3,3,2,0,2,0,0,0,2,2,0,0,2,1,3,3,0,0,1,1,
1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,
1,1,2,3,3,0,3,3,3,3,3,3,2,2,0,2,0,2,3,2,3,2,2,2,2,2,2,2,1,3,2,3,
2,0,2,1,2,2,2,2,1,1,2,2,1,2,2,1,2,0,0,2,1,1,0,2,1,0,0,1,0,0,0,1,
2,3,3,1,1,1,0,1,1,1,2,3,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,2,3,2,3,2,2,1,3,3,3,0,2,1,2,0,2,1,0,0,1,1,1,1,1,0,0,1,
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,2,0,1,0,0,0,
3,3,3,2,3,3,3,3,3,2,3,1,2,3,3,1,2,0,0,0,0,0,0,0,3,2,1,1,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
3,3,3,2,2,3,3,2,1,1,1,1,1,3,3,0,3,1,0,0,1,1,0,0,3,1,2,1,0,0,0,0,
0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
3,3,3,2,2,3,2,2,2,3,2,1,1,3,3,0,3,0,0,0,0,1,0,0,3,1,1,2,0,0,0,1,
1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,1,3,3,0,3,3,3,3,3,2,2,2,1,2,0,2,1,2,2,1,1,0,1,2,2,2,2,2,2,2,
0,0,2,1,2,1,2,1,0,1,1,3,1,2,1,1,2,0,0,2,0,1,0,1,0,1,0,0,0,1,0,1,
3,3,3,1,3,3,3,0,1,1,0,2,2,3,1,0,3,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,0,0,2,2,1,0,0,1,0,0,3,3,1,3,0,0,1,1,0,2,0,3,0,0,0,2,0,1,1,
0,1,2,0,1,2,2,0,2,2,2,2,1,0,2,1,1,0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,
3,3,3,1,3,2,3,2,0,2,2,2,1,3,2,0,2,1,2,0,1,2,0,0,1,0,2,2,0,0,0,2,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,
3,3,3,0,3,3,1,1,2,3,1,0,3,2,3,0,3,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,
1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,3,0,3,3,2,3,3,2,2,0,0,0,0,1,2,0,1,3,0,0,0,3,1,1,0,3,0,2,
2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,1,2,2,1,0,3,1,1,1,1,3,3,2,3,0,0,1,0,1,2,0,2,2,0,2,2,0,2,1,
0,2,2,1,1,1,1,0,2,1,1,0,1,1,1,1,2,1,2,1,2,0,1,0,1,0,0,0,0,0,0,0,
3,3,3,0,1,1,3,0,0,1,1,0,0,2,2,0,3,0,0,1,1,0,1,0,0,0,0,0,2,0,0,0,
0,3,1,0,1,0,1,0,2,0,0,1,0,1,0,1,1,1,2,1,1,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,0,2,0,2,0,1,1,1,0,0,3,3,0,2,0,0,1,0,0,2,1,1,0,1,0,1,0,1,0,
0,2,0,1,2,0,2,0,2,1,1,0,1,0,2,1,1,0,2,1,1,0,1,0,0,0,1,1,0,0,0,0,
3,2,3,0,1,0,0,0,0,0,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,0,2,0,0,0,
0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,0,0,2,3,0,0,1,0,1,0,2,3,2,3,0,0,1,3,0,2,1,0,0,0,0,2,0,1,0,
0,2,1,0,0,1,1,0,2,1,0,0,1,0,0,1,1,0,1,1,2,0,1,0,0,0,0,1,0,0,0,0,
3,2,2,0,0,1,1,0,0,0,0,0,0,3,1,1,1,0,0,0,0,0,1,0,0,0,0,0,2,0,1,0,
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,3,3,0,2,3,2,2,1,2,2,1,1,2,0,1,3,2,2,2,0,0,2,2,0,0,0,1,2,1,
3,0,2,1,1,0,1,1,1,0,1,2,2,2,1,1,2,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,
0,1,1,2,3,0,3,3,3,2,2,2,2,1,0,1,0,1,0,1,2,2,0,0,2,2,1,3,1,1,2,1,
0,0,1,1,2,0,1,1,0,0,1,2,0,2,1,1,2,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,
3,3,2,0,0,3,1,0,0,0,0,0,0,3,2,1,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
0,2,1,1,0,0,1,0,1,2,0,0,1,1,0,0,2,1,1,1,1,0,2,0,0,0,0,0,0,0,0,0,
3,3,2,0,0,1,0,0,0,0,1,0,0,3,3,2,2,0,0,1,0,0,2,0,1,0,0,0,2,0,1,0,
0,0,1,1,0,0,2,0,2,1,0,0,1,1,2,1,2,0,2,1,2,1,1,1,0,0,1,1,0,0,0,0,
3,3,2,0,0,2,2,0,0,0,1,1,0,2,2,1,3,1,0,1,0,1,2,0,0,0,0,0,1,0,1,0,
0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,0,0,0,1,0,0,1,0,0,2,3,1,2,0,0,1,0,0,2,0,0,0,1,0,2,0,2,0,
0,1,1,2,2,1,2,0,2,1,1,0,0,1,1,0,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,
3,3,3,0,2,1,2,1,0,0,1,1,0,3,3,1,2,0,0,1,0,0,2,0,2,0,1,1,2,0,0,0,
0,0,1,1,1,1,2,0,1,1,0,1,1,1,1,0,0,0,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
3,3,3,0,2,2,3,2,0,0,1,0,0,2,3,1,0,0,0,0,0,0,2,0,2,0,0,0,2,0,0,0,
0,1,1,0,0,0,1,0,0,1,0,1,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,0,0,0,0,0,0,0,1,0,0,2,2,2,2,0,0,1,0,0,2,0,0,0,0,0,2,0,1,0,
0,0,2,1,1,0,1,0,2,1,1,0,0,1,1,2,1,0,2,0,2,0,1,0,0,0,2,0,0,0,0,0,
0,0,0,2,2,0,2,1,1,1,1,2,2,0,0,1,0,1,0,0,1,3,0,0,0,0,1,0,0,2,1,0,
0,0,1,0,1,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
2,0,0,2,3,0,2,3,1,2,2,0,2,0,0,2,0,2,1,1,1,2,1,0,0,1,2,1,1,2,1,0,
1,0,2,0,1,0,1,1,0,0,2,2,1,2,1,1,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,0,2,1,2,0,0,0,1,0,0,3,2,0,1,0,0,1,0,0,2,0,0,0,1,2,1,0,1,0,
0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,2,2,0,2,2,1,1,0,1,1,1,1,1,0,0,1,2,1,1,1,0,1,0,0,0,1,1,1,1,
0,0,2,1,0,1,1,1,0,1,1,2,1,2,1,1,2,0,1,1,2,1,0,2,0,0,0,0,0,0,0,0,
3,2,2,0,0,2,0,0,0,0,0,0,0,2,2,0,2,0,0,1,0,0,2,0,0,0,0,0,2,0,0,0,
0,2,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,3,2,0,2,2,0,1,1,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,
2,0,1,0,1,0,1,1,0,0,1,2,0,1,0,1,1,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,
2,2,2,0,1,1,0,0,0,1,0,0,0,1,2,0,1,0,0,1,0,0,1,0,0,0,0,1,2,0,1,0,
0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,1,0,1,1,1,0,0,0,0,1,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
1,1,2,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,
0,0,1,2,2,0,2,1,2,1,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
2,2,2,0,0,0,1,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,0,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
)
Latin5TurkishModel = {
'char_to_order_map': Latin5_TurkishCharToOrderMap,
'precedence_matrix': TurkishLangModel,
'typical_positive_ratio': 0.970290,
'keep_english_letter': True,
'charset_name': "ISO-8859-9",
'language': 'Turkish',
}

View File

@ -27,8 +27,7 @@
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .constants import eNotMe
from .compat import wrap_ord
from .enums import ProbingState
FREQ_CAT_NUM = 4
@ -82,7 +81,7 @@ Latin1_CharToClass = (
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO
# UDF OTH ASC ASS ACV ACO ASV ASO
0, 0, 0, 0, 0, 0, 0, 0, # UDF
0, 3, 3, 3, 3, 3, 3, 3, # OTH
0, 3, 3, 3, 3, 3, 3, 3, # ASC
@ -96,40 +95,47 @@ Latin1ClassModel = (
class Latin1Prober(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
super(Latin1Prober, self).__init__()
self._last_char_class = None
self._freq_counter = None
self.reset()
def reset(self):
self._mLastCharClass = OTH
self._mFreqCounter = [0] * FREQ_CAT_NUM
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
CharSetProber.reset(self)
def get_charset_name(self):
return "windows-1252"
@property
def charset_name(self):
return "ISO-8859-1"
def feed(self, aBuf):
aBuf = self.filter_with_english_letters(aBuf)
for c in aBuf:
charClass = Latin1_CharToClass[wrap_ord(c)]
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
+ charClass]
@property
def language(self):
return ""
def feed(self, byte_str):
byte_str = self.filter_with_english_letters(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
+ char_class]
if freq == 0:
self._mState = eNotMe
self._state = ProbingState.NOT_ME
break
self._mFreqCounter[freq] += 1
self._mLastCharClass = charClass
self._freq_counter[freq] += 1
self._last_char_class = char_class
return self.get_state()
return self.state
def get_confidence(self):
if self.get_state() == eNotMe:
if self.state == ProbingState.NOT_ME:
return 0.01
total = sum(self._mFreqCounter)
total = sum(self._freq_counter)
if total < 0.01:
confidence = 0.0
else:
confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
/ total)
if confidence < 0.0:
confidence = 0.0

View File

@ -0,0 +1,91 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
# Proofpoint, Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .enums import ProbingState, MachineState
class MultiByteCharSetProber(CharSetProber):
"""
MultiByteCharSetProber
"""
def __init__(self, lang_filter=None):
super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
self.distribution_analyzer = None
self.coding_sm = None
self._last_char = [0, 0]
def reset(self):
super(MultiByteCharSetProber, self).reset()
if self.coding_sm:
self.coding_sm.reset()
if self.distribution_analyzer:
self.distribution_analyzer.reset()
self._last_char = [0, 0]
@property
def charset_name(self):
raise NotImplementedError
@property
def language(self):
raise NotImplementedError
def feed(self, byte_str):
for i in range(len(byte_str)):
coding_state = self.coding_sm.next_state(byte_str[i])
if coding_state == MachineState.ERROR:
self.logger.debug('%s %s prober hit error at byte %s',
self.charset_name, self.language, i)
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
self._last_char[1] = byte_str[0]
self.distribution_analyzer.feed(self._last_char, char_len)
else:
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
if (self.distribution_analyzer.got_enough_data() and
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
self._state = ProbingState.FOUND_IT
return self.state
def get_confidence(self):
return self.distribution_analyzer.get_confidence()

View File

@ -39,9 +39,9 @@ from .euctwprober import EUCTWProber
class MBCSGroupProber(CharSetGroupProber):
def __init__(self):
CharSetGroupProber.__init__(self)
self._mProbers = [
def __init__(self, lang_filter=None):
super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
self.probers = [
UTF8Prober(),
SJISProber(),
EUCJPProber(),

View File

@ -0,0 +1,572 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import MachineState
# BIG5
BIG5_CLS = (
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,1, # 78 - 7f
4,4,4,4,4,4,4,4, # 80 - 87
4,4,4,4,4,4,4,4, # 88 - 8f
4,4,4,4,4,4,4,4, # 90 - 97
4,4,4,4,4,4,4,4, # 98 - 9f
4,3,3,3,3,3,3,3, # a0 - a7
3,3,3,3,3,3,3,3, # a8 - af
3,3,3,3,3,3,3,3, # b0 - b7
3,3,3,3,3,3,3,3, # b8 - bf
3,3,3,3,3,3,3,3, # c0 - c7
3,3,3,3,3,3,3,3, # c8 - cf
3,3,3,3,3,3,3,3, # d0 - d7
3,3,3,3,3,3,3,3, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,3,3,3, # e8 - ef
3,3,3,3,3,3,3,3, # f0 - f7
3,3,3,3,3,3,3,0 # f8 - ff
)
BIG5_ST = (
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
)
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
'class_factor': 5,
'state_table': BIG5_ST,
'char_len_table': BIG5_CHAR_LEN_TABLE,
'name': 'Big5'}
# CP949
CP949_CLS = (
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
)
CP949_ST = (
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
)
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
CP949_SM_MODEL = {'class_table': CP949_CLS,
'class_factor': 10,
'state_table': CP949_ST,
'char_len_table': CP949_CHAR_LEN_TABLE,
'name': 'CP949'}
# EUC-JP
EUCJP_CLS = (
4,4,4,4,4,4,4,4, # 00 - 07
4,4,4,4,4,4,5,5, # 08 - 0f
4,4,4,4,4,4,4,4, # 10 - 17
4,4,4,5,4,4,4,4, # 18 - 1f
4,4,4,4,4,4,4,4, # 20 - 27
4,4,4,4,4,4,4,4, # 28 - 2f
4,4,4,4,4,4,4,4, # 30 - 37
4,4,4,4,4,4,4,4, # 38 - 3f
4,4,4,4,4,4,4,4, # 40 - 47
4,4,4,4,4,4,4,4, # 48 - 4f
4,4,4,4,4,4,4,4, # 50 - 57
4,4,4,4,4,4,4,4, # 58 - 5f
4,4,4,4,4,4,4,4, # 60 - 67
4,4,4,4,4,4,4,4, # 68 - 6f
4,4,4,4,4,4,4,4, # 70 - 77
4,4,4,4,4,4,4,4, # 78 - 7f
5,5,5,5,5,5,5,5, # 80 - 87
5,5,5,5,5,5,1,3, # 88 - 8f
5,5,5,5,5,5,5,5, # 90 - 97
5,5,5,5,5,5,5,5, # 98 - 9f
5,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
0,0,0,0,0,0,0,0, # e0 - e7
0,0,0,0,0,0,0,0, # e8 - ef
0,0,0,0,0,0,0,0, # f0 - f7
0,0,0,0,0,0,0,5 # f8 - ff
)
EUCJP_ST = (
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
)
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
'class_factor': 6,
'state_table': EUCJP_ST,
'char_len_table': EUCJP_CHAR_LEN_TABLE,
'name': 'EUC-JP'}
# EUC-KR
EUCKR_CLS = (
1,1,1,1,1,1,1,1, # 00 - 07
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
1,1,1,1,1,1,1,1, # 40 - 47
1,1,1,1,1,1,1,1, # 48 - 4f
1,1,1,1,1,1,1,1, # 50 - 57
1,1,1,1,1,1,1,1, # 58 - 5f
1,1,1,1,1,1,1,1, # 60 - 67
1,1,1,1,1,1,1,1, # 68 - 6f
1,1,1,1,1,1,1,1, # 70 - 77
1,1,1,1,1,1,1,1, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,0,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,3,3,3, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,3,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,0 # f8 - ff
)
EUCKR_ST = (
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
)
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
'class_factor': 4,
'state_table': EUCKR_ST,
'char_len_table': EUCKR_CHAR_LEN_TABLE,
'name': 'EUC-KR'}
# EUC-TW
EUCTW_CLS = (
2,2,2,2,2,2,2,2, # 00 - 07
2,2,2,2,2,2,0,0, # 08 - 0f
2,2,2,2,2,2,2,2, # 10 - 17
2,2,2,0,2,2,2,2, # 18 - 1f
2,2,2,2,2,2,2,2, # 20 - 27
2,2,2,2,2,2,2,2, # 28 - 2f
2,2,2,2,2,2,2,2, # 30 - 37
2,2,2,2,2,2,2,2, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,2, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,6,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,3,4,4,4,4,4,4, # a0 - a7
5,5,1,1,1,1,1,1, # a8 - af
1,1,1,1,1,1,1,1, # b0 - b7
1,1,1,1,1,1,1,1, # b8 - bf
1,1,3,1,3,3,3,3, # c0 - c7
3,3,3,3,3,3,3,3, # c8 - cf
3,3,3,3,3,3,3,3, # d0 - d7
3,3,3,3,3,3,3,3, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,3,3,3, # e8 - ef
3,3,3,3,3,3,3,3, # f0 - f7
3,3,3,3,3,3,3,0 # f8 - ff
)
EUCTW_ST = (
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
'class_factor': 7,
'state_table': EUCTW_ST,
'char_len_table': EUCTW_CHAR_LEN_TABLE,
'name': 'x-euc-tw'}
# GB2312
GB2312_CLS = (
1,1,1,1,1,1,1,1, # 00 - 07
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
3,3,3,3,3,3,3,3, # 30 - 37
3,3,1,1,1,1,1,1, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,4, # 78 - 7f
5,6,6,6,6,6,6,6, # 80 - 87
6,6,6,6,6,6,6,6, # 88 - 8f
6,6,6,6,6,6,6,6, # 90 - 97
6,6,6,6,6,6,6,6, # 98 - 9f
6,6,6,6,6,6,6,6, # a0 - a7
6,6,6,6,6,6,6,6, # a8 - af
6,6,6,6,6,6,6,6, # b0 - b7
6,6,6,6,6,6,6,6, # b8 - bf
6,6,6,6,6,6,6,6, # c0 - c7
6,6,6,6,6,6,6,6, # c8 - cf
6,6,6,6,6,6,6,6, # d0 - d7
6,6,6,6,6,6,6,6, # d8 - df
6,6,6,6,6,6,6,6, # e0 - e7
6,6,6,6,6,6,6,6, # e8 - ef
6,6,6,6,6,6,6,6, # f0 - f7
6,6,6,6,6,6,6,0 # f8 - ff
)
GB2312_ST = (
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
)
# To be accurate, the length of class 6 can be either 2 or 4.
# But it is not necessary to discriminate between the two since
# it is used for frequency analysis only, and we are validating
# each code range there as well. So it is safe to set it to be
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
'class_factor': 7,
'state_table': GB2312_ST,
'char_len_table': GB2312_CHAR_LEN_TABLE,
'name': 'GB2312'}
# Shift_JIS
SJIS_CLS = (
1,1,1,1,1,1,1,1, # 00 - 07
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
2,2,2,2,2,2,2,2, # 40 - 47
2,2,2,2,2,2,2,2, # 48 - 4f
2,2,2,2,2,2,2,2, # 50 - 57
2,2,2,2,2,2,2,2, # 58 - 5f
2,2,2,2,2,2,2,2, # 60 - 67
2,2,2,2,2,2,2,2, # 68 - 6f
2,2,2,2,2,2,2,2, # 70 - 77
2,2,2,2,2,2,2,1, # 78 - 7f
3,3,3,3,3,2,2,3, # 80 - 87
3,3,3,3,3,3,3,3, # 88 - 8f
3,3,3,3,3,3,3,3, # 90 - 97
3,3,3,3,3,3,3,3, # 98 - 9f
#0xa0 is illegal in sjis encoding, but some pages does
#contain such byte. We need to be more error forgiven.
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
3,3,3,3,3,3,3,3, # e0 - e7
3,3,3,3,3,4,4,4, # e8 - ef
3,3,3,3,3,3,3,3, # f0 - f7
3,3,3,3,3,0,0,0) # f8 - ff
SJIS_ST = (
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
)
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
'class_factor': 6,
'state_table': SJIS_ST,
'char_len_table': SJIS_CHAR_LEN_TABLE,
'name': 'Shift_JIS'}
# UCS2-BE
UCS2BE_CLS = (
0,0,0,0,0,0,0,0, # 00 - 07
0,0,1,0,0,2,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,3,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,3,3,3,3,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,0,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,0,0,0,0,0,0,0, # a0 - a7
0,0,0,0,0,0,0,0, # a8 - af
0,0,0,0,0,0,0,0, # b0 - b7
0,0,0,0,0,0,0,0, # b8 - bf
0,0,0,0,0,0,0,0, # c0 - c7
0,0,0,0,0,0,0,0, # c8 - cf
0,0,0,0,0,0,0,0, # d0 - d7
0,0,0,0,0,0,0,0, # d8 - df
0,0,0,0,0,0,0,0, # e0 - e7
0,0,0,0,0,0,0,0, # e8 - ef
0,0,0,0,0,0,0,0, # f0 - f7
0,0,0,0,0,0,4,5 # f8 - ff
)
UCS2BE_ST = (
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
'class_factor': 6,
'state_table': UCS2BE_ST,
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
'name': 'UTF-16BE'}
# UCS2-LE
UCS2LE_CLS = (
0,0,0,0,0,0,0,0, # 00 - 07
0,0,1,0,0,2,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,3,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,3,3,3,3,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
0,0,0,0,0,0,0,0, # 80 - 87
0,0,0,0,0,0,0,0, # 88 - 8f
0,0,0,0,0,0,0,0, # 90 - 97
0,0,0,0,0,0,0,0, # 98 - 9f
0,0,0,0,0,0,0,0, # a0 - a7
0,0,0,0,0,0,0,0, # a8 - af
0,0,0,0,0,0,0,0, # b0 - b7
0,0,0,0,0,0,0,0, # b8 - bf
0,0,0,0,0,0,0,0, # c0 - c7
0,0,0,0,0,0,0,0, # c8 - cf
0,0,0,0,0,0,0,0, # d0 - d7
0,0,0,0,0,0,0,0, # d8 - df
0,0,0,0,0,0,0,0, # e0 - e7
0,0,0,0,0,0,0,0, # e8 - ef
0,0,0,0,0,0,0,0, # f0 - f7
0,0,0,0,0,0,4,5 # f8 - ff
)
UCS2LE_ST = (
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
)
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
'class_factor': 6,
'state_table': UCS2LE_ST,
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
'name': 'UTF-16LE'}
# UTF-8
UTF8_CLS = (
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
1,1,1,1,1,1,0,0, # 08 - 0f
1,1,1,1,1,1,1,1, # 10 - 17
1,1,1,0,1,1,1,1, # 18 - 1f
1,1,1,1,1,1,1,1, # 20 - 27
1,1,1,1,1,1,1,1, # 28 - 2f
1,1,1,1,1,1,1,1, # 30 - 37
1,1,1,1,1,1,1,1, # 38 - 3f
1,1,1,1,1,1,1,1, # 40 - 47
1,1,1,1,1,1,1,1, # 48 - 4f
1,1,1,1,1,1,1,1, # 50 - 57
1,1,1,1,1,1,1,1, # 58 - 5f
1,1,1,1,1,1,1,1, # 60 - 67
1,1,1,1,1,1,1,1, # 68 - 6f
1,1,1,1,1,1,1,1, # 70 - 77
1,1,1,1,1,1,1,1, # 78 - 7f
2,2,2,2,3,3,3,3, # 80 - 87
4,4,4,4,4,4,4,4, # 88 - 8f
4,4,4,4,4,4,4,4, # 90 - 97
4,4,4,4,4,4,4,4, # 98 - 9f
5,5,5,5,5,5,5,5, # a0 - a7
5,5,5,5,5,5,5,5, # a8 - af
5,5,5,5,5,5,5,5, # b0 - b7
5,5,5,5,5,5,5,5, # b8 - bf
0,0,6,6,6,6,6,6, # c0 - c7
6,6,6,6,6,6,6,6, # c8 - cf
6,6,6,6,6,6,6,6, # d0 - d7
6,6,6,6,6,6,6,6, # d8 - df
7,8,8,8,8,8,8,8, # e0 - e7
8,8,8,8,8,9,8,8, # e8 - ef
10,11,11,11,11,11,11,11, # f0 - f7
12,13,13,13,14,15,0,0 # f8 - ff
)
UTF8_ST = (
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
)
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
'class_factor': 16,
'state_table': UTF8_ST,
'char_len_table': UTF8_CHAR_LEN_TABLE,
'name': 'UTF-8'}

View File

@ -0,0 +1,132 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
class SingleByteCharSetProber(CharSetProber):
SAMPLE_SIZE = 64
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
def __init__(self, model, reversed=False, name_prober=None):
super(SingleByteCharSetProber, self).__init__()
self._model = model
# TRUE if we need to reverse every pair in the model lookup
self._reversed = reversed
# Optional auxiliary prober for name decision
self._name_prober = name_prober
self._last_order = None
self._seq_counters = None
self._total_seqs = None
self._total_char = None
self._freq_char = None
self.reset()
def reset(self):
super(SingleByteCharSetProber, self).reset()
# char order of last character
self._last_order = 255
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
self._total_seqs = 0
self._total_char = 0
# characters that fall in our sampling range
self._freq_char = 0
@property
def charset_name(self):
if self._name_prober:
return self._name_prober.charset_name
else:
return self._model['charset_name']
@property
def language(self):
if self._name_prober:
return self._name_prober.language
else:
return self._model.get('language')
def feed(self, byte_str):
if not self._model['keep_english_letter']:
byte_str = self.filter_international_words(byte_str)
if not byte_str:
return self.state
char_to_order_map = self._model['char_to_order_map']
for i, c in enumerate(byte_str):
# XXX: Order is in range 1-64, so one would think we want 0-63 here,
# but that leads to 27 more test failures than before.
order = char_to_order_map[c]
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
# to make it closer to the original intent. The only difference
# is whether or not we count digits and control characters for
# _total_char purposes.
if order < CharacterCategory.CONTROL:
self._total_char += 1
if order < self.SAMPLE_SIZE:
self._freq_char += 1
if self._last_order < self.SAMPLE_SIZE:
self._total_seqs += 1
if not self._reversed:
i = (self._last_order * self.SAMPLE_SIZE) + order
model = self._model['precedence_matrix'][i]
else: # reverse the order of the letters in the lookup
i = (order * self.SAMPLE_SIZE) + self._last_order
model = self._model['precedence_matrix'][i]
self._seq_counters[model] += 1
self._last_order = order
charset_name = self._model['charset_name']
if self.state == ProbingState.DETECTING:
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
confidence = self.get_confidence()
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
self.logger.debug('%s confidence = %s, we have a winner',
charset_name, confidence)
self._state = ProbingState.FOUND_IT
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
self.logger.debug('%s confidence = %s, below negative '
'shortcut threshhold %s', charset_name,
confidence,
self.NEGATIVE_SHORTCUT_THRESHOLD)
self._state = ProbingState.NOT_ME
return self.state
def get_confidence(self):
r = 0.01
if self._total_seqs > 0:
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
self._total_seqs / self._model['typical_positive_ratio'])
r = r * self._freq_char / self._total_char
if r >= 1.0:
r = 0.99
return r

View File

@ -33,16 +33,17 @@ from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
Ibm866Model, Ibm855Model)
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
from .langthaimodel import TIS620ThaiModel
from .langhebrewmodel import Win1255HebrewModel
from .hebrewprober import HebrewProber
from .langturkishmodel import Latin5TurkishModel
class SBCSGroupProber(CharSetGroupProber):
def __init__(self):
CharSetGroupProber.__init__(self)
self._mProbers = [
super(SBCSGroupProber, self).__init__()
self.probers = [
SingleByteCharSetProber(Win1251CyrillicModel),
SingleByteCharSetProber(Koi8rModel),
SingleByteCharSetProber(Latin5CyrillicModel),
@ -53,17 +54,20 @@ class SBCSGroupProber(CharSetGroupProber):
SingleByteCharSetProber(Win1253GreekModel),
SingleByteCharSetProber(Latin5BulgarianModel),
SingleByteCharSetProber(Win1251BulgarianModel),
SingleByteCharSetProber(Latin2HungarianModel),
SingleByteCharSetProber(Win1250HungarianModel),
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
# after we retrain model.
# SingleByteCharSetProber(Latin2HungarianModel),
# SingleByteCharSetProber(Win1250HungarianModel),
SingleByteCharSetProber(TIS620ThaiModel),
SingleByteCharSetProber(Latin5TurkishModel),
]
hebrewProber = HebrewProber()
logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel,
False, hebrewProber)
visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True,
hebrewProber)
hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber)
self._mProbers.extend([hebrewProber, logicalHebrewProber,
visualHebrewProber])
hebrew_prober = HebrewProber()
logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
False, hebrew_prober)
visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
hebrew_prober)
hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
self.probers.extend([hebrew_prober, logical_hebrew_prober,
visual_hebrew_prober])
self.reset()

View File

@ -0,0 +1,92 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import SJISDistributionAnalysis
from .jpcntx import SJISContextAnalysis
from .mbcssm import SJIS_SM_MODEL
from .enums import ProbingState, MachineState
class SJISProber(MultiByteCharSetProber):
def __init__(self):
super(SJISProber, self).__init__()
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
self.distribution_analyzer = SJISDistributionAnalysis()
self.context_analyzer = SJISContextAnalysis()
self.reset()
def reset(self):
super(SJISProber, self).reset()
self.context_analyzer.reset()
@property
def charset_name(self):
return self.context_analyzer.charset_name
@property
def language(self):
return "Japanese"
def feed(self, byte_str):
for i in range(len(byte_str)):
coding_state = self.coding_sm.next_state(byte_str[i])
if coding_state == MachineState.ERROR:
self.logger.debug('%s %s prober hit error at byte %s',
self.charset_name, self.language, i)
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
self._last_char[1] = byte_str[0]
self.context_analyzer.feed(self._last_char[2 - char_len:],
char_len)
self.distribution_analyzer.feed(self._last_char, char_len)
else:
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
- char_len], char_len)
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
char_len)
self._last_char[0] = byte_str[-1]
if self.state == ProbingState.DETECTING:
if (self.context_analyzer.got_enough_data() and
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
self._state = ProbingState.FOUND_IT
return self.state
def get_confidence(self):
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)

View File

@ -0,0 +1,286 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
"""
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.
:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
"""
import codecs
import logging
import re
from .charsetgroupprober import CharSetGroupProber
from .enums import InputState, LanguageFilter, ProbingState
from .escprober import EscCharSetProber
from .latin1prober import Latin1Prober
from .mbcsgroupprober import MBCSGroupProber
from .sbcsgroupprober import SBCSGroupProber
class UniversalDetector(object):
"""
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
and coordinates all of the different charset probers.
To get a ``dict`` containing an encoding and its confidence, you can simply
run:
.. code::
u = UniversalDetector()
u.feed(some_bytes)
u.close()
detected = u.result
"""
MINIMUM_THRESHOLD = 0.20
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
ESC_DETECTOR = re.compile(b'(\033|~{)')
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
'iso-8859-2': 'Windows-1250',
'iso-8859-5': 'Windows-1251',
'iso-8859-6': 'Windows-1256',
'iso-8859-7': 'Windows-1253',
'iso-8859-8': 'Windows-1255',
'iso-8859-9': 'Windows-1254',
'iso-8859-13': 'Windows-1257'}
def __init__(self, lang_filter=LanguageFilter.ALL):
self._esc_charset_prober = None
self._charset_probers = []
self.result = None
self.done = None
self._got_data = None
self._input_state = None
self._last_char = None
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
self._has_win_bytes = None
self.reset()
def reset(self):
"""
Reset the UniversalDetector and all of its probers back to their
initial states. This is called by ``__init__``, so you only need to
call this directly in between analyses of different documents.
"""
self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
self.done = False
self._got_data = False
self._has_win_bytes = False
self._input_state = InputState.PURE_ASCII
self._last_char = b''
if self._esc_charset_prober:
self._esc_charset_prober.reset()
for prober in self._charset_probers:
prober.reset()
def feed(self, byte_str):
"""
Takes a chunk of a document and feeds it through all of the relevant
charset probers.
After calling ``feed``, you can check the value of the ``done``
attribute to see if you need to continue feeding the
``UniversalDetector`` more data, or if it has made a prediction
(in the ``result`` attribute).
.. note::
You should always call ``close`` when you're done feeding in your
document if ``done`` is not already ``True``.
"""
if self.done:
return
if not len(byte_str):
return
if not isinstance(byte_str, bytearray):
byte_str = bytearray(byte_str)
# First check for known BOMs, since these are guaranteed to be correct
if not self._got_data:
# If the data starts with BOM, we know it is UTF
if byte_str.startswith(codecs.BOM_UTF8):
# EF BB BF UTF-8 with BOM
self.result = {'encoding': "UTF-8-SIG",
'confidence': 1.0,
'language': ''}
elif byte_str.startswith((codecs.BOM_UTF32_LE,
codecs.BOM_UTF32_BE)):
# FF FE 00 00 UTF-32, little-endian BOM
# 00 00 FE FF UTF-32, big-endian BOM
self.result = {'encoding': "UTF-32",
'confidence': 1.0,
'language': ''}
elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
'confidence': 1.0,
'language': ''}
elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
'confidence': 1.0,
'language': ''}
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
# FF FE UTF-16, little endian BOM
# FE FF UTF-16, big endian BOM
self.result = {'encoding': "UTF-16",
'confidence': 1.0,
'language': ''}
self._got_data = True
if self.result['encoding'] is not None:
self.done = True
return
# If none of those matched and we've only see ASCII so far, check
# for high bytes and escape sequences
if self._input_state == InputState.PURE_ASCII:
if self.HIGH_BYTE_DETECTOR.search(byte_str):
self._input_state = InputState.HIGH_BYTE
elif self._input_state == InputState.PURE_ASCII and \
self.ESC_DETECTOR.search(self._last_char + byte_str):
self._input_state = InputState.ESC_ASCII
self._last_char = byte_str[-1:]
# If we've seen escape sequences, use the EscCharSetProber, which
# uses a simple state machine to check for known escape sequences in
# HZ and ISO-2022 encodings, since those are the only encodings that
# use such sequences.
if self._input_state == InputState.ESC_ASCII:
if not self._esc_charset_prober:
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
self.result = {'encoding':
self._esc_charset_prober.charset_name,
'confidence':
self._esc_charset_prober.get_confidence(),
'language':
self._esc_charset_prober.language}
self.done = True
# If we've seen high bytes (i.e., those with values greater than 127),
# we need to do more complicated checks using all our multi-byte and
# single-byte probers that are left. The single-byte probers
# use character bigram distributions to determine the encoding, whereas
# the multi-byte probers use a combination of character unigram and
# bigram distributions.
elif self._input_state == InputState.HIGH_BYTE:
if not self._charset_probers:
self._charset_probers = [MBCSGroupProber(self.lang_filter)]
# If we're checking non-CJK encodings, use single-byte prober
if self.lang_filter & LanguageFilter.NON_CJK:
self._charset_probers.append(SBCSGroupProber())
self._charset_probers.append(Latin1Prober())
for prober in self._charset_probers:
if prober.feed(byte_str) == ProbingState.FOUND_IT:
self.result = {'encoding': prober.charset_name,
'confidence': prober.get_confidence(),
'language': prober.language}
self.done = True
break
if self.WIN_BYTE_DETECTOR.search(byte_str):
self._has_win_bytes = True
def close(self):
"""
Stop analyzing the current document and come up with a final
prediction.
:returns: The ``result`` attribute, a ``dict`` with the keys
`encoding`, `confidence`, and `language`.
"""
# Don't bother with checks if we're already done
if self.done:
return self.result
self.done = True
if not self._got_data:
self.logger.debug('no data received!')
# Default to ASCII if it is all we've seen so far
elif self._input_state == InputState.PURE_ASCII:
self.result = {'encoding': 'ascii',
'confidence': 1.0,
'language': ''}
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
elif self._input_state == InputState.HIGH_BYTE:
prober_confidence = None
max_prober_confidence = 0.0
max_prober = None
for prober in self._charset_probers:
if not prober:
continue
prober_confidence = prober.get_confidence()
if prober_confidence > max_prober_confidence:
max_prober_confidence = prober_confidence
max_prober = prober
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
charset_name = max_prober.charset_name
lower_charset_name = max_prober.charset_name.lower()
confidence = max_prober.get_confidence()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
if lower_charset_name.startswith('iso-8859'):
if self._has_win_bytes:
charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
charset_name)
self.result = {'encoding': charset_name,
'confidence': confidence,
'language': max_prober.language}
# Log all prober confidences if none met MINIMUM_THRESHOLD
if self.logger.getEffectiveLevel() == logging.DEBUG:
if self.result['encoding'] is None:
self.logger.debug('no probers hit minimum threshold')
for group_prober in self._charset_probers:
if not group_prober:
continue
if isinstance(group_prober, CharSetGroupProber):
for prober in group_prober.probers:
self.logger.debug('%s %s confidence = %s',
prober.charset_name,
prober.language,
prober.get_confidence())
else:
self.logger.debug('%s %s confidence = %s',
prober.charset_name,
prober.language,
prober.get_confidence())
return self.result

View File

@ -25,52 +25,58 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from . import constants
from .charsetprober import CharSetProber
from .enums import ProbingState, MachineState
from .codingstatemachine import CodingStateMachine
from .mbcssm import UTF8SMModel
from .mbcssm import UTF8_SM_MODEL
ONE_CHAR_PROB = 0.5
class UTF8Prober(CharSetProber):
ONE_CHAR_PROB = 0.5
def __init__(self):
CharSetProber.__init__(self)
self._mCodingSM = CodingStateMachine(UTF8SMModel)
super(UTF8Prober, self).__init__()
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
self._num_mb_chars = None
self.reset()
def reset(self):
CharSetProber.reset(self)
self._mCodingSM.reset()
self._mNumOfMBChar = 0
super(UTF8Prober, self).reset()
self.coding_sm.reset()
self._num_mb_chars = 0
def get_charset_name(self):
@property
def charset_name(self):
return "utf-8"
def feed(self, aBuf):
for c in aBuf:
codingState = self._mCodingSM.next_state(c)
if codingState == constants.eError:
self._mState = constants.eNotMe
break
elif codingState == constants.eItsMe:
self._mState = constants.eFoundIt
break
elif codingState == constants.eStart:
if self._mCodingSM.get_current_charlen() >= 2:
self._mNumOfMBChar += 1
@property
def language(self):
return ""
if self.get_state() == constants.eDetecting:
if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
self._mState = constants.eFoundIt
def feed(self, byte_str):
for c in byte_str:
coding_state = self.coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.START:
if self.coding_sm.get_current_charlen() >= 2:
self._num_mb_chars += 1
return self.get_state()
if self.state == ProbingState.DETECTING:
if self.get_confidence() > self.SHORTCUT_THRESHOLD:
self._state = ProbingState.FOUND_IT
return self.state
def get_confidence(self):
unlike = 0.99
if self._mNumOfMBChar < 6:
for i in range(0, self._mNumOfMBChar):
unlike = unlike * ONE_CHAR_PROB
if self._num_mb_chars < 6:
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
return 1.0 - unlike
else:
return unlike

View File

@ -0,0 +1,9 @@
"""
This module exists only to simplify retrieving the version number of chardet
from within setup.py and from chardet subpackages.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
__version__ = "3.0.4"
VERSION = __version__.split('.')

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,171 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from collections import MutableMapping
try:
from collections import UserDict
except ImportError:
from UserDict import UserDict
try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict
from io import open
import sys
try:
from thread import get_ident
except ImportError:
try:
from _thread import get_ident
except ImportError:
from _dummy_thread import get_ident
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
str = type('str')
def from_none(exc):
"""raise from_none(ValueError('a')) == raise ValueError('a') from None"""
exc.__cause__ = None
exc.__suppress_context__ = True
return exc
# from reprlib 3.2.1
def recursive_repr(fillvalue='...'):
'Decorator to make a repr function return fillvalue for a recursive call'
def decorating_function(user_function):
repr_running = set()
def wrapper(self):
key = id(self), get_ident()
if key in repr_running:
return fillvalue
repr_running.add(key)
try:
result = user_function(self)
finally:
repr_running.discard(key)
return result
# Can't use functools.wraps() here because of bootstrap issues
wrapper.__module__ = getattr(user_function, '__module__')
wrapper.__doc__ = getattr(user_function, '__doc__')
wrapper.__name__ = getattr(user_function, '__name__')
wrapper.__annotations__ = getattr(user_function, '__annotations__', {})
return wrapper
return decorating_function
# from collections 3.2.1
class _ChainMap(MutableMapping):
''' A ChainMap groups multiple dicts (or other mappings) together
to create a single, updateable view.
The underlying mappings are stored in a list. That list is public and can
accessed or updated using the *maps* attribute. There is no other state.
Lookups search the underlying mappings successively until a key is found.
In contrast, writes, updates, and deletions only operate on the first
mapping.
'''
def __init__(self, *maps):
'''Initialize a ChainMap by setting *maps* to the given mappings.
If no mappings are provided, a single empty dictionary is used.
'''
self.maps = list(maps) or [{}] # always at least one map
def __missing__(self, key):
raise KeyError(key)
def __getitem__(self, key):
for mapping in self.maps:
try:
return mapping[key] # can't use 'key in mapping' with defaultdict
except KeyError:
pass
return self.__missing__(key) # support subclasses that define __missing__
def get(self, key, default=None):
return self[key] if key in self else default
def __len__(self):
return len(set().union(*self.maps)) # reuses stored hash values if possible
def __iter__(self):
return iter(set().union(*self.maps))
def __contains__(self, key):
return any(key in m for m in self.maps)
@recursive_repr()
def __repr__(self):
return '{0.__class__.__name__}({1})'.format(
self, ', '.join(map(repr, self.maps)))
@classmethod
def fromkeys(cls, iterable, *args):
'Create a ChainMap with a single dict created from the iterable.'
return cls(dict.fromkeys(iterable, *args))
def copy(self):
'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
return self.__class__(self.maps[0].copy(), *self.maps[1:])
__copy__ = copy
def new_child(self): # like Django's Context.push()
'New ChainMap with a new dict followed by all previous maps.'
return self.__class__({}, *self.maps)
@property
def parents(self): # like Django's Context.pop()
'New ChainMap from maps[1:].'
return self.__class__(*self.maps[1:])
def __setitem__(self, key, value):
self.maps[0][key] = value
def __delitem__(self, key):
try:
del self.maps[0][key]
except KeyError:
raise KeyError('Key not found in the first mapping: {!r}'.format(key))
def popitem(self):
'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.'
try:
return self.maps[0].popitem()
except KeyError:
raise KeyError('No keys found in the first mapping.')
def pop(self, key, *args):
'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
try:
return self.maps[0].pop(key, *args)
except KeyError:
raise KeyError('Key not found in the first mapping: {!r}'.format(key))
def clear(self):
'Clear maps[0], leaving maps[1:] intact.'
self.maps[0].clear()
try:
from collections import ChainMap
except ImportError:
ChainMap = _ChainMap

View File

@ -0,0 +1,2 @@
from .package_data import __version__
from .core import *

View File

@ -0,0 +1,118 @@
from .core import encode, decode, alabel, ulabel, IDNAError
import codecs
import re
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
class Codec(codecs.Codec):
def encode(self, data, errors='strict'):
if errors != 'strict':
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
if not data:
return "", 0
return encode(data), len(data)
def decode(self, data, errors='strict'):
if errors != 'strict':
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
if not data:
return u"", 0
return decode(data), len(data)
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, data, errors, final):
if errors != 'strict':
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
if not data:
return ("", 0)
labels = _unicode_dots_re.split(data)
trailing_dot = u''
if labels:
if not labels[-1]:
trailing_dot = '.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = '.'
result = []
size = 0
for label in labels:
result.append(alabel(label))
if size:
size += 1
size += len(label)
# Join with U+002E
result = ".".join(result) + trailing_dot
size += len(trailing_dot)
return (result, size)
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, data, errors, final):
if errors != 'strict':
raise IDNAError("Unsupported error handling \"{0}\"".format(errors))
if not data:
return (u"", 0)
# IDNA allows decoding to operate on Unicode strings, too.
if isinstance(data, unicode):
labels = _unicode_dots_re.split(data)
else:
# Must be ASCII string
data = str(data)
unicode(data, "ascii")
labels = data.split(".")
trailing_dot = u''
if labels:
if not labels[-1]:
trailing_dot = u'.'
del labels[-1]
elif not final:
# Keep potentially unfinished label until the next call
del labels[-1]
if labels:
trailing_dot = u'.'
result = []
size = 0
for label in labels:
result.append(ulabel(label))
if size:
size += 1
size += len(label)
result = u".".join(result) + trailing_dot
size += len(trailing_dot)
return (result, size)
class StreamWriter(Codec, codecs.StreamWriter):
pass
class StreamReader(Codec, codecs.StreamReader):
pass
def getregentry():
return codecs.CodecInfo(
name='idna',
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
)

View File

@ -0,0 +1,12 @@
from .core import *
from .codec import *
def ToASCII(label):
return encode(label)
def ToUnicode(label):
return decode(label)
def nameprep(s):
raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")

View File

@ -0,0 +1,387 @@
from . import idnadata
import bisect
import unicodedata
import re
import sys
from .intranges import intranges_contain
_virama_combining_class = 9
_alabel_prefix = b'xn--'
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')
if sys.version_info[0] == 3:
unicode = str
unichr = chr
class IDNAError(UnicodeError):
""" Base exception for all IDNA-encoding related problems """
pass
class IDNABidiError(IDNAError):
""" Exception when bidirectional requirements are not satisfied """
pass
class InvalidCodepoint(IDNAError):
""" Exception when a disallowed or unallocated codepoint is used """
pass
class InvalidCodepointContext(IDNAError):
""" Exception when the codepoint is not valid in the context it is used """
pass
def _combining_class(cp):
return unicodedata.combining(unichr(cp))
def _is_script(cp, script):
return intranges_contain(ord(cp), idnadata.scripts[script])
def _punycode(s):
return s.encode('punycode')
def _unot(s):
return 'U+{0:04X}'.format(s)
def valid_label_length(label):
if len(label) > 63:
return False
return True
def valid_string_length(label, trailing_dot):
if len(label) > (254 if trailing_dot else 253):
return False
return True
def check_bidi(label, check_ltr=False):
# Bidi rules should only be applied if string contains RTL characters
bidi_label = False
for (idx, cp) in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)
if direction == '':
# String likely comes from a newer version of Unicode
raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), idx))
if direction in ['R', 'AL', 'AN']:
bidi_label = True
break
if not bidi_label and not check_ltr:
return True
# Bidi rule 1
direction = unicodedata.bidirectional(label[0])
if direction in ['R', 'AL']:
rtl = True
elif direction == 'L':
rtl = False
else:
raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))
valid_ending = False
number_type = False
for (idx, cp) in enumerate(label, 1):
direction = unicodedata.bidirectional(cp)
if rtl:
# Bidi rule 2
if not direction in ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(idx))
# Bidi rule 3
if direction in ['R', 'AL', 'EN', 'AN']:
valid_ending = True
elif direction != 'NSM':
valid_ending = False
# Bidi rule 4
if direction in ['AN', 'EN']:
if not number_type:
number_type = direction
else:
if number_type != direction:
raise IDNABidiError('Can not mix numeral types in a right-to-left label')
else:
# Bidi rule 5
if not direction in ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']:
raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(idx))
# Bidi rule 6
if direction in ['L', 'EN']:
valid_ending = True
elif direction != 'NSM':
valid_ending = False
if not valid_ending:
raise IDNABidiError('Label ends with illegal codepoint directionality')
return True
def check_initial_combiner(label):
if unicodedata.category(label[0])[0] == 'M':
raise IDNAError('Label begins with an illegal combining character')
return True
def check_hyphen_ok(label):
if label[2:4] == '--':
raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
if label[0] == '-' or label[-1] == '-':
raise IDNAError('Label must not start or end with a hyphen')
return True
def check_nfc(label):
if unicodedata.normalize('NFC', label) != label:
raise IDNAError('Label must be in Normalization Form C')
def valid_contextj(label, pos):
cp_value = ord(label[pos])
if cp_value == 0x200c:
if pos > 0:
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
return True
ok = False
for i in range(pos-1, -1, -1):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord('T'):
continue
if joining_type in [ord('L'), ord('D')]:
ok = True
break
if not ok:
return False
ok = False
for i in range(pos+1, len(label)):
joining_type = idnadata.joining_types.get(ord(label[i]))
if joining_type == ord('T'):
continue
if joining_type in [ord('R'), ord('D')]:
ok = True
break
return ok
if cp_value == 0x200d:
if pos > 0:
if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
return True
return False
else:
return False
def valid_contexto(label, pos, exception=False):
cp_value = ord(label[pos])
if cp_value == 0x00b7:
if 0 < pos < len(label)-1:
if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
return True
return False
elif cp_value == 0x0375:
if pos < len(label)-1 and len(label) > 1:
return _is_script(label[pos + 1], 'Greek')
return False
elif cp_value == 0x05f3 or cp_value == 0x05f4:
if pos > 0:
return _is_script(label[pos - 1], 'Hebrew')
return False
elif cp_value == 0x30fb:
for cp in label:
if cp == u'\u30fb':
continue
if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
return True
return False
elif 0x660 <= cp_value <= 0x669:
for cp in label:
if 0x6f0 <= ord(cp) <= 0x06f9:
return False
return True
elif 0x6f0 <= cp_value <= 0x6f9:
for cp in label:
if 0x660 <= ord(cp) <= 0x0669:
return False
return True
def check_label(label):
if isinstance(label, (bytes, bytearray)):
label = label.decode('utf-8')
if len(label) == 0:
raise IDNAError('Empty Label')
check_nfc(label)
check_hyphen_ok(label)
check_initial_combiner(label)
for (pos, cp) in enumerate(label):
cp_value = ord(cp)
if intranges_contain(cp_value, idnadata.codepoint_classes['PVALID']):
continue
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTJ']):
if not valid_contextj(label, pos):
raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
elif intranges_contain(cp_value, idnadata.codepoint_classes['CONTEXTO']):
if not valid_contexto(label, pos):
raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(cp_value), pos+1, repr(label)))
else:
raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
check_bidi(label)
def alabel(label):
try:
label = label.encode('ascii')
try:
ulabel(label)
except IDNAError:
raise IDNAError('The label {0} is not a valid A-label'.format(label))
if not valid_label_length(label):
raise IDNAError('Label too long')
return label
except UnicodeEncodeError:
pass
if not label:
raise IDNAError('No Input')
label = unicode(label)
check_label(label)
label = _punycode(label)
label = _alabel_prefix + label
if not valid_label_length(label):
raise IDNAError('Label too long')
return label
def ulabel(label):
if not isinstance(label, (bytes, bytearray)):
try:
label = label.encode('ascii')
except UnicodeEncodeError:
check_label(label)
return label
label = label.lower()
if label.startswith(_alabel_prefix):
label = label[len(_alabel_prefix):]
else:
check_label(label)
return label.decode('ascii')
label = label.decode('punycode')
check_label(label)
return label
def uts46_remap(domain, std3_rules=True, transitional=False):
"""Re-map the characters in the string according to UTS46 processing."""
from .uts46data import uts46data
output = u""
try:
for pos, char in enumerate(domain):
code_point = ord(char)
uts46row = uts46data[code_point if code_point < 256 else
bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
status = uts46row[1]
replacement = uts46row[2] if len(uts46row) == 3 else None
if (status == "V" or
(status == "D" and not transitional) or
(status == "3" and std3_rules and replacement is None)):
output += char
elif replacement is not None and (status == "M" or
(status == "3" and std3_rules) or
(status == "D" and transitional)):
output += replacement
elif status != "I":
raise IndexError()
return unicodedata.normalize("NFC", output)
except IndexError:
raise InvalidCodepoint(
"Codepoint {0} not allowed at position {1} in {2}".format(
_unot(code_point), pos + 1, repr(domain)))
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
if isinstance(s, (bytes, bytearray)):
s = s.decode("ascii")
if uts46:
s = uts46_remap(s, std3_rules, transitional)
trailing_dot = False
result = []
if strict:
labels = s.split('.')
else:
labels = _unicode_dots_re.split(s)
while labels and not labels[0]:
del labels[0]
if not labels:
raise IDNAError('Empty domain')
if labels[-1] == '':
del labels[-1]
trailing_dot = True
for label in labels:
result.append(alabel(label))
if trailing_dot:
result.append(b'')
s = b'.'.join(result)
if not valid_string_length(s, trailing_dot):
raise IDNAError('Domain too long')
return s
def decode(s, strict=False, uts46=False, std3_rules=False):
if isinstance(s, (bytes, bytearray)):
s = s.decode("ascii")
if uts46:
s = uts46_remap(s, std3_rules, False)
trailing_dot = False
result = []
if not strict:
labels = _unicode_dots_re.split(s)
else:
labels = s.split(u'.')
while labels and not labels[0]:
del labels[0]
if not labels:
raise IDNAError('Empty domain')
if not labels[-1]:
del labels[-1]
trailing_dot = True
for label in labels:
result.append(ulabel(label))
if trailing_dot:
result.append(u'')
return u'.'.join(result)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,53 @@
"""
Given a list of integers, made up of (hopefully) a small number of long runs
of consecutive integers, compute a representation of the form
((start1, end1), (start2, end2) ...). Then answer the question "was x present
in the original list?" in time O(log(# runs)).
"""
import bisect
def intranges_from_list(list_):
"""Represent a list of integers as a sequence of ranges:
((start_0, end_0), (start_1, end_1), ...), such that the original
integers are exactly those x such that start_i <= x < end_i for some i.
Ranges are encoded as single integers (start << 32 | end), not as tuples.
"""
sorted_list = sorted(list_)
ranges = []
last_write = -1
for i in range(len(sorted_list)):
if i+1 < len(sorted_list):
if sorted_list[i] == sorted_list[i+1]-1:
continue
current_range = sorted_list[last_write+1:i+1]
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
last_write = i
return tuple(ranges)
def _encode_range(start, end):
return (start << 32) | end
def _decode_range(r):
return (r >> 32), (r & ((1 << 32) - 1))
def intranges_contain(int_, ranges):
"""Determine if `int_` falls into one of the ranges in `ranges`."""
tuple_ = _encode_range(int_, 0)
pos = bisect.bisect_left(ranges, tuple_)
# we could be immediately ahead of a tuple (start, end)
# with start < int_ <= end
if pos > 0:
left, right = _decode_range(ranges[pos-1])
if left <= int_ < right:
return True
# or we could be immediately behind a tuple (int_, end)
if pos < len(ranges):
left, _ = _decode_range(ranges[pos])
if left == int_:
return True
return False

View File

@ -0,0 +1,2 @@
__version__ = '2.6'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More