Press n or j to go to the next uncovered block, b, p or k for the previous block.
import { Context, Recogniser } from '.';
var match = require('../match').default;

/**
 * Returns the length of the first escape sequence in `escapeSequences` that
 * occurs in `text` at `offset`, or -1 when none of them does.
 *
 * The caller has already established that `text[offset]` is ESC (0x1b), so
 * only the bytes after the leading ESC are compared. Sequences are tried in
 * array order and the first match wins.
 *
 * @param text the byte buffer being analysed.
 * @param textLen the number of bytes of `text` to consider.
 * @param offset index of the ESC byte that starts the candidate sequence.
 * @param escapeSequences the escape sequences accepted by the encoding.
 * @returns the matched sequence's length, or -1 if nothing matched.
 */
function matchedEscapeLength(
  text: Context['fInputBytes'],
  textLen: number,
  offset: number,
  escapeSequences: number[][]
): number {
  for (const seq of escapeSequences) {
    // Not enough input left to hold this sequence.
    if (textLen - offset < seq.length) continue;

    let matched = true;
    for (let j = 1; j < seq.length; j++) {
      if (seq[j] !== text[offset + j]) {
        matched = false;
        break;
      }
    }
    if (matched) return seq.length;
  }
  return -1;
}

/**
 * This is a superclass for the individual detectors for
 * each of the detectable members of the ISO 2022 family
 * of encodings.
 */
class ISO_2022 implements Recogniser {
  /** Escape sequences accepted by the encoding; supplied by each subclass. */
  escapeSequences: number[][] = [];

  name() {
    return 'ISO_2022';
  }

  /**
   * Matching function shared among the 2022 detectors JP, CN and KR.
   * Counts the recognised and unrecognised escape sequences in the sample
   * of text, and computes a score based on the total number and the
   * proportion that fit the encoding.
   *
   * @param det detection context; its `fInputBytes` and `fInputLen` supply
   *   the bytes to analyse and how many of them to scan.
   * @returns `null` when no recognised escape sequence was seen or the score
   *   is not positive; otherwise the result of `match(det, this, quality)`
   *   with a quality of at most 100.
   */
  match(det: Context) {
    const text = det.fInputBytes;
    const textLen = det.fInputLen;

    let hits = 0;
    let misses = 0;
    let shifts = 0;

    for (let i = 0; i < textLen; i++) {
      if (text[i] === 0x1b) {
        const seqLen = matchedEscapeLength(
          text,
          textLen,
          i,
          this.escapeSequences
        );
        if (seqLen >= 0) {
          hits++;
          // Skip the rest of the sequence; the loop's i++ steps past its
          // final byte.
          i += seqLen - 1;
          continue;
        }
        misses++;
      }

      // Shift in/out
      if (text[i] === 0x0e || text[i] === 0x0f) shifts++;
    }

    if (hits === 0) return null;

    //
    // Initial quality is based on relative proportion of recognized vs.
    // unrecognized escape sequences.
    //   All good: quality = 100;
    //   half or less good: quality = 0;
    //   linear in between.
    let quality = (100 * hits - 100 * misses) / (hits + misses);

    // Back off quality if there were too few escape sequences seen.
    // Include shifts in this computation, so that KR does not get penalized
    // for having only a single Escape sequence, but many shifts.
    if (hits + shifts < 5) quality -= (5 - (hits + shifts)) * 10;

    return quality <= 0 ? null : match(det, this, quality);
  }
}

/** Detector for ISO-2022-JP. */
export class ISO_2022_JP extends ISO_2022 {
  name() {
    return 'ISO-2022-JP';
  }

  escapeSequences = [
    [0x1b, 0x24, 0x28, 0x43], // KS X 1001:1992
    [0x1b, 0x24, 0x28, 0x44], // JIS X 212-1990
    [0x1b, 0x24, 0x40], // JIS C 6226-1978
    [0x1b, 0x24, 0x41], // GB 2312-80
    [0x1b, 0x24, 0x42], // JIS X 208-1983
    [0x1b, 0x26, 0x40], // JIS X 208 1990, 1997
    [0x1b, 0x28, 0x42], // ASCII
    [0x1b, 0x28, 0x48], // JIS-Roman
    [0x1b, 0x28, 0x49], // Half-width katakana
    [0x1b, 0x28, 0x4a], // JIS-Roman
    [0x1b, 0x2e, 0x41], // ISO 8859-1
    [0x1b, 0x2e, 0x46], // ISO 8859-7
  ];
}

/** Detector for ISO-2022-KR. */
export class ISO_2022_KR extends ISO_2022 {
  name() {
    return 'ISO-2022-KR';
  }

  escapeSequences = [[0x1b, 0x24, 0x29, 0x43]];
}

/** Detector for ISO-2022-CN. */
export class ISO_2022_CN extends ISO_2022 {
  name() {
    return 'ISO-2022-CN';
  }

  escapeSequences = [
    [0x1b, 0x24, 0x29, 0x41], // GB 2312-80
    [0x1b, 0x24, 0x29, 0x47], // CNS 11643-1992 Plane 1
    [0x1b, 0x24, 0x2a, 0x48], // CNS 11643-1992 Plane 2
    [0x1b, 0x24, 0x29, 0x45], // ISO-IR-165
    [0x1b, 0x24, 0x2b, 0x49], // CNS 11643-1992 Plane 3
    [0x1b, 0x24, 0x2b, 0x4a], // CNS 11643-1992 Plane 4
    [0x1b, 0x24, 0x2b, 0x4b], // CNS 11643-1992 Plane 5
    [0x1b, 0x24, 0x2b, 0x4c], // CNS 11643-1992 Plane 6
    [0x1b, 0x24, 0x2b, 0x4d], // CNS 11643-1992 Plane 7
    [0x1b, 0x4e], // SS2
    [0x1b, 0x4f], // SS3
  ];
}