encoding iso2022.ts

92.5% Statements 37/40
81.25% Branches 13/16
88.89% Functions 8/9
96.97% Lines 32/33
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133  
 
6x
 
 
 
 
 
 
 
 
18x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102x
102x
102x
 
 
 
102x
102x
 
102x
184194x
156x
 
 
 
 
952x
 
952x
 
952x
1464x
 
52x
52x
52x
 
 
104x
 
 
 
184142x
 
 
102x
 
 
 
 
 
 
 
3x
 
 
 
 
3x
 
3x
 
 
 
6x
 
1x
 
6x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6x
 
1x
 
6x
 
 
6x
 
1x
 
6x
 
 
 
 
 
 
 
 
 
 
 
 
 
  import { Context, Recogniser } from '.';
 
var match = require('../match').default;
 
/**
 * Base class for the individual detectors of the detectable members of
 * the ISO 2022 family of encodings. Subclasses provide their own `name()`
 * and the list of escape sequences that count as evidence for them; the
 * scoring logic in `match()` is shared.
 */

class ISO_2022 implements Recogniser {
  /**
   * Escape sequences recognised by this detector. Each entry is the full
   * byte sequence including the leading ESC (0x1b). Empty in the base class.
   */
  escapeSequences: number[][] = [];

  name() {
    return 'ISO_2022';
  }

  /**
   * Matching function shared among the 2022 detectors (JP, CN and KR).
   * Counts the recognised and unrecognised escape sequences in the input
   * and computes a score from the total number and the proportion that
   * fit this detector's `escapeSequences`.
   *
   * @param det detection context; the bytes are read from
   *   `det.fInputBytes` and the number of bytes to scan from `det.fInputLen`.
   * @returns `null` when no known escape sequence was found or the final
   *   quality is not positive; otherwise the result of the module-level
   *   `match` helper called with the context, this recogniser and the quality.
   */
  match(det: Context) {
    // TODO: refactor me
    const text = det.fInputBytes;
    const textLen = det.fInputLen;

    let hits = 0;
    let misses = 0;
    let shifts = 0;

    scanInput: for (let i = 0; i < textLen; i++) {
      if (text[i] === 0x1b) {
        checkEscapes: for (const seq of this.escapeSequences) {
          // Not enough input left to hold this sequence.
          if (textLen - i < seq.length) continue checkEscapes;

          // seq[0] is the ESC that was already matched, so compare from 1.
          for (let j = 1; j < seq.length; j++) {
            if (seq[j] !== text[i + j]) continue checkEscapes;
          }

          hits++;
          // Skip the rest of the sequence; the loop's i++ consumes its last byte.
          i += seq.length - 1;
          continue scanInput;
        }

        // ESC that does not start any known sequence.
        misses++;
      }

      // Shift in/out
      if (text[i] === 0x0e || text[i] === 0x0f) shifts++;
    }

    if (hits === 0) return null;

    //
    // Initial quality is based on relative proportion of recognized vs.
    //   unrecognized escape sequences.
    //   All good:  quality = 100;
    //   half or less good: quality = 0;
    //   linear in between.
    let quality = (100 * hits - 100 * misses) / (hits + misses);

    // Back off quality if there were too few escape sequences seen.
    //   Include shifts in this computation, so that KR does not get penalized
    //   for having only a single Escape sequence, but many shifts.
    if (hits + shifts < 5) quality -= (5 - (hits + shifts)) * 10;

    return quality <= 0 ? null : match(det, this, quality);
  }
}
 
/**
 * Detector for ISO-2022-JP. Inherits the scoring from ISO_2022 and only
 * supplies its reported name and the escape sequences that count as hits.
 */
export class ISO_2022_JP extends ISO_2022 {
  // Every entry is a complete escape sequence, leading ESC (0x1b) included.
  escapeSequences: number[][] = [
    // Four-byte designations
    [0x1b, 0x24, 0x28, 0x43], // KS X 1001:1992
    [0x1b, 0x24, 0x28, 0x44], // JIS X 212-1990
    // Three-byte designations
    [0x1b, 0x24, 0x40], // JIS C 6226-1978
    [0x1b, 0x24, 0x41], // GB 2312-80
    [0x1b, 0x24, 0x42], // JIS X 208-1983
    [0x1b, 0x26, 0x40], // JIS X 208 1990, 1997
    [0x1b, 0x28, 0x42], // ASCII
    [0x1b, 0x28, 0x48], // JIS-Roman
    [0x1b, 0x28, 0x49], // Half-width katakana
    [0x1b, 0x28, 0x4a], // JIS-Roman
    [0x1b, 0x2e, 0x41], // ISO 8859-1
    [0x1b, 0x2e, 0x46], // ISO 8859-7
  ];

  /** Name reported for this detector. */
  name() {
    return 'ISO-2022-JP';
  }
}
 
/**
 * Detector for ISO-2022-KR. Inherits the scoring from ISO_2022 and only
 * supplies its reported name and its single escape sequence.
 */
export class ISO_2022_KR extends ISO_2022 {
  // The one escape sequence counted as a hit, leading ESC (0x1b) included.
  escapeSequences: number[][] = [
    [0x1b, 0x24, 0x29, 0x43],
  ];

  /** Name reported for this detector. */
  name() {
    return 'ISO-2022-KR';
  }
}
 
/**
 * Detector for ISO-2022-CN. Inherits the scoring from ISO_2022 and only
 * supplies its reported name and the escape sequences that count as hits.
 */
export class ISO_2022_CN extends ISO_2022 {
  // Every entry is a complete escape sequence, leading ESC (0x1b) included.
  escapeSequences: number[][] = [
    // Four-byte designations
    [0x1b, 0x24, 0x29, 0x41], // GB 2312-80
    [0x1b, 0x24, 0x29, 0x47], // CNS 11643-1992 Plane 1
    [0x1b, 0x24, 0x2a, 0x48], // CNS 11643-1992 Plane 2
    [0x1b, 0x24, 0x29, 0x45], // ISO-IR-165
    [0x1b, 0x24, 0x2b, 0x49], // CNS 11643-1992 Plane 3
    [0x1b, 0x24, 0x2b, 0x4a], // CNS 11643-1992 Plane 4
    [0x1b, 0x24, 0x2b, 0x4b], // CNS 11643-1992 Plane 5
    [0x1b, 0x24, 0x2b, 0x4c], // CNS 11643-1992 Plane 6
    [0x1b, 0x24, 0x2b, 0x4d], // CNS 11643-1992 Plane 7
    // Two-byte single shifts
    [0x1b, 0x4e], // SS2
    [0x1b, 0x4f], // SS3
  ];

  /** Name reported for this detector. */
  name() {
    return 'ISO-2022-CN';
  }
}