Press n or j to go to the next uncovered block, b, p or k for the previous block.
import { Context, Recogniser } from '.';

const match = require('../match').default;

/*
 * Recognisers for UTF-16 and UTF-32, both big- and little-endian.
 *
 * The UTF-16 recognisers rely solely on the byte-order mark (BOM). The
 * UTF-32 recognisers use the BOM when it is present and otherwise score the
 * input by counting valid and invalid 4-byte units.
 */

/**
 * Recognises UTF-16 big-endian input by its BOM (bytes 0xFE 0xFF).
 */
export class UTF_16BE implements Recogniser {
  name() {
    return 'UTF-16BE';
  }

  /**
   * @param det Detection context; only `fRawInput` is read.
   * @returns The result of `match(det, this, 100)` when the input starts with
   *   0xFE 0xFF, otherwise `null`.
   */
  match(det: Context) {
    const input = det.fRawInput;

    if (
      input.length >= 2 &&
      (input[0] & 0xff) === 0xfe &&
      (input[1] & 0xff) === 0xff
    ) {
      return match(det, this, 100); // confidence = 100
    }

    // TODO: Do some statistics to check for unsigned UTF-16BE
    return null;
  }
}

/**
 * Recognises UTF-16 little-endian input by its BOM (bytes 0xFF 0xFE).
 */
export class UTF_16LE implements Recogniser {
  name() {
    return 'UTF-16LE';
  }

  /**
   * @param det Detection context; only `fRawInput` is read.
   * @returns The result of `match(det, this, 100)` when the input starts with
   *   0xFF 0xFE and bytes 2-3 are not both zero, otherwise `null`.
   */
  match(det: Context) {
    const input = det.fRawInput;

    if (
      input.length >= 2 &&
      (input[0] & 0xff) === 0xff &&
      (input[1] & 0xff) === 0xfe
    ) {
      // LE BOM is present.
      if (input.length >= 4 && input[2] === 0x00 && input[3] === 0x00) {
        // FF FE 00 00 is the UTF-32LE BOM: probably UTF-32 LE, not UTF-16.
        return null;
      }
      return match(det, this, 100); // confidence = 100
    }

    // TODO: Do some statistics to check for unsigned UTF-16LE
    return null;
  }
}

interface WithGetChar {
  /** Decodes the 4-byte unit of `input` that starts at byte offset `index`. */
  getChar(input: Uint8Array, index: number): number;
}

/**
 * Shared UTF-32 scoring logic. Subclasses supply the byte order by
 * overriding `getChar`; the implementation here always returns -1, which
 * `match` counts as invalid.
 */
class UTF_32 implements Recogniser, WithGetChar {
  name() {
    return 'UTF-32';
  }

  getChar(input: Uint8Array, index: number): number {
    return -1;
  }

  /**
   * Scores the input as UTF-32 from the presence of a BOM and the number of
   * valid and invalid 4-byte units.
   *
   * @param det Detection context; `fRawInput` and `fRawLength` are read.
   * @returns `null` when fewer than four bytes are available or the
   *   confidence is 0; otherwise the result of `match(det, this, confidence)`.
   */
  match(det: Context) {
    const input = det.fRawInput;

    // Only whole 4-byte units are examined. JavaScript's `/` is floating-point
    // division, so the length has to be floored explicitly; without it a
    // trailing partial unit would be decoded with missing bytes read as 0.
    const limit = Math.floor(det.fRawLength / 4) * 4;

    if (limit === 0) {
      return null;
    }

    const hasBOM = this.getChar(input, 0) === 0x0000feff;

    let numValid = 0;
    let numInvalid = 0;

    for (let i = 0; i < limit; i += 4) {
      const ch = this.getChar(input, i);

      // `ch < 0` catches units whose top byte is >= 0x80: `<< 24` is a signed
      // 32-bit operation, so those decode to negative numbers.
      // 0xD800-0xDFFF are UTF-16 surrogates and not valid in UTF-32.
      if (ch < 0 || ch >= 0x10ffff || (ch >= 0xd800 && ch <= 0xdfff)) {
        numInvalid += 1;
      } else {
        numValid += 1;
      }
    }

    // Cook up some sort of confidence score, based on presence of a BOM
    // and the existence of valid and/or invalid multi-byte sequences.
    let confidence = 0;

    if (hasBOM && numInvalid === 0) {
      confidence = 100;
    } else if (hasBOM && numValid > numInvalid * 10) {
      confidence = 80;
    } else if (numValid > 3 && numInvalid === 0) {
      confidence = 100;
    } else if (numValid > 0 && numInvalid === 0) {
      confidence = 80;
    } else if (numValid > numInvalid * 10) {
      // Probably corrupt UTF-32 data. Valid sequences aren't likely by chance.
      confidence = 25;
    }

    return confidence === 0 ? null : match(det, this, confidence);
  }
}

/**
 * UTF-32 recogniser that decodes each 4-byte unit most-significant byte first.
 */
export class UTF_32BE extends UTF_32 {
  name() {
    return 'UTF-32BE';
  }

  getChar(input: Uint8Array, index: number) {
    return (
      ((input[index + 0] & 0xff) << 24) |
      ((input[index + 1] & 0xff) << 16) |
      ((input[index + 2] & 0xff) << 8) |
      (input[index + 3] & 0xff)
    );
  }
}

/**
 * UTF-32 recogniser that decodes each 4-byte unit least-significant byte first.
 */
export class UTF_32LE extends UTF_32 {
  name() {
    return 'UTF-32LE';
  }

  getChar(input: Uint8Array, index: number) {
    return (
      ((input[index + 3] & 0xff) << 24) |
      ((input[index + 2] & 0xff) << 16) |
      ((input[index + 1] & 0xff) << 8) |
      (input[index + 0] & 0xff)
    );
  }
}