TextDecoder.js 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import Stream, { DEFAULT_ENCODING, getEncoding } from './text_decoder_index.js'
  2. import { end_of_stream, finished, codePointsToString } from './text_decoder_utils.js'
  3. import { decoders } from './table.js'
  4. // 8.1 Interface TextDecoder
  5. class TextDecoder {
  6. /**
  7. * @param {string=} label The label of the encoding; defaults to 'utf-8'.
  8. * @param {Object=} options
  9. */
  10. constructor(label = DEFAULT_ENCODING, options = {}) {
  11. // A TextDecoder object has an associated encoding, decoder,
  12. // stream, ignore BOM flag (initially unset), BOM seen flag
  13. // (initially unset), error mode (initially replacement), and do
  14. // not flush flag (initially unset).
  15. /** @private */
  16. this._encoding = null
  17. /** @private @type {?Decoder} */
  18. this._decoder = null
  19. /** @private @type {boolean} */
  20. this._ignoreBOM = false
  21. /** @private @type {boolean} */
  22. this._BOMseen = false
  23. /** @private @type {string} */
  24. this._error_mode = 'replacement'
  25. /** @private @type {boolean} */
  26. this._do_not_flush = false
  27. // 1. Let encoding be the result of getting an encoding from
  28. // label.
  29. const encoding = getEncoding(label)
  30. // 2. If encoding is failure or replacement, throw a RangeError.
  31. if (encoding === null || encoding.name == 'replacement')
  32. throw RangeError('Unknown encoding: ' + label)
  33. if (!decoders[encoding.name]) {
  34. throw Error('Decoder not present.' +
  35. ' Did you forget to include encoding-indexes.js first?')
  36. }
  37. // 4. Set dec's encoding to encoding.
  38. this._encoding = encoding
  39. // 5. If options's fatal member is true, set dec's error mode to
  40. // fatal.
  41. if (options['fatal'])
  42. this._error_mode = 'fatal'
  43. // 6. If options's ignoreBOM member is true, set dec's ignore BOM
  44. // flag.
  45. if (options['ignoreBOM'])
  46. this._ignoreBOM = true
  47. }
  48. get encoding() {
  49. return this._encoding.name.toLowerCase()
  50. }
  51. get fatal() {
  52. return this._error_mode === 'fatal'
  53. }
  54. get ignoreBOM() {
  55. return this._ignoreBOM
  56. }
  57. /**
  58. * @param {BufferSource=} input The buffer of bytes to decode.
  59. * @param {Object=} options
  60. * @return The decoded string.
  61. */
  62. decode(input, options = {}) {
  63. let bytes
  64. if (typeof input === 'object' && input instanceof ArrayBuffer) {
  65. bytes = new Uint8Array(input)
  66. } else if (typeof input === 'object' && 'buffer' in input &&
  67. input.buffer instanceof ArrayBuffer) {
  68. bytes = new Uint8Array(input.buffer,
  69. input.byteOffset,
  70. input.byteLength)
  71. } else {
  72. bytes = new Uint8Array(0)
  73. }
  74. // 1. If the do not flush flag is unset, set decoder to a new
  75. // encoding's decoder, set stream to a new stream, and unset the
  76. // BOM seen flag.
  77. if (!this._do_not_flush) {
  78. this._decoder = decoders[this._encoding.name]({
  79. fatal: this._error_mode === 'fatal' })
  80. this._BOMseen = false
  81. }
  82. // 2. If options's stream is true, set the do not flush flag, and
  83. // unset the do not flush flag otherwise.
  84. this._do_not_flush = Boolean(options['stream'])
  85. // 3. If input is given, push a copy of input to stream.
  86. // TODO: Align with spec algorithm - maintain stream on instance.
  87. const input_stream = new Stream(bytes)
  88. // 4. Let output be a new stream.
  89. const output = []
  90. /** @type {?(number|!Array.<number>)} */
  91. let result
  92. // 5. While true:
  93. while (true) {
  94. // 1. Let token be the result of reading from stream.
  95. const token = input_stream.read()
  96. // 2. If token is end-of-stream and the do not flush flag is
  97. // set, return output, serialized.
  98. // TODO: Align with spec algorithm.
  99. if (token === end_of_stream)
  100. break
  101. // 3. Otherwise, run these subsubsteps:
  102. // 1. Let result be the result of processing token for decoder,
  103. // stream, output, and error mode.
  104. result = this._decoder.handler(input_stream, token)
  105. // 2. If result is finished, return output, serialized.
  106. if (result === finished)
  107. break
  108. if (result !== null) {
  109. if (Array.isArray(result))
  110. output.push.apply(output, /**@type {!Array.<number>}*/(result))
  111. else
  112. output.push(result)
  113. }
  114. // 3. Otherwise, if result is error, throw a TypeError.
  115. // (Thrown in handler)
  116. // 4. Otherwise, do nothing.
  117. }
  118. // TODO: Align with spec algorithm.
  119. if (!this._do_not_flush) {
  120. do {
  121. result = this._decoder.handler(input_stream, input_stream.read())
  122. if (result === finished)
  123. break
  124. if (result === null)
  125. continue
  126. if (Array.isArray(result))
  127. output.push.apply(output, /**@type {!Array.<number>}*/(result))
  128. else
  129. output.push(result)
  130. } while (!input_stream.endOfStream())
  131. this._decoder = null
  132. }
  133. return this.serializeStream(output)
  134. }
  135. // A TextDecoder object also has an associated serialize stream
  136. // algorithm...
  137. /**
  138. * @param {!Array.<number>} stream
  139. */
  140. serializeStream(stream) {
  141. // 1. Let token be the result of reading from stream.
  142. // (Done in-place on array, rather than as a stream)
  143. // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
  144. // BOM flag and BOM seen flag are unset, run these subsubsteps:
  145. if (['UTF-8', 'UTF-16LE', 'UTF-16BE'].includes(this._encoding.name) &&
  146. !this._ignoreBOM && !this._BOMseen) {
  147. if (stream.length > 0 && stream[0] === 0xFEFF) {
  148. // 1. If token is U+FEFF, set BOM seen flag.
  149. this._BOMseen = true
  150. stream.shift()
  151. } else if (stream.length > 0) {
  152. // 2. Otherwise, if token is not end-of-stream, set BOM seen
  153. // flag and append token to stream.
  154. this._BOMseen = true
  155. } else {
  156. // 3. Otherwise, if token is not end-of-stream, append token
  157. // to output.
  158. // (no-op)
  159. }
  160. }
  161. // 4. Otherwise, return output.
  162. return codePointsToString(stream)
  163. }
  164. }
  165. export {TextDecoder}