// euc-kr.js (3.7 KB)
  1. import { inRange, decoderError, encoderError, isASCIICodePoint,
  2. end_of_stream, finished, isASCIIByte, floor } from './text_decoder_utils.js'
  3. import index, { indexCodePointFor, indexPointerFor } from './text_decoder_indexes.js'
  //
  // 14. Legacy multi-byte Korean encodings
  //
  // 14.1 euc-kr
  // 14.1.1 euc-kr decoder
  /**
   * Stateful euc-kr decoder. `handler` is fed one byte per call and keeps a
   * pending lead byte in `euckr_lead` between calls.
   * @implements {Decoder}
   */
  export class EUCKRDecoder {
    /**
     * @param {{fatal: boolean}} options Decoder options. Only `fatal` is read;
     *     it is stored and forwarded unchanged to `decoderError`.
     */
    constructor(options) {
      const { fatal } = options
      this.fatal = fatal
      // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
      this.euckr_lead = 0x00
    }

    /**
     * @param {Stream} stream The stream of bytes being decoded.
     * @param {number} bite The next byte read from the stream.
     * @return {?number} A code point, `null` when a lead byte was stored and
     *     decoding should continue, `finished` at a clean end of stream, or
     *     the result of `decoderError(this.fatal)` for malformed input.
     */
    handler(stream, bite) {
      // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
      // euc-kr lead to 0x00 and return error.
      if (bite === end_of_stream && this.euckr_lead !== 0x00) {
        this.euckr_lead = 0x00
        return decoderError(this.fatal)
      }

      // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
      // finished.
      if (bite === end_of_stream && this.euckr_lead === 0x00)
        return finished

      // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
      // pointer be null, set euc-kr lead to 0x00, and then run these
      // substeps:
      if (this.euckr_lead !== 0x00) {
        const lead = this.euckr_lead
        let pointer = null
        this.euckr_lead = 0x00

        // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
        // pointer to (lead − 0x81) × 190 + (byte − 0x41).
        if (inRange(bite, 0x41, 0xFE))
          pointer = (lead - 0x81) * 190 + (bite - 0x41)

        // 2. Let code point be null, if pointer is null, and the
        // index code point for pointer in index euc-kr otherwise.
        const code_point = (pointer === null)
          ? null : indexCodePointFor(pointer, index('euc-kr'))

        // 3. If code point is null and byte is an ASCII byte, prepend
        // byte to stream.
        // The test must be on the code point, not the pointer: an ASCII
        // trail byte in 0x41..0x7F yields a non-null pointer even when the
        // index has no entry for it, and that byte must still be pushed
        // back so it is decoded on its own instead of being swallowed.
        if (code_point === null && isASCIIByte(bite))
          stream.prepend(bite)

        // 4. If code point is null, return error.
        if (code_point === null)
          return decoderError(this.fatal)

        // 5. Return a code point whose value is code point.
        return code_point
      }

      // 4. If byte is an ASCII byte, return a code point whose value
      // is byte.
      if (isASCIIByte(bite))
        return bite

      // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
      // euc-kr lead to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        this.euckr_lead = bite
        return null
      }

      // 6. Return error.
      return decoderError(this.fatal)
    }
  }
  // 14.1.2 euc-kr encoder
  /**
   * Stateless euc-kr encoder: maps one code point per `handler` call to
   * either a single ASCII byte or a lead/trail byte pair.
   * @implements {Encoder}
   */
  export class EUCKREncoder {
    /**
     * @param {Stream} stream Input stream (not read by this handler).
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} Byte(s) to emit; `finished` at end
     *     of stream, or the result of `encoderError(code_point)` when the
     *     code point has no pointer in index euc-kr.
     */
    handler(stream, code_point) {
      // End of input: nothing more to emit.
      if (code_point === end_of_stream) {
        return finished
      }

      // ASCII code points are emitted as a single byte of the same value.
      if (isASCIICodePoint(code_point)) {
        return code_point
      }

      // Look the code point up in index euc-kr; no entry means it cannot
      // be encoded.
      const eucKrPointer = indexPointerFor(code_point, index('euc-kr'))
      if (eucKrPointer === null) {
        return encoderError(code_point)
      }

      // The pointer splits into a row (lead, offset 0x81) and a column
      // (trail, offset 0x41), with 190 columns per row.
      const row = floor(eucKrPointer / 190)
      const column = eucKrPointer % 190
      return [row + 0x81, column + 0x41]
    }
  }