euc-jp.js 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. import { inRange, decoderError, encoderError, isASCIICodePoint,
  2. end_of_stream, finished, isASCIIByte, floor } from './text_decoder_utils.js'
  3. import index, { indexCodePointFor, indexPointerFor } from './text_decoder_indexes.js'
  4. //
  5. // 13. Legacy multi-byte Japanese encodings
  6. //
  7. // 13.1 euc-jp
  8. // 13.1.1 euc-jp decoder
  /**
   * Byte-at-a-time decoder for euc-jp, following the step list of the
   * Encoding Standard's "euc-jp decoder" algorithm.
   *
   * State kept between calls:
   *  - `eucjp_lead`: the pending lead byte (0x00 when none is pending).
   *  - `eucjp_jis0212_flag`: set while a three-byte (0x8F-prefixed) sequence
   *    is in progress, so the final lookup uses index jis0212 instead of
   *    index jis0208.
   *
   * @implements {Decoder}
   */
  export class EUCJPDecoder {
    /**
     * @param {{fatal: (boolean|undefined)}=} options Decoder options. `fatal`
     *     is stored and handed to `decoderError` for every malformed sequence.
     *     Omitting the argument is allowed and leaves `fatal` undefined.
     */
    constructor(options = {}) {
      const { fatal } = options
      this.fatal = fatal
      // euc-jp's decoder has an associated euc-jp jis0212 flag
      // (initially unset) and euc-jp lead (initially 0x00).
      this.eucjp_jis0212_flag = false
      this.eucjp_lead = 0x00
    }

    /**
     * @param {Stream} stream The stream of bytes being decoded. Only its
     *     `prepend` method is used, to push an ASCII byte back after a
     *     malformed sequence.
     * @param {number} bite The next byte read from the stream.
     * @return {?number} A decoded code point, `null` when more bytes are
     *     needed ("continue"), `finished` at a clean end of stream, or the
     *     result of `decoderError(this.fatal)` for malformed input.
     */
    handler(stream, bite) {
      // 1. If byte is end-of-stream and euc-jp lead is not 0x00, set
      // euc-jp lead to 0x00, and return error.
      if (bite === end_of_stream && this.eucjp_lead !== 0x00) {
        this.eucjp_lead = 0x00
        return decoderError(this.fatal)
      }

      // 2. If byte is end-of-stream and euc-jp lead is 0x00, return
      // finished.
      if (bite === end_of_stream && this.eucjp_lead === 0x00)
        return finished

      // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to
      // 0xDF, inclusive, set euc-jp lead to 0x00 and return a code
      // point whose value is 0xFF61 − 0xA1 + byte.
      if (this.eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
        this.eucjp_lead = 0x00
        return 0xFF61 - 0xA1 + bite
      }

      // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to
      // 0xFE, inclusive, set the euc-jp jis0212 flag, set euc-jp lead
      // to byte, and return continue.
      if (this.eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
        this.eucjp_jis0212_flag = true
        this.eucjp_lead = bite
        return null
      }

      // 5. If euc-jp lead is not 0x00, let lead be euc-jp lead, set
      // euc-jp lead to 0x00, and run these substeps:
      if (this.eucjp_lead !== 0x00) {
        const lead = this.eucjp_lead
        this.eucjp_lead = 0x00

        // 1. Let code point be null.
        let code_point = null

        // 2. If lead and byte are both in the range 0xA1 to 0xFE,
        // inclusive, set code point to the index code point for (lead
        // − 0xA1) × 94 + byte − 0xA1 in index jis0208 if the euc-jp
        // jis0212 flag is unset and in index jis0212 otherwise.
        if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
          code_point = indexCodePointFor(
            (lead - 0xA1) * 94 + (bite - 0xA1),
            index(this.eucjp_jis0212_flag ? 'jis0212' : 'jis0208'))
        }

        // 3. Unset the euc-jp jis0212 flag.
        this.eucjp_jis0212_flag = false

        // 4. If code point is non-null, return a code point whose value
        // is code point.
        if (code_point !== null)
          return code_point

        // 5. If byte is an ASCII byte, prepend byte to stream.
        // Only ASCII bytes are pushed back: a non-ASCII trail byte is
        // consumed together with the bad lead, so one malformed pair
        // produces exactly one error instead of being re-read and
        // reported a second time.
        if (isASCIIByte(bite))
          stream.prepend(bite)

        // 6. Return error.
        return decoderError(this.fatal)
      }

      // 6. If byte is an ASCII byte, return a code point whose value
      // is byte.
      if (isASCIIByte(bite))
        return bite

      // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE,
      // inclusive, set euc-jp lead to byte and return continue.
      if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
        this.eucjp_lead = bite
        return null
      }

      // 8. Return error.
      return decoderError(this.fatal)
    }
  }
  93. // 13.1.2 euc-jp encoder
  /**
   * Code-point-at-a-time encoder for euc-jp, following the step list of
   * the Encoding Standard's "euc-jp encoder" algorithm. The class keeps no
   * state between calls.
   *
   * @implements {Encoder}
   */
  export class EUCJPEncoder {
    /**
     * @param {Stream} stream Input stream (not read by this handler).
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} `finished` at end of stream, a
     *     single byte, a two-element array of bytes, or the result of
     *     `encoderError` when the code point has no jis0208 pointer.
     */
    handler(stream, code_point) {
      // Step 1: end of input.
      if (code_point === end_of_stream) return finished

      // Step 2: ASCII maps to itself.
      if (isASCIICodePoint(code_point)) return code_point

      // Steps 3-4: two code points that are emitted as single bytes.
      switch (code_point) {
        case 0x00A5:
          return 0x5C
        case 0x203E:
          return 0x7E
      }

      // Step 5: U+FF61..U+FF9F become 0x8E followed by an offset byte.
      if (inRange(code_point, 0xFF61, 0xFF9F)) {
        const offset_byte = code_point - 0xFF61 + 0xA1
        return [0x8E, offset_byte]
      }

      // Step 6: U+2212 is looked up (and reported on failure) as U+FF0D.
      const lookup_point = code_point === 0x2212 ? 0xFF0D : code_point

      // Steps 7-8: find the jis0208 pointer, or report the code point.
      const jis_pointer = indexPointerFor(lookup_point, index('jis0208'))
      if (jis_pointer === null) return encoderError(lookup_point)

      // Steps 9-11: split the pointer into a 94-wide row and cell, each
      // offset by 0xA1.
      const row = floor(jis_pointer / 94)
      const cell = jis_pointer % 94
      return [row + 0xA1, cell + 0xA1]
    }
  }
  137. }