// shift-jis.js (5.4 KB)
  1. import { inRange, decoderError, encoderError, floor, isASCIICodePoint, isASCIIByte,
  2. end_of_stream, finished } from './text_decoder_utils.js'
  3. import index, { indexCodePointFor, indexShiftJISPointerFor } from './text_decoder_indexes.js'
  // 13.3 Shift_JIS
  // 13.3.1 Shift_JIS decoder
  /**
   * Byte-at-a-time Shift_JIS decoder following the numbered steps of the
   * Encoding Standard's "Shift_JIS decoder" algorithm.
   *
   * The only state carried between calls is `Shift_JIS_lead`: the lead byte
   * of a two-byte sequence whose trail byte has not been seen yet (0x00 when
   * no sequence is pending).
   *
   * @implements {Decoder}
   */
  export class ShiftJISDecoder {
    /**
     * @param {{fatal: boolean}} options `fatal` is stored and handed to
     *     `decoderError` whenever malformed input is met.
     */
    constructor(options) {
      const { fatal } = options
      this.fatal = fatal
      // Shift_JIS's decoder has an associated Shift_JIS lead (initially
      // 0x00).
      this.Shift_JIS_lead = 0x00
    }

    /**
     * Consumes one byte (or the end-of-stream marker).
     *
     * @param {Stream} stream The stream of bytes being decoded. Only its
     *     `prepend` method is used, to push an ASCII trail byte back after a
     *     failed two-byte sequence.
     * @param {number} bite The next byte read from the stream.
     * @return {?number} A decoded code point; `null` when the byte was a lead
     *     byte and more input is needed ("continue"); `finished` at a clean
     *     end of stream; or whatever `decoderError(this.fatal)` yields for
     *     malformed input (presumably it throws when `fatal` is set and
     *     returns a replacement otherwise; confirm in
     *     text_decoder_utils.js).
     */
    handler(stream, bite) {
      // 1./2. End-of-stream: a dangling lead byte is an error (and is
      // cleared); otherwise decoding is finished.
      if (bite === end_of_stream) {
        if (this.Shift_JIS_lead === 0x00) {
          return finished
        }
        this.Shift_JIS_lead = 0x00
        return decoderError(this.fatal)
      }

      // 3. A lead byte is pending: this byte is interpreted as its trail.
      if (this.Shift_JIS_lead !== 0x00) {
        const lead = this.Shift_JIS_lead
        this.Shift_JIS_lead = 0x00

        // 3.1 Offset is 0x40 if byte is less than 0x7F, and 0x41 otherwise
        // (0x7F is never a valid trail byte, so the trail range has a gap).
        const offset = bite < 0x7F ? 0x40 : 0x41
        // 3.2 Lead offset is 0x81 if lead is less than 0xA0, and 0xC1
        // otherwise (lead bytes skip 0xA0-0xDF).
        const leadOffset = lead < 0xA0 ? 0x81 : 0xC1

        // 3.3 Only trail bytes 0x40-0x7E and 0x80-0xFC yield a pointer.
        const isTrail = inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)
        const pointer = isTrail ? (lead - leadOffset) * 188 + bite - offset : null

        if (pointer !== null) {
          // 3.4 Pointers 8836-10715 map linearly onto code points starting
          // at 0xE000, bypassing the index.
          if (inRange(pointer, 8836, 10715)) {
            return 0xE000 - 8836 + pointer
          }
          // 3.5/3.8 Otherwise look the pointer up in index jis0208.
          const codePoint = indexCodePointFor(pointer, index('jis0208'))
          if (codePoint !== null) {
            return codePoint
          }
        }

        // 3.6 No code point and the byte is ASCII: push it back so it is
        // decoded on its own by the next call.
        if (isASCIIByte(bite)) {
          stream.prepend(bite)
        }
        // 3.7 No code point: error.
        return decoderError(this.fatal)
      }

      // 4. ASCII bytes and 0x80 map to the code point of the same value.
      if (isASCIIByte(bite) || bite === 0x80) {
        return bite
      }

      // 5. Bytes 0xA1-0xDF map to 0xFF61 - 0xA1 + byte.
      if (inRange(bite, 0xA1, 0xDF)) {
        return 0xFF61 - 0xA1 + bite
      }

      // 6. Bytes 0x81-0x9F and 0xE0-0xFC are lead bytes: remember the byte
      // and return "continue" (null).
      if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
        this.Shift_JIS_lead = bite
        return null
      }

      // 7. Anything else (0xA0, 0xFD-0xFF) is an error.
      return decoderError(this.fatal)
    }
  }
  // 13.3.2 Shift_JIS encoder
  /**
   * Stateless Shift_JIS encoder following the numbered steps of the Encoding
   * Standard's "Shift_JIS encoder" algorithm.
   *
   * The class declares no constructor, so any arguments passed to `new`
   * (for example an options object) are ignored.
   *
   * @implements {Encoder}
   */
  export class ShiftJISEncoder {
    /**
     * Encodes one code point (or the end-of-stream marker).
     *
     * @param {Stream} stream Input stream (not used by this encoder).
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} A single byte, a `[lead, trail]`
     *     byte pair, `finished` at end of stream, or the result of
     *     `encoderError(code_point)` when the code point has no Shift_JIS
     *     pointer.
     */
    handler(stream, code_point) {
      // 1. If code point is end-of-stream, return finished.
      if (code_point === end_of_stream) {
        return finished
      }

      // 2. ASCII code points and U+0080 encode as the byte of the same value.
      if (isASCIICodePoint(code_point) || code_point === 0x0080) {
        return code_point
      }

      // 3. U+00A5 encodes as byte 0x5C.
      if (code_point === 0x00A5) {
        return 0x5C
      }

      // 4. U+203E encodes as byte 0x7E.
      if (code_point === 0x203E) {
        return 0x7E
      }

      // 5. U+FF61..U+FF9F encode as code point - 0xFF61 + 0xA1.
      if (inRange(code_point, 0xFF61, 0xFF9F)) {
        return code_point - 0xFF61 + 0xA1
      }

      // 6. U+2212 is looked up (and, on failure, reported) as U+FF0D.
      const lookup = code_point === 0x2212 ? 0xFF0D : code_point

      // 7. Let pointer be the index Shift_JIS pointer for code point.
      const pointer = indexShiftJISPointerFor(lookup)

      // 8. If pointer is null, return error with code point.
      if (pointer === null) {
        return encoderError(lookup)
      }

      // 9./10. Lead is floor(pointer / 188); its offset is 0x81 if lead is
      // less than 0x1F, and 0xC1 otherwise.
      const lead = floor(pointer / 188)
      const leadOffset = lead < 0x1F ? 0x81 : 0xC1

      // 11./12. Trail is pointer % 188; its offset is 0x40 if trail is less
      // than 0x3F, and 0x41 otherwise.
      const trail = pointer % 188
      const trailOffset = trail < 0x3F ? 0x40 : 0x41

      // 13. Return the two bytes lead + lead offset and trail + offset.
      return [lead + leadOffset, trail + trailOffset]
    }
  }