// big5.js
  1. import { inRange, decoderError, encoderError, isASCIICodePoint,
  2. end_of_stream, finished, isASCIIByte, floor } from './text_decoder_utils.js'
  3. import index, { indexBig5PointerFor, indexCodePointFor } from './text_decoder_indexes.js'
  //
  // 12. Legacy multi-byte Chinese (traditional) encodings
  //
  // 12.1 Big5
  // 12.1.1 Big5 decoder
  /**
   * Streaming decoder for Big5, following the step list of the Encoding
   * Standard's "Big5 decoder" (section 12.1.1).
   *
   * Bytes are pushed into `handler` one at a time. A lead byte is parked in
   * `Big5_lead` until the following trail byte arrives.
   *
   * @implements {Decoder}
   */
  export class Big5Decoder {
    /**
     * @param {{fatal: (boolean|undefined)}=} options Decoder options. `fatal`
     *     is handed to `decoderError` for every malformed sequence. The whole
     *     object may be omitted, in which case `fatal` is `false`.
     */
    constructor(options = {}) {
      const { fatal = false } = options
      this.fatal = fatal
      // Big5's decoder has an associated Big5 lead (initially 0x00).
      this.Big5_lead = 0x00
    }

    /**
     * Consumes one byte (or the end-of-stream marker) and reports the result.
     *
     * @param {Stream} stream The stream of bytes being decoded; only its
     *     `prepend` method is used, to push back an ASCII byte that followed
     *     a lead byte without forming a valid pair.
     * @param {number} bite The next byte read from the stream.
     * @return {?(number|!Array.<number>)} A code point, a pair of code points
     *     (for the four pointers that map to a base letter plus combining
     *     mark), `null` when more input is needed, `finished` at a clean end
     *     of stream, or whatever `decoderError(this.fatal)` produces.
     */
    handler(stream, bite) {
      const lead = this.Big5_lead

      // Steps 1-2. End-of-stream: a dangling lead byte is an error (and is
      // cleared); otherwise decoding is finished.
      if (bite === end_of_stream) {
        if (lead === 0x00) {
          return finished
        }
        this.Big5_lead = 0x00
        return decoderError(this.fatal)
      }

      // Step 3. A lead byte is pending, so `bite` is treated as its trail.
      if (lead !== 0x00) {
        this.Big5_lead = 0x00

        // 3.1-3.2. Valid trail bytes are 0x40-0x7E and 0xA1-0xFE. The offset
        // is 0x40 for the low range (byte < 0x7F) and 0x62 for the high one,
        // which closes the gap between the two ranges in the pointer space.
        let pointer = null
        if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) {
          const offset = bite < 0x7F ? 0x40 : 0x62
          pointer = (lead - 0x81) * 157 + (bite - offset)
        }

        // 3.3. Four pointers decode to two code points each:
        //   Pointer | Code points
        //   --------+--------------
        //   1133    | U+00CA U+0304
        //   1135    | U+00CA U+030C
        //   1164    | U+00EA U+0304
        //   1166    | U+00EA U+030C
        switch (pointer) {
          case 1133: return [0x00CA, 0x0304]
          case 1135: return [0x00CA, 0x030C]
          case 1164: return [0x00EA, 0x0304]
          case 1166: return [0x00EA, 0x030C]
        }

        // 3.4. Look the pointer up in index Big5 (skipped when there is no
        // pointer at all).
        const code_point = pointer === null
          ? null
          : indexCodePointFor(pointer, index('big5'))

        // 3.7. Successful lookup.
        if (code_point !== null) {
          return code_point
        }

        // 3.5. An ASCII byte that failed as a trail is pushed back onto the
        // stream before the error is reported.
        if (isASCIIByte(bite)) {
          stream.prepend(bite)
        }

        // 3.6. No code point for this lead/trail pair.
        return decoderError(this.fatal)
      }

      // Step 4. ASCII bytes map to themselves.
      if (isASCIIByte(bite)) {
        return bite
      }

      // Step 5. Remember a lead byte and ask for more input ("continue").
      if (inRange(bite, 0x81, 0xFE)) {
        this.Big5_lead = bite
        return null
      }

      // Step 6. Anything else (0x80, 0xFF) is an error.
      return decoderError(this.fatal)
    }
  }
  // 12.1.2 Big5 encoder
  /**
   * Encoder for Big5, following the step list of the Encoding Standard's
   * "Big5 encoder" (section 12.1.2).
   *
   * The encoder keeps no state between calls, so `handler` is an ordinary
   * prototype method shared by all instances.
   *
   * @implements {Encoder}
   */
  export class Big5Encoder {
    /**
     * Encodes a single code point.
     *
     * @param {Stream} stream Input stream. It is not read here; the parameter
     *     exists so the signature matches the other handlers.
     * @param {number} code_point Next code point read from the stream.
     * @return {(number|!Array.<number>)} `finished` at end of stream, a
     *     single byte for ASCII input, a `[lead, trail]` byte pair for a
     *     code point found in index Big5, or the result of
     *     `encoderError(code_point)` when the code point cannot be encoded.
     */
    handler(stream, code_point) {
      // 1. If code point is end-of-stream, return finished.
      if (code_point === end_of_stream) {
        return finished
      }

      // 2. If code point is an ASCII code point, return a byte whose value
      // is code point.
      if (isASCIICodePoint(code_point)) {
        return code_point
      }

      // 3. Let pointer be the index Big5 pointer for code point.
      const pointer = indexBig5PointerFor(code_point)

      // 4. If pointer is null, return error with code point.
      if (pointer === null) {
        return encoderError(code_point)
      }

      // 5. Let lead be floor(pointer / 157) + 0x81.
      const lead = floor(pointer / 157) + 0x81

      // 6. If lead is less than 0xA1, return error with code point.
      if (lead < 0xA1) {
        return encoderError(code_point)
      }

      // 7. Let trail be pointer % 157.
      const trail = pointer % 157

      // 8. Let offset be 0x40 if trail is less than 0x3F and 0x62 otherwise,
      // so trails 0..0x3E land on 0x40..0x7E and the rest on 0xA1..0xFE.
      const offset = trail < 0x3F ? 0x40 : 0x62

      // 9. Return two bytes whose values are lead and trail + offset.
      return [lead, trail + offset]
    }
  }