decoder.go 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. package utf7
  2. import (
  3. "errors"
  4. "strings"
  5. "unicode/utf16"
  6. "unicode/utf8"
  7. )
  8. // ErrInvalidUTF7 means that a decoder encountered invalid UTF-7.
  9. var ErrInvalidUTF7 = errors.New("utf7: invalid UTF-7")
  10. // Decode decodes a string encoded with modified UTF-7.
  11. //
  12. // Note, raw UTF-8 is accepted.
  13. func Decode(src string) (string, error) {
  14. if !utf8.ValidString(src) {
  15. return "", errors.New("invalid UTF-8")
  16. }
  17. var sb strings.Builder
  18. sb.Grow(len(src))
  19. ascii := true
  20. for i := 0; i < len(src); i++ {
  21. ch := src[i]
  22. if ch < min || (ch > max && ch < utf8.RuneSelf) {
  23. // Illegal code point in ASCII mode. Note, UTF-8 codepoints are
  24. // always allowed.
  25. return "", ErrInvalidUTF7
  26. }
  27. if ch != '&' {
  28. sb.WriteByte(ch)
  29. ascii = true
  30. continue
  31. }
  32. // Find the end of the Base64 or "&-" segment
  33. start := i + 1
  34. for i++; i < len(src) && src[i] != '-'; i++ {
  35. if src[i] == '\r' || src[i] == '\n' { // base64 package ignores CR and LF
  36. return "", ErrInvalidUTF7
  37. }
  38. }
  39. if i == len(src) { // Implicit shift ("&...")
  40. return "", ErrInvalidUTF7
  41. }
  42. if i == start { // Escape sequence "&-"
  43. sb.WriteByte('&')
  44. ascii = true
  45. } else { // Control or non-ASCII code points in base64
  46. if !ascii { // Null shift ("&...-&...-")
  47. return "", ErrInvalidUTF7
  48. }
  49. b := decode([]byte(src[start:i]))
  50. if len(b) == 0 { // Bad encoding
  51. return "", ErrInvalidUTF7
  52. }
  53. sb.Write(b)
  54. ascii = false
  55. }
  56. }
  57. return sb.String(), nil
  58. }
  59. // Extracts UTF-16-BE bytes from base64 data and converts them to UTF-8.
  60. // A nil slice is returned if the encoding is invalid.
  61. func decode(b64 []byte) []byte {
  62. var b []byte
  63. // Allocate a single block of memory large enough to store the Base64 data
  64. // (if padding is required), UTF-16-BE bytes, and decoded UTF-8 bytes.
  65. // Since a 2-byte UTF-16 sequence may expand into a 3-byte UTF-8 sequence,
  66. // double the space allocation for UTF-8.
  67. if n := len(b64); b64[n-1] == '=' {
  68. return nil
  69. } else if n&3 == 0 {
  70. b = make([]byte, b64Enc.DecodedLen(n)*3)
  71. } else {
  72. n += 4 - n&3
  73. b = make([]byte, n+b64Enc.DecodedLen(n)*3)
  74. copy(b[copy(b, b64):n], []byte("=="))
  75. b64, b = b[:n], b[n:]
  76. }
  77. // Decode Base64 into the first 1/3rd of b
  78. n, err := b64Enc.Decode(b, b64)
  79. if err != nil || n&1 == 1 {
  80. return nil
  81. }
  82. // Decode UTF-16-BE into the remaining 2/3rds of b
  83. b, s := b[:n], b[n:]
  84. j := 0
  85. for i := 0; i < n; i += 2 {
  86. r := rune(b[i])<<8 | rune(b[i+1])
  87. if utf16.IsSurrogate(r) {
  88. if i += 2; i == n {
  89. return nil
  90. }
  91. r2 := rune(b[i])<<8 | rune(b[i+1])
  92. if r = utf16.DecodeRune(r, r2); r == utf8.RuneError {
  93. return nil
  94. }
  95. } else if min <= r && r <= max {
  96. return nil
  97. }
  98. j += utf8.EncodeRune(s[j:], r)
  99. }
  100. return s[:j]
  101. }