BASE62编码由a-z、A-Z以及0-9组成,相较于BASE64编码有更好的兼容性。为什么说具有更好的兼容性呢?因为BASE64编码后的文本中含有'/'、'='等具有特殊意义的字符,在文件名或URL参数等场景中会造成冲突。

那URL参数为什么不使用URL编码呢?因为使用Java的URLEncoder编码后,再使用URLDecoder解码,在某些特殊的场景下解码结果可能出现与原值不一样的情况。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import java.io.ByteArrayOutputStream;

public class Base62 {

    /** Radix of plain binary data: every byte is one base-256 digit. */
    private static final int STANDARD_BASE = 256;

    /** Radix of the encoded representation. */
    private static final int TARGET_BASE = 62;

    /** Maps a Base62 digit value (0..61) to the character that represents it. */
    private final byte[] alphabet;

    /** Inverse of {@link #alphabet}: maps a character code to its Base62 digit value. */
    private final byte[] lookup;

    private Base62(final byte[] alphabet) {
        this.alphabet = alphabet;
        this.lookup = buildLookupTable(alphabet);
    }

    /**
     * Returns a codec that uses the default (GMP-style) character set.
     *
     * @return a {@link Base62} instance.
     */
    public static Base62 createInstance() {
        return createInstanceWithGmpCharacterSet();
    }

    /**
     * Returns a codec that uses the GMP-style character set
     * (digits, then upper-case letters, then lower-case letters).
     *
     * @return a {@link Base62} instance.
     */
    public static Base62 createInstanceWithGmpCharacterSet() {
        return new Base62(CharacterSets.GMP);
    }

    /**
     * Returns a codec that uses the inverted character set
     * (digits, then lower-case letters, then upper-case letters).
     *
     * @return a {@link Base62} instance.
     */
    public static Base62 createInstanceWithInvertedCharacterSet() {
        return new Base62(CharacterSets.INVERTED);
    }

    /**
     * Encodes raw bytes as Base62 characters.
     *
     * @param message the bytes to encode.
     * @return the Base62 characters, one per byte of the returned array.
     */
    public byte[] encode(final byte[] message) {
        // First re-express the message as base-62 digit values, then map digits to characters.
        return translate(convert(message, STANDARD_BASE, TARGET_BASE), alphabet);
    }

    /**
     * Decodes Base62 characters back into the raw bytes they represent.
     *
     * @param encoded Base62 characters, one per byte.
     * @return the decoded bytes.
     * @throws IllegalArgumentException when {@code encoded} is {@code null} or contains a byte
     *     that is not an ASCII letter or digit.
     */
    public byte[] decode(final byte[] encoded) {
        if (isBase62Encoding(encoded)) {
            // Map characters to digit values, then re-express those digits in base 256.
            return convert(translate(encoded, lookup), TARGET_BASE, STANDARD_BASE);
        }

        throw new IllegalArgumentException("Input is not encoded correctly");
    }

    /**
     * Tells whether every byte of the input is an ASCII digit or letter.
     *
     * @param bytes the bytes to inspect; may be {@code null}.
     * @return {@code false} for {@code null} or when any byte lies outside {@code 0-9},
     *     {@code a-z} and {@code A-Z}; {@code true} otherwise (including for an empty array).
     */
    public boolean isBase62Encoding(final byte[] bytes) {
        if (bytes == null) {
            return false;
        }

        for (int i = 0; i < bytes.length; i++) {
            final byte b = bytes[i];
            final boolean digit = b >= '0' && b <= '9';
            final boolean lower = b >= 'a' && b <= 'z';
            final boolean upper = b >= 'A' && b <= 'Z';

            if (!(digit || lower || upper)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Replaces every element of {@code indices} with the dictionary entry stored at that index.
     */
    private byte[] translate(final byte[] indices, final byte[] dictionary) {
        final byte[] mapped = new byte[indices.length];

        int pos = 0;
        for (final byte index : indices) {
            mapped[pos++] = dictionary[index];
        }

        return mapped;
    }

    /**
     * Re-expresses a big-endian digit sequence given in {@code sourceBase} as a big-endian digit
     * sequence in {@code targetBase}. Leading zero digits of the input are carried over so that
     * they survive a round trip.
     */
    private byte[] convert(final byte[] message, final int sourceBase, final int targetBase) {
        // Repeated long division, inspired by: http://codegolf.stackexchange.com/a/21672
        final ByteArrayOutputStream digits =
                new ByteArrayOutputStream(estimateOutputLength(message.length, sourceBase, targetBase));

        byte[] dividend = message;

        while (dividend.length != 0) {
            final ByteArrayOutputStream nextDividend = new ByteArrayOutputStream(dividend.length);
            int carry = 0;

            for (final byte b : dividend) {
                final int value = carry * sourceBase + (b & 0xFF);
                final int quotientDigit = value / targetBase;

                carry = value % targetBase;

                // Leading zero digits of the quotient are dropped so the dividend keeps shrinking.
                if (quotientDigit > 0 || nextDividend.size() > 0) {
                    nextDividend.write(quotientDigit);
                }
            }

            // The remainder of each division round is the next least-significant output digit.
            digits.write(carry);
            dividend = nextDividend.toByteArray();
        }

        // One extra zero digit per leading zero of the input; the last input position is never
        // counted here.
        int zeros = 0;
        while (zeros < message.length - 1 && message[zeros] == 0) {
            digits.write(0);
            zeros++;
        }

        // Digits were produced least-significant first; flip them into big-endian order.
        final byte[] result = digits.toByteArray();
        for (int lo = 0, hi = result.length - 1; lo < hi; lo++, hi--) {
            final byte tmp = result[lo];
            result[lo] = result[hi];
            result[hi] = tmp;
        }

        return result;
    }

    /**
     * Estimates how many digits in {@code targetBase} are needed for {@code inputLength} digits
     * in {@code sourceBase}. Only used as an initial buffer capacity.
     */
    private int estimateOutputLength(int inputLength, int sourceBase, int targetBase) {
        final double digitsPerInputDigit = Math.log(sourceBase) / Math.log(targetBase);

        return (int) Math.ceil(digitsPerInputDigit * inputLength);
    }

    /**
     * Builds the 256-entry table that maps a character code to its position in {@code symbols}.
     * Entries for codes that do not occur in {@code symbols} stay zero.
     */
    private static byte[] buildLookupTable(final byte[] symbols) {
        final byte[] table = new byte[256];

        for (int position = 0; position < symbols.length; position++) {
            table[symbols[position]] = (byte) position;
        }

        return table;
    }

    /** The character sets available to {@link Base62}. */
    private static final class CharacterSets {

        /** Digits, then upper-case letters, then lower-case letters. */
        private static final byte[] GMP =
                ascii("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");

        /** Digits, then lower-case letters, then upper-case letters. */
        private static final byte[] INVERTED =
                ascii("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

        private CharacterSets() {
        }

        /** Narrows each (ASCII) character of {@code symbols} to a single byte. */
        private static byte[] ascii(final String symbols) {
            final byte[] bytes = new byte[symbols.length()];

            for (int i = 0; i < bytes.length; i++) {
                bytes[i] = (byte) symbols.charAt(i);
            }

            return bytes;
        }

    }

}