Evgenii Akentev
·
2025-10-17
punycode.zig
1const std = @import("std");
2const unicode = @import("std").unicode;
3const ArrayList = std.array_list.Managed;
4const expectEqual = std.testing.expectEqual;
5const expectEqualSlices = std.testing.expectEqualSlices;
6
// Bootstring parameters shared by the encoder and the decoder below.

// Radix of the variable-length digits ('a'..'z' followed by '0'..'9').
const base = 36;

// Lower and upper clamp applied to the per-digit threshold `t`.
const tmin = 1;
const tmax = 26;

// Tuning constants consumed by `adapt` when the bias is recomputed.
const skew = 38;
const damp = 700;

// Starting bias, and the first code point that is not copied verbatim.
const initialBias = 72;
const initialN = 128;
17
/// Recomputes the bias after one code point has been encoded or decoded.
///
/// `delta` is scaled down first (divided by `damp` when `firstTime` is set,
/// halved otherwise) and then increased by its quotient with `numpoints`.
/// The scaled value is repeatedly divided by `base - tmin` until it drops to
/// the threshold, adding `base` to the result for every division.
///
/// `numpoints` is used as a divisor and must therefore be non-zero.
fn adapt(delta: usize, numpoints: usize, firstTime: bool) usize {
    const scaled: usize = if (firstTime) delta / damp else delta / 2;
    var remaining: usize = scaled + scaled / numpoints;

    const limit: u32 = ((base - tmin) * tmax) / 2;
    var offset: u32 = 0;
    while (remaining > limit) {
        remaining /= base - tmin;
        offset += base;
    }

    return offset + ((base - tmin + 1) * remaining) / (remaining + skew);
}
29
/// Maps a digit value to its ASCII character: values below 26 become
/// 'a'..'z', the following ten values become '0'..'9'.
///
/// The sum is truncated to a byte, so values outside 0..35 do not produce a
/// meaningful digit.
fn encodeDigit(i: usize) u8 {
    // '0' - 26 == 22, so digit 26 lands on '0'.
    const offset: usize = if (i < 26) 'a' else '0' - 26;
    return @truncate(i + offset);
}
37
/// Converts a code point into its Punycode digit value.
///
/// Letters map to 0..25 regardless of case and '0'..'9' map to 26..35. Every
/// other code point yields `base`, which is not a valid digit value.
fn decodeDigit(cp: u21) u21 {
    // A range switch is used instead of unsigned comparisons such as
    // `cp - 48 < 10`: in Zig those subtractions trap on integer overflow for
    // any code point below the start of the range (for example ' ' or '-').
    return switch (cp) {
        '0'...'9' => cp - 22, // '0' (48) maps to 26
        'A'...'Z' => cp - 'A',
        'a'...'z' => cp - 'a',
        else => base,
    };
}
47
test "decode digit" {
    // Each row pairs a code point with the digit value it must decode to.
    const cases = [_]struct { input: u21, expected: u21 }{
        .{ .input = 'a', .expected = 0 },
        .{ .input = 'A', .expected = 0 },
        .{ .input = 'z', .expected = 25 },
        .{ .input = '0', .expected = 26 },
        .{ .input = '9', .expected = 35 },
        .{ .input = 65, .expected = 0 },
    };

    for (cases) |case| {
        try expectEqual(case.expected, decodeDigit(case.input));
    }
}
56
/// Encodes a sequence of code points as Punycode.
///
/// Code points below `initialN` are copied to the output first, followed by a
/// '-' when at least one was copied. The remaining code points are then
/// emitted in increasing order as variable-length digit groups.
///
/// The caller owns the returned list and must call `deinit` on it. When an
/// allocation fails the partially built output is released and the error is
/// returned.
pub fn encode(alloc: std.mem.Allocator, input: []const u21) !ArrayList(u8) {
    var result = ArrayList(u8).init(alloc);
    // Without this, a failing `append` below would leak what was written so far.
    errdefer result.deinit();

    // Basic code points go out first, in their original order.
    for (input) |c| {
        if (c < initialN) {
            try result.append(@truncate(c));
        }
    }

    const numOfBasics = result.items.len;

    // The delimiter is only written when there is something in front of it.
    if (numOfBasics > 0) {
        try result.append('-');
    }

    var bias: usize = initialBias;
    var delta: usize = 0;
    var n: usize = initialN;
    var h: usize = numOfBasics; // number of code points handled so far

    while (h < input.len) {
        // Smallest code point in the input that is still >= n.
        var m: usize = std.math.maxInt(usize);
        for (input) |c| {
            if (c >= n and c < m) {
                m = c;
            }
        }

        delta += (m - n) * (h + 1);
        n = m;

        for (input) |c| {
            if (c < n) {
                delta += 1;
            }

            if (c == n) {
                // Write `delta` as a variable-length integer.
                var k: usize = base;
                var q: usize = delta;

                while (true) : (k += base) {
                    const t: usize = if (k <= bias + tmin) tmin else if (k >= bias + tmax) tmax else k - bias;
                    if (q < t) break;

                    const newChar: usize = t + ((q - t) % (base - t));

                    try result.append(encodeDigit(newChar));
                    q = (q - t) / (base - t);
                }

                try result.append(encodeDigit(q));

                // The bias is damped only for the first non-basic code point.
                bias = adapt(delta, h + 1, h == numOfBasics);
                delta = 0;
                h += 1;
            }
        }

        delta += 1;
        n += 1;
    }

    return result;
}
122
/// Error set holding the single failure `decode` reports for malformed input.
const DecodeError = error{BadInput};
126
/// Decodes a Punycode string into code points.
///
/// Everything before the last '-' is copied verbatim; the rest is read as
/// variable-length digit groups, each producing one code point together with
/// the position at which it is inserted into the output.
///
/// The caller owns the returned list and must call `deinit` on it. On error
/// the partially built output is released before returning.
///
/// Returns `error.BadInput` when a byte before the last delimiter is not
/// below `initialN`, when a byte after it is not an ASCII letter or digit,
/// when the input ends in the middle of a digit group, or when a decoded
/// value overflows. Returns `error.OutOfMemory` when the output cannot grow.
pub fn decode(alloc: std.mem.Allocator, input: []const u8) error{ BadInput, OutOfMemory }!ArrayList(u21) {
    var result = ArrayList(u21).init(alloc);
    // Release the partial output on every error path below.
    errdefer result.deinit();

    // Index of the last delimiter; 0 also stands for "no delimiter".
    const b = std.mem.lastIndexOfScalar(u8, input, '-') orelse 0;

    for (input[0..b]) |c| {
        if (c >= initialN) return error.BadInput;
        try result.append(c);
    }

    var n: usize = initialN;
    var i: usize = 0;
    var bias: usize = initialBias;

    // Skip the delimiter itself when a basic prefix was consumed.
    var in: usize = if (b > 0) b + 1 else 0;
    while (in < input.len) {
        const oldI = i;
        var w: usize = 1;
        var k: usize = base;

        while (true) : (k += base) {
            if (in >= input.len) return error.BadInput;

            const c = input[in];
            in += 1;

            // Only ASCII letters and digits carry a digit value; reject
            // everything else before asking for one.
            if (!std.ascii.isAlphanumeric(c)) return error.BadInput;

            const digit: usize = decodeDigit(c);
            if (digit >= base) return error.BadInput;

            // Hostile input can make these grow without bound, so overflow is
            // reported as bad input instead of trapping.
            const step = std.math.mul(usize, digit, w) catch return error.BadInput;
            i = std.math.add(usize, i, step) catch return error.BadInput;

            const t: usize = if (k <= bias) tmin else if (k >= bias + tmax) tmax else k - bias;
            if (digit < t) break;

            w = std.math.mul(usize, w, base - t) catch return error.BadInput;
        }

        const outLen = result.items.len + 1;
        bias = adapt(i - oldI, outLen, oldI == 0);
        n = std.math.add(usize, n, i / outLen) catch return error.BadInput;
        i %= outLen;

        // A value that does not fit the element type is an error, not
        // something to wrap around silently.
        const cp = std.math.cast(u21, n) orelse return error.BadInput;
        try result.insert(i, cp);
        i += 1;
    }

    return result;
}
188
189// tests
190
test {
    // Encodes a string mixing ASCII and kanji and checks the exact output.
    const alloc = std.testing.allocator;

    var input = ArrayList(u21).init(alloc);
    defer input.deinit();

    var utf8 = (try std.unicode.Utf8View.init("3年b組金八先生")).iterator();
    while (utf8.nextCodepoint()) |codepoint| {
        try input.append(codepoint);
    }

    const encoded = try encode(alloc, input.items);
    defer encoded.deinit();

    // The expected slice goes first so a failure report labels the two
    // sides correctly.
    try expectEqualSlices(u8, "3b-ww4c5e180e575a65lsy2b", encoded.items);
}
207
test {
    // Decodes a fixed Punycode string and checks the resulting code points.
    const expected = [_]u21{ 0x0033, 0x5E74, 0x0062, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F };

    const decoded = try decode(std.testing.allocator, "3b-ww4c5e180e575a65lsy2b");
    defer decoded.deinit();

    try expectEqualSlices(u21, &expected, decoded.items);
}
216
// Round-trip vectors: each entry pairs a code point sequence with its
// expected Punycode form. NOTE(review): these look like the sample strings
// from RFC 3492 section 7.1 -- confirm against the RFC.
const testValues = [_]struct { []const u21, []const u8 }{
    // Arabic (Egyptian)
    .{
        &.{
            0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
            0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F,
        },
        "egbpdaj6bu4bxfgehfvwxn",
    },

    // Chinese (simplified)
    .{
        &.{ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587 },
        "ihqwcrb4cv8a8dqg056pqjye",
    },

    // Chinese (traditional)
    .{
        &.{ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587 },
        "ihqwctvzc91f659drss3x8bo0yb",
    },

    // Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    .{
        &.{
            0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
            0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
            0x0065, 0x0073, 0x006B, 0x0079,
        },
        "Proprostnemluvesky-uyb24dma41a",
    },

    // Hebrew
    .{
        &.{
            0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
            0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
            0x05D1, 0x05E8, 0x05D9, 0x05EA,
        },
        "4dbcagdahymbxekheh6e0a7fei0b",
    },

    // Hindi (Devanagari)
    .{
        &.{
            0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
            0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
            0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
            0x0939, 0x0948, 0x0902,
        },
        "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
    },

    // Japanese (kanji and hiragana)
    .{
        &.{
            0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
            0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B,
        },
        "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
    },

    // Korean (Hangul syllables)
    .{
        &.{
            0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
            0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
            0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C,
        },
        "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",
    },

    // Russian (Cyrillic)
    .{
        &.{
            0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
            0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
            0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
            0x0438,
        },
        "b1abfaaepdrnnbgefbadotcwatmq2g4l",
    },

    // Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    .{
        &.{
            0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
            0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
            0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
            0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
            0x0061, 0x00F1, 0x006F, 0x006C,
        },
        "PorqunopuedensimplementehablarenEspaol-fmd56a",
    },

    // Vietnamese: T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch<ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    .{
        &.{
            0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
            0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
            0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
            0x0056, 0x0069, 0x1EC7, 0x0074,
        },
        "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
    },

    // 3<nen>B<gumi><kinpachi><sensei>
    .{
        &.{ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F },
        "3B-ww4c5e180e575a65lsy2b",
    },

    // <amuro><namie>-with-SUPER-MONKEYS
    .{
        &.{
            0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
            0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
            0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053,
        },
        "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
    },

    // Hello-Another-Way-<sorezore><no><basho>
    .{
        &.{
            0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
            0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
            0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240,
        },
        "Hello-Another-Way--fc4qua05auwb3674vfr0b",
    },

    // <hitotsu><yane><no><shita>2
    .{
        &.{ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 },
        "2-u9tlzr9756bt3uc0v",
    },

    // Maji<de>Koi<suru>5<byou><mae>
    .{
        &.{
            0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
            0x308B, 0x0035, 0x79D2, 0x524D,
        },
        "MajiKoi5-783gue6qz075azm5e",
    },

    // <pafii>de<runba>
    .{
        &.{ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 },
        "de-jg4avhby1noc0d",
    },

    // <sono><supiido><de>
    .{
        &.{ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 },
        "d9juau41awczczp",
    },

    // -> $1.00 <-
    .{
        &.{ 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020, 0x003C, 0x002D },
        "-> $1.00 <--",
    },
};
303
test {
    // Every vector must encode to its expected string and decode back to the
    // code points it started from.
    for (testValues) |sample| {
        const codepoints = sample[0];
        const expected = sample[1];

        const encoded = try encode(std.testing.allocator, codepoints);
        defer encoded.deinit();
        try expectEqualSlices(u8, expected, encoded.items);

        const roundTrip = try decode(std.testing.allocator, encoded.items);
        defer roundTrip.deinit();
        try expectEqualSlices(u21, codepoints, roundTrip.items);
    }
}
319