- commit
- e21b28b
- parent
- e21b28b
- author
- Evgenii Akentev
- date
- 2025-10-17 19:04:36 +0400 +04
Create punycode library
6 files changed,
+490,
-0
+23,
-0
1@@ -0,0 +1,23 @@
2+# This file is for zig-specific build artifacts.
3+# If you have OS-specific or editor-specific files to ignore,
4+# such as *.swp or .DS_Store, put those in your global
5+# ~/.gitignore and put this in your ~/.gitconfig:
6+#
7+# [core]
8+# excludesfile = ~/.gitignore
9+#
10+# Cheers!
11+# -andrewrk
12+
13+.zig-cache/
14+zig-out/
15+/release/
16+/debug/
17+/build/
18+/build-*/
19+/docgen_tmp/
20+
21+# Although this was renamed to .zig-cache, let's leave it here for a few
22+# releases to make it less annoying to work with multiple branches.
23+zig-cache/
24+
A
LICENSE
+20,
-0
1@@ -0,0 +1,20 @@
2+Copyright (c) 2025 Evgenii Akentev
3+
4+Permission is hereby granted, free of charge, to any person obtaining
5+a copy of this software and associated documentation files (the
6+"Software"), to deal in the Software without restriction, including
7+without limitation the rights to use, copy, modify, merge, publish,
8+distribute, sublicense, and/or sell copies of the Software, and to
9+permit persons to whom the Software is furnished to do so, subject to
10+the following conditions:
11+
12+The above copyright notice and this permission notice shall be included
13+in all copies or substantial portions of the Software.
14+
15+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+1,
-0
1@@ -0,0 +1 @@
2+Implementation of Punycode (RFC 3492, https://www.rfc-editor.org/rfc/rfc3492) in Zig.
+46,
-0
1@@ -0,0 +1,46 @@
2+const std = @import("std");
3+
/// Declares the build graph: a static `punycode` library compiled from
/// `src/punycode.zig`, and a `test` step that runs that module's `test` blocks.
pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    // One module backs both the library and the test executable, so both are
    // built for the same target and optimization mode.
    const punycode_module = b.createModule(.{
        .root_source_file = b.path("src/punycode.zig"),
        .target = target,
        .optimize = optimize,
    });

    const library = b.addLibrary(.{
        .name = "punycode",
        .linkage = .static,
        .root_module = punycode_module,
    });
    b.installArtifact(library);

    // Executable that runs the `test` blocks found in the module.
    const unit_tests = b.addTest(.{
        .root_module = punycode_module,
    });
    const run_unit_tests = b.addRunArtifact(unit_tests);

    // Top-level step, listed in `zig build --help` and invoked as `zig build test`.
    const test_step = b.step("test", "Run tests");
    test_step.dependOn(&run_unit_tests.step);
}
+81,
-0
1@@ -0,0 +1,81 @@
.{
    // Default key used in a dependent's `dependencies` table when this package
    // is added with `zig fetch --save <url>`.
    .name = .punycode_zig,
    // This is a [Semantic Version](https://semver.org/).
    .version = "0.0.0",
    // Together with name, this represents a globally unique package
    // identifier. It is generated by the Zig toolchain when the package is
    // first created, and then *never changes*.
    //
    // When forking a Zig project, this id should be regenerated (delete the
    // field and run `zig build`) if the upstream project is still maintained.
    .fingerprint = 0xf0800252b14743b, // Changing this has security and trust implications.
    // Tracks the earliest Zig version that the package considers to be a
    // supported use case.
    .minimum_zig_version = "0.15.1",
    // No dependencies yet. See `zig fetch --save <url>` for adding one; each
    // entry must provide either a `url` and `hash`, or a `path`.
    .dependencies = .{},
    // Files and directories included in this package. Only what is listed here
    // contributes to the package hash and remains on disk when the package is
    // fetched, so list everything required for compilation plus the license.
    // Paths are relative to the build root; a directory includes all files
    // within it, recursively.
    .paths = .{
        "build.zig",
        "build.zig.zon",
        "src",
        "LICENSE",
    },
}
+319,
-0
1@@ -0,0 +1,319 @@
2+const std = @import("std");
3+const unicode = @import("std").unicode;
4+const ArrayList = std.array_list.Managed;
5+const expectEqual = std.testing.expectEqual;
6+const expectEqualSlices = std.testing.expectEqualSlices;
7+
// Bootstring parameter values used by Punycode (RFC 3492, section 5).

/// Number of digit values: letters map to 0..25 and decimal digits to 26..35.
const base = 36;

/// Lower and upper clamp for the per-position digit threshold `t`.
const tmin = 1;
const tmax = 26;

/// Bias adaptation parameters consumed by `adapt`.
const skew = 38;
const damp = 700;

/// Starting bias, and the first non-basic code point (everything below is ASCII).
const initialBias = 72;
const initialN = 128;
18+
/// Computes the next bias from the delta that was just encoded or decoded.
///
/// `numpoints` is the number of code points handled so far (including the
/// current one) and must be non-zero. `firstTime` selects the stronger
/// `damp` scaling used for the very first delta.
fn adapt(delta: usize, numpoints: usize, firstTime: bool) usize {
    const digit_span = base - tmin;
    const threshold: u32 = digit_span * tmax / 2;

    var scaled: usize = if (firstTime) delta / damp else delta >> 1;
    scaled += scaled / numpoints;

    // Each division by `digit_span` accounts for one more digit in the
    // expected length of the next delta.
    var offset: u32 = 0;
    while (scaled > threshold) {
        scaled /= digit_span;
        offset += base;
    }

    return offset + ((digit_span + 1) * scaled) / (scaled + skew);
}
30+
/// Maps a digit value to its ASCII character: 0..25 become `'a'`..`'z'` and
/// 26..35 become `'0'`..`'9'`. Values of 36 or more are not meaningful digits.
fn encodeDigit(i: usize) u8 {
    // For the decimal range the offset is '0' - 26, i.e. 22.
    const offset: usize = if (i < 26) 'a' else '0' - 26;
    return @truncate(i + offset);
}
38+
/// Maps a Punycode digit character to its numeric value.
///
/// `'A'`..`'Z'` and `'a'`..`'z'` map to 0..25, and `'0'`..`'9'` map to 26..35.
/// Every other code point yields `base`, which callers treat as "not a digit".
fn decodeDigit(cp: u21) u21 {
    // Match ranges instead of testing `cp - K < N`: `cp` is unsigned, so that
    // subtraction underflows for any code point below the start of the range.
    return switch (cp) {
        '0'...'9' => cp - '0' + 26,
        'A'...'Z' => cp - 'A',
        'a'...'z' => cp - 'a',
        else => base,
    };
}

test "decode digit" {
    try expectEqual(@as(u21, 0), decodeDigit('a'));
    try expectEqual(@as(u21, 0), decodeDigit('A'));
    try expectEqual(@as(u21, 25), decodeDigit('z'));
    try expectEqual(@as(u21, 25), decodeDigit('Z'));
    try expectEqual(@as(u21, 26), decodeDigit('0'));
    try expectEqual(@as(u21, 35), decodeDigit('9'));
    try expectEqual(@as(u21, 0), decodeDigit(65));

    // Non-digits on either side of each range must yield `base`, not trap.
    try expectEqual(@as(u21, base), decodeDigit(' '));
    try expectEqual(@as(u21, base), decodeDigit('-'));
    try expectEqual(@as(u21, base), decodeDigit(':'));
    try expectEqual(@as(u21, base), decodeDigit('@'));
    try expectEqual(@as(u21, base), decodeDigit('['));
    try expectEqual(@as(u21, base), decodeDigit('`'));
    try expectEqual(@as(u21, base), decodeDigit('{'));
    try expectEqual(@as(u21, base), decodeDigit(0x80));
}
57+
/// Encodes a sequence of code points as a Punycode string.
///
/// Code points below `initialN` are copied to the output unchanged, followed
/// by a `'-'` delimiter if there was at least one. The remaining code points
/// are then emitted, in increasing order of value, as variable-length
/// integers built from `encodeDigit` characters.
///
/// The caller owns the returned list and must call `deinit` on it. The only
/// failure is an allocation error, in which case the partial output is freed.
pub fn encode(alloc: std.mem.Allocator, input: []const u21) !ArrayList(u8) {
    var result = ArrayList(u8).init(alloc);
    // Without this, a failed `append` below would leak what was built so far.
    errdefer result.deinit();

    for (input) |c| {
        if (c < initialN) {
            try result.append(@intCast(c));
        }
    }

    const numOfBasics = result.items.len;

    if (numOfBasics > 0) {
        try result.append('-');
    }

    var bias: usize = initialBias;
    var delta: usize = 0;
    var n: usize = initialN;
    // Number of input code points that have been handled so far.
    var h: usize = numOfBasics;

    while (h < input.len) {
        // Smallest code point that has not been handled yet. One always
        // exists here because `h < input.len`.
        var m: usize = std.math.maxInt(usize);
        for (input) |c| {
            if (c >= n and c < m) {
                m = c;
            }
        }

        // Advance the decoder's <n, i> state to <m, 0>.
        delta += (m - n) * (h + 1);
        n = m;

        for (input) |c| {
            if (c < n) {
                delta += 1;
            }

            if (c == n) {
                // Emit `delta` as a generalized variable-length integer.
                var q: usize = delta;
                var k: usize = base;

                while (true) : (k += base) {
                    const t: usize = if (k <= bias + tmin) tmin else if (k >= bias + tmax) tmax else k - bias;
                    if (q < t) break;

                    try result.append(encodeDigit(t + (q - t) % (base - t)));
                    q = (q - t) / (base - t);
                }

                try result.append(encodeDigit(q));

                bias = adapt(delta, h + 1, h == numOfBasics);
                delta = 0;
                h += 1;
            }
        }

        delta += 1;
        n += 1;
    }

    return result;
}
123+
/// Error returned by `decode` when its input is not well-formed Punycode.
const DecodeError = error{BadInput};
127+
/// Decodes a Punycode string into a sequence of code points.
///
/// Everything before the last `'-'` is copied as literal basic code points;
/// a `'-'` at index 0 is not treated as a delimiter. The rest is read as a
/// series of variable-length integers, each of which inserts one code point
/// into the output.
///
/// The caller owns the returned list and must call `deinit` on it. On any
/// error the partial output is freed before returning.
///
/// Errors:
/// - `error.BadInput`: a byte >= `initialN` in the basic section, a character
///   that `decodeDigit` does not map to a digit, input that ends in the middle
///   of an integer, arithmetic overflow, or a code point that does not fit `u21`.
/// - `error.OutOfMemory`: allocation failed.
///
/// Decoded values are not checked against the Unicode range beyond fitting `u21`.
pub fn decode(alloc: std.mem.Allocator, input: []const u8) error{ BadInput, OutOfMemory }!ArrayList(u21) {
    var result = ArrayList(u21).init(alloc);
    // Every error return below would otherwise leak the list.
    errdefer result.deinit();

    // Index of the last delimiter, or 0 when there is none.
    const b: usize = std.mem.lastIndexOfScalar(u8, input, '-') orelse 0;

    for (input[0..b]) |c| {
        if (c >= initialN) return error.BadInput;
        try result.append(c);
    }

    var n: usize = initialN;
    var i: usize = 0;
    var bias: usize = initialBias;

    // Skip the delimiter only if basic code points preceded it.
    var pos: usize = if (b > 0) b + 1 else 0;
    while (pos < input.len) {
        const old_i = i;
        var w: usize = 1;
        var k: usize = base;

        // Read one variable-length integer and add it to `i`.
        while (true) : (k += base) {
            if (pos >= input.len) return error.BadInput;

            const digit: usize = decodeDigit(input[pos]);
            pos += 1;
            if (digit >= base) return error.BadInput;

            // Checked arithmetic: crafted input must fail, not trap.
            const step = std.math.mul(usize, digit, w) catch return error.BadInput;
            i = std.math.add(usize, i, step) catch return error.BadInput;

            const t: usize = if (k <= bias) tmin else if (k >= bias + tmax) tmax else k - bias;
            if (digit < t) break;

            w = std.math.mul(usize, w, base - t) catch return error.BadInput;
        }

        const out_len = result.items.len + 1;
        bias = adapt(i - old_i, out_len, old_i == 0);

        // `i` wraps around the output length; each wrap increments `n`.
        n = std.math.add(usize, n, i / out_len) catch return error.BadInput;
        i %= out_len;

        // Reject values that do not fit rather than silently truncating them.
        const cp = std.math.cast(u21, n) orelse return error.BadInput;
        try result.insert(i, cp);
        i += 1;
    }

    return result;
}

test "decode rejects malformed input without leaking" {
    const alloc = std.testing.allocator;

    // Input ends in the middle of an integer, after a basic code point was stored.
    try std.testing.expectError(error.BadInput, decode(alloc, "a-99"));
    // Non-ASCII byte in the basic section, after a basic code point was stored.
    try std.testing.expectError(error.BadInput, decode(alloc, "a\x80-b"));
    // A long run of high digits overflows the running weight.
    try std.testing.expectError(error.BadInput, decode(alloc, "9" ** 64));
}
189+
190+// tests
191+
test "encode mixed basic and non-basic code points" {
    const alloc = std.testing.allocator;

    var input = ArrayList(u21).init(alloc);
    defer input.deinit();

    // `encode` takes code points, so decode the UTF-8 literal first.
    var code_points = (try std.unicode.Utf8View.init("3年b組金八先生")).iterator();
    while (code_points.nextCodepoint()) |codepoint| {
        try input.append(codepoint);
    }

    const encoded = try encode(alloc, input.items);
    defer encoded.deinit();

    // Expected value first, so a failure report labels the two sides correctly.
    try expectEqualSlices(u8, "3b-ww4c5e180e575a65lsy2b", encoded.items);
}
208+
// Decodes the lower-case form of the "3<nen>b<gumi>..." sample and compares
// the result against its code points.
test {
    const expected = [_]u21{ 0x0033, 0x5E74, 0x0062, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F };

    const decoded = try decode(std.testing.allocator, "3b-ww4c5e180e575a65lsy2b");
    defer decoded.deinit();

    try expectEqualSlices(u21, &expected, decoded.items);
}
217+
/// Sample pairs of (code points, expected Punycode) used by the round-trip test.
/// Expected strings use lower-case digits, which is what `encode` produces.
const testValues = [_]struct { []const u21, []const u8 }{
    // Arabic (Egyptian)
    .{
        &.{
            0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
            0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F,
        },
        "egbpdaj6bu4bxfgehfvwxn",
    },
    // Chinese (simplified)
    .{
        &.{ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587 },
        "ihqwcrb4cv8a8dqg056pqjye",
    },
    // Chinese (traditional)
    .{
        &.{ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587 },
        "ihqwctvzc91f659drss3x8bo0yb",
    },
    // Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    .{
        &.{
            0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
            0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
            0x0065, 0x0073, 0x006B, 0x0079,
        },
        "Proprostnemluvesky-uyb24dma41a",
    },
    // Hebrew
    .{
        &.{
            0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
            0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
            0x05D1, 0x05E8, 0x05D9, 0x05EA,
        },
        "4dbcagdahymbxekheh6e0a7fei0b",
    },
    // Hindi (Devanagari)
    .{
        &.{
            0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
            0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
            0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
            0x0939, 0x0948, 0x0902,
        },
        "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
    },
    // Japanese (kanji and hiragana)
    .{
        &.{
            0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
            0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B,
        },
        "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
    },
    // Korean (Hangul syllables)
    .{
        &.{
            0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
            0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
            0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C,
        },
        "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",
    },
    // Russian (Cyrillic)
    .{
        &.{
            0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
            0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
            0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
            0x0438,
        },
        "b1abfaaepdrnnbgefbadotcwatmq2g4l",
    },
    // Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    .{
        &.{
            0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
            0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
            0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
            0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
            0x0061, 0x00F1, 0x006F, 0x006C,
        },
        "PorqunopuedensimplementehablarenEspaol-fmd56a",
    },
    // Vietnamese: T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch<ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    .{
        &.{
            0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
            0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
            0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
            0x0056, 0x0069, 0x1EC7, 0x0074,
        },
        "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
    },
    // 3<nen>B<gumi><kinpachi><sensei>
    .{
        &.{ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F },
        "3B-ww4c5e180e575a65lsy2b",
    },
    // <amuro><namie>-with-SUPER-MONKEYS
    .{
        &.{
            0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
            0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
            0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053,
        },
        "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
    },
    // Hello-Another-Way-<sorezore><no><basho>
    .{
        &.{
            0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
            0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
            0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240,
        },
        "Hello-Another-Way--fc4qua05auwb3674vfr0b",
    },
    // <hitotsu><yane><no><shita>2
    .{
        &.{ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 },
        "2-u9tlzr9756bt3uc0v",
    },
    // Maji<de>Koi<suru>5<byou><mae>
    .{
        &.{
            0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
            0x308B, 0x0035, 0x79D2, 0x524D,
        },
        "MajiKoi5-783gue6qz075azm5e",
    },
    // <pafii>de<runba>
    .{
        &.{ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 },
        "de-jg4avhby1noc0d",
    },
    // <sono><supiido><de>
    .{
        &.{ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 },
        "d9juau41awczczp",
    },
    // -> $1.00 <-
    .{
        &.{ 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020, 0x003C, 0x002D },
        "-> $1.00 <--",
    },
};
304+
// Round-trips every entry of `testValues`: the code points must encode to the
// expected string, and that string must decode back to the same code points.
test {
    const alloc = std.testing.allocator;

    for (testValues) |sample| {
        const code_points, const punycode = sample;

        const encoded = try encode(alloc, code_points);
        defer encoded.deinit();
        try expectEqualSlices(u8, punycode, encoded.items);

        const decoded = try decode(alloc, encoded.items);
        defer decoded.deinit();
        try expectEqualSlices(u21, code_points, decoded.items);
    }
}
320+