Create punycode library

author Evgenii Akentev <github@xn--bdkaa.com>

Fri, 17 Oct 2025 15:04:36 +0000 (19:04 +0400)

committer Evgenii Akentev <hi@xn--bdkaa.com>

Tue, 21 Oct 2025 16:18:36 +0000 (20:18 +0400)
author Evgenii Akentev <github@xn--bdkaa.com>
Fri, 17 Oct 2025 15:04:36 +0000 (19:04 +0400)
committer Evgenii Akentev <hi@xn--bdkaa.com>
Tue, 21 Oct 2025 16:18:36 +0000 (20:18 +0400)
diff --git a/.gitignore b/.gitignore

new file mode 100644 (file)

index 0000000..bc0b345
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,23 @@
+# This file is for zig-specific build artifacts.
+# If you have OS-specific or editor-specific files to ignore,
+# such as *.swp or .DS_Store, put those in your global
+# ~/.gitignore and put this in your ~/.gitconfig:
+#
+# [core]
+#     excludesfile = ~/.gitignore
+#
+# Cheers!
+# -andrewrk
+
+.zig-cache/
+zig-out/
+/release/
+/debug/
+/build/
+/build-*/
+/docgen_tmp/
+
+# Although this was renamed to .zig-cache, let's leave it here for a few
+# releases to make it less annoying to work with multiple branches.
+zig-cache/
+
diff --git a/LICENSE b/LICENSE

new file mode 100644 (file)

index 0000000..755feff
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2025 Evgenii Akentev
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md

new file mode 100644 (file)

index 0000000..8f0d674
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Implementation of https://www.rfc-editor.org/rfc/rfc3492 in Zig
diff --git a/build.zig b/build.zig

new file mode 100644 (file)

index 0000000..f81b341
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,46 @@
+const std = @import("std");
+
+// Build script entry point: declares the static `punycode` library and a
+// `test` step that runs the `test` blocks of the library's root module.
+pub fn build(b: *std.Build) void {
+    // Resolve the target and optimization mode through the build system's
+    // standard options.
+    const target = b.standardTargetOptions(.{});
+    const optimize = b.standardOptimizeOption(.{});
+
+    // Static library whose root source file is src/punycode.zig.
+    const libpunycode = b.addLibrary(.{
+        .name = "punycode",
+        .linkage = .static,
+        .root_module = b.createModule(.{
+            .root_source_file = b.path("src/punycode.zig"),
+            .target = target,
+            .optimize = optimize,
+        }),
+    });
+
+    // Make the library part of the default install step.
+    b.installArtifact(libpunycode);
+
+    // Creates an executable that will run `test` blocks from the library's
+    // root module. The module already carries the target and optimize
+    // settings given to `createModule` above, so it is reused as-is.
+    const mod_tests = b.addTest(.{
+        .root_module = libpunycode.root_module,
+    });
+
+    // A run step that will run the test executable.
+    const run_mod_tests = b.addRunArtifact(mod_tests);
+
+    // A top level step for running all tests. Only one run step exists
+    // today; dependOn can be called again to attach further test runs.
+    const test_step = b.step("test", "Run tests");
+    test_step.dependOn(&run_mod_tests.step);
+
+    // Just like flags, top level steps are also listed in the `--help` menu.
+    //
+    // The Zig build system is entirely implemented in userland, which means
+    // that it cannot hook into private compiler APIs. All compilation work
+    // orchestrated by the build system will result in other Zig compiler
+    // subcommands being invoked with the right flags defined. You can observe
+    // these invocations when one fails (or you pass a flag to increase
+    // verbosity) to validate assumptions and diagnose problems.
+    //
+    // Lastly, the Zig build system is relatively simple and self-contained,
+    // and reading its source code will allow you to master it.
+}
diff --git a/build.zig.zon b/build.zig.zon

new file mode 100644 (file)

index 0000000..1588516
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,81 @@
+.{
+    // This is the default name used by packages depending on this one. For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = .punycode_zig,
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .fingerprint = 0xf0800252b14743b, // Changing this has security and trust implications.
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = "0.15.1",
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
+        //    // which will prevent zig from using it.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. As a rule of thumb, one should list
+    // files required for compilation plus any license(s).
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        "build.zig",
+        "build.zig.zon",
+        "src",
+        // For example...
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/punycode.zig b/src/punycode.zig

new file mode 100644 (file)

index 0000000..71cc24f
--- /dev/null
+++ b/src/punycode.zig
@@ -0,0 +1,319 @@
+const std = @import("std");
+const unicode = @import("std").unicode;
+const ArrayList = std.array_list.Managed;
+const expectEqual = std.testing.expectEqual;
+const expectEqualSlices = std.testing.expectEqualSlices;
+
+// Bootstring parameter values used by this Punycode implementation
+// (the README points at RFC 3492 as the specification).
+
+// Number of digit values: 'a'..'z' encode 0..25 and '0'..'9' encode 26..35.
+const base = 36;
+
+// Lower and upper clamp of the per-digit threshold `t` computed in
+// `encode` and `decode`.
+const tmin = 1;
+const tmax = 26;
+
+// Tuning constants consumed by `adapt`.
+const skew = 38;
+const damp = 700;
+
+// Starting bias, and the first code point that is not copied literally
+// (everything below `initialN` is treated as a basic code point).
+const initialBias = 72;
+const initialN = 128;
+
+/// Bias adaptation step shared by `encode` and `decode`.
+///
+/// `delta` is first scaled down (divided by `damp` on the first call, halved
+/// on later calls) and increased by its share per code point (`numpoints`).
+/// It is then divided by `base - tmin` until it drops to the limit, adding
+/// `base` to the result for every division. Returns the new bias.
+fn adapt(delta: usize, numpoints: usize, firstTime: bool) usize {
+    const limit: usize = ((base - tmin) * tmax) / 2;
+
+    var scaled: usize = delta;
+    if (firstTime) {
+        scaled /= damp;
+    } else {
+        scaled /= 2;
+    }
+    scaled += scaled / numpoints;
+
+    var k: usize = 0;
+    while (scaled > limit) {
+        scaled /= base - tmin;
+        k += base;
+    }
+
+    return k + ((base - tmin + 1) * scaled) / (scaled + skew);
+}
+
+/// Maps a digit value to its output character: 0..25 become 'a'..'z' and
+/// 26..35 become '0'..'9'. The sum is truncated to `u8`.
+fn encodeDigit(i: usize) u8 {
+    // Letters start at 'a'; digits start at '0' once the 26 letters are skipped.
+    const offset: usize = if (i < 26) 'a' else '0' - 26;
+    return @truncate(i + offset);
+}
+
+/// Maps a code point to its digit value: '0'..'9' give 26..35, and both
+/// 'A'..'Z' and 'a'..'z' give 0..25. Any other code point yields `base`,
+/// which callers treat as "not a digit".
+///
+/// Ranges are matched explicitly instead of subtracting first: unsigned
+/// subtraction such as `cp - 48` underflows for code points below the range
+/// start, which traps in safe builds and is undefined in unsafe ones.
+fn decodeDigit(cp: u21) u21 {
+    return switch (cp) {
+        '0'...'9' => cp - '0' + 26,
+        'A'...'Z' => cp - 'A',
+        'a'...'z' => cp - 'a',
+        else => base,
+    };
+}
+
+test "decode digit" {
+    try expectEqual(@as(u21, 0), decodeDigit('a'));
+    try expectEqual(@as(u21, 0), decodeDigit('A'));
+    try expectEqual(@as(u21, 25), decodeDigit('z'));
+    try expectEqual(@as(u21, 25), decodeDigit('Z'));
+    try expectEqual(@as(u21, 26), decodeDigit('0'));
+    try expectEqual(@as(u21, 35), decodeDigit('9'));
+    try expectEqual(@as(u21, 0), decodeDigit(65));
+
+    // Code points outside the digit alphabet must be reported as `base`
+    // rather than trapping on underflow.
+    try expectEqual(@as(u21, base), decodeDigit(0));
+    try expectEqual(@as(u21, base), decodeDigit(' '));
+    try expectEqual(@as(u21, base), decodeDigit('-'));
+    try expectEqual(@as(u21, base), decodeDigit('@'));
+    try expectEqual(@as(u21, base), decodeDigit('`'));
+    try expectEqual(@as(u21, base), decodeDigit('{'));
+    try expectEqual(@as(u21, base), decodeDigit(0x00E9));
+}
+
+/// Encodes a sequence of code points as Punycode.
+///
+/// Code points below `initialN` are copied to the output first and, if there
+/// is at least one of them, followed by a '-' delimiter. The remaining code
+/// points are then emitted in increasing order of value as variable-length
+/// base-36 integers.
+///
+/// The caller owns the returned list and must call `deinit` on it. If an
+/// allocation fails, the partially built output is released before the
+/// error is returned.
+pub fn encode(alloc: std.mem.Allocator, input: []const u21) !ArrayList(u8) {
+    var result = ArrayList(u8).init(alloc);
+    // Without this the buffer would leak whenever a later append fails.
+    errdefer result.deinit();
+
+    // Copy the basic code points verbatim.
+    for (input) |c| {
+        if (c < initialN) {
+            try result.append(@truncate(c));
+        }
+    }
+
+    const numOfBasics = result.items.len;
+
+    if (numOfBasics > 0) {
+        try result.append('-');
+    }
+
+    var bias: usize = initialBias;
+    var delta: usize = 0;
+    var n: usize = initialN;
+    // Number of code points handled so far.
+    var h: usize = numOfBasics;
+
+    while (h < input.len) {
+        // Smallest code point that is still >= n, i.e. the next one to emit.
+        var m: usize = std.math.maxInt(usize);
+
+        for (input) |c| {
+            if (c >= n and c < m) {
+                m = c;
+            }
+        }
+
+        delta += (m - n) * (h + 1);
+        n = m;
+
+        for (input) |c| {
+            if (c < n) {
+                delta += 1;
+            }
+
+            if (c == n) {
+                // Emit delta as a variable-length integer.
+                var k: usize = base;
+                var q: usize = delta;
+
+                while (true) : (k += base) {
+                    const t = if (k <= bias + tmin) tmin else (if (k >= bias + tmax) tmax else (k - bias));
+                    if (q < t) break;
+
+                    const newChar: usize = t + ((q - t) % (base - t));
+
+                    try result.append(encodeDigit(newChar));
+                    q = (q - t) / (base - t);
+                }
+
+                try result.append(encodeDigit(q));
+
+                // The first adaptation (h == numOfBasics) uses the `damp` scaling.
+                bias = adapt(delta, h + 1, h == numOfBasics);
+                delta = 0;
+                h += 1;
+            }
+        }
+
+        delta += 1;
+        n += 1;
+    }
+
+    return result;
+}
+
+/// Error reported by `decode` for input that is not valid Punycode.
+const DecodeError = error{
+    BadInput,
+};
+
+/// Decodes a Punycode string into a sequence of code points.
+///
+/// Everything before the last '-' is copied as basic code points; the rest
+/// is read as variable-length base-36 integers that say where each further
+/// code point is inserted.
+///
+/// Returns `error.BadInput` when:
+///   * a byte before the last delimiter is not below `initialN`,
+///   * a character in the encoded part is rejected by `decodeDigit`,
+///   * the input ends in the middle of a variable-length integer,
+///   * the arithmetic overflows `usize`, or
+///   * a decoded value exceeds the largest Unicode code point (0x10FFFF).
+///
+/// The caller owns the returned list and must call `deinit` on it. On any
+/// error the partially built list is released before returning.
+pub fn decode(alloc: std.mem.Allocator, input: []const u8) error{BadInput, OutOfMemory}!ArrayList(u21) {
+    const maxCodePoint: usize = 0x10FFFF;
+
+    var result = ArrayList(u21).init(alloc);
+    // Every error return below would otherwise leak the buffer.
+    errdefer result.deinit();
+
+    // Index of the last delimiter, or 0 when there is none. A delimiter at
+    // index 0 is deliberately treated like "no delimiter".
+    const b: usize = std.mem.lastIndexOfScalar(u8, input, '-') orelse 0;
+
+    for (input[0..b]) |c| {
+        if (c >= initialN) {
+            return error.BadInput;
+        }
+
+        try result.append(c);
+    }
+
+    var n: usize = initialN;
+    var i: usize = 0;
+    var bias: usize = initialBias;
+
+    // Start right after the delimiter, or at the beginning if there was none.
+    var in: usize = if (0 < b) b + 1 else 0;
+    while (in < input.len) {
+        const oldI: usize = i;
+        var w: usize = 1;
+        var k: usize = base;
+
+        // Read one variable-length integer into `i`.
+        while (true) : (k += base) {
+            if (in >= input.len) {
+                return error.BadInput;
+            }
+
+            const digit: usize = decodeDigit(input[in]);
+            in += 1;
+
+            if (digit >= base) {
+                return error.BadInput;
+            }
+
+            // Checked arithmetic: crafted input must fail cleanly instead
+            // of tripping the integer-overflow safety check.
+            const step = std.math.mul(usize, digit, w) catch return error.BadInput;
+            i = std.math.add(usize, i, step) catch return error.BadInput;
+
+            const t: usize = if (k <= bias) tmin else (if (k >= bias + tmax) tmax else (k - bias));
+
+            if (digit < t) break;
+
+            w = std.math.mul(usize, w, base - t) catch return error.BadInput;
+        }
+
+        const outLen = result.items.len + 1;
+        bias = adapt(i - oldI, outLen, oldI == 0);
+        n = std.math.add(usize, n, i / outLen) catch return error.BadInput;
+        // Reject values that do not fit a Unicode code point instead of
+        // silently truncating them into a different one.
+        if (n > maxCodePoint) {
+            return error.BadInput;
+        }
+        i %= outLen;
+
+        try result.insert(i, @intCast(n));
+        i += 1;
+    }
+
+    return result;
+}
+
+// tests
+
+// Encodes a mixed ASCII/CJK string taken from a UTF-8 literal and compares
+// the output with the expected Punycode form.
+test "encode UTF-8 literal" {
+    const alloc = std.testing.allocator;
+
+    var input = ArrayList(u21).init(alloc);
+    defer input.deinit();
+
+    // `encode` works on code points, so split the UTF-8 literal first.
+    var utf8 = (try std.unicode.Utf8View.init("3年b組金八先生")).iterator();
+    while (utf8.nextCodepoint()) |codepoint| {
+        try input.append(codepoint);
+    }
+
+    const encoded = try encode(alloc, input.items);
+    defer encoded.deinit();
+
+    // expectEqualSlices takes the expected value first; keeping that order
+    // makes the failure output label the two sides correctly.
+    try expectEqualSlices(u8, "3b-ww4c5e180e575a65lsy2b", encoded.items);
+}
+
+// Decodes a Punycode string and checks the resulting code points, including
+// where the two basic code points (0x0033 and 0x0062) end up.
+test {
+    const alloc = std.testing.allocator;
+
+    const decoded = try decode(alloc, "3b-ww4c5e180e575a65lsy2b");
+    defer decoded.deinit();
+
+    try expectEqualSlices(u21, &.{ 0x0033, 0x5E74, 0x0062, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F }, decoded.items);
+}
+
+// Table of round-trip fixtures. Each entry is a tuple of
+// (code points, expected Punycode string); the test below encodes the first
+// element, compares it with the second, and decodes it back.
+const testValues = [_]struct {[]const u21, []const u8}
+    {   // Arabic (Egyptian):
+        .{ &.{ 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644,
+               0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F } , "egbpdaj6bu4bxfgehfvwxn" },
+
+        // Chinese (simplified):
+        .{ &.{ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587} , "ihqwcrb4cv8a8dqg056pqjye" },
+
+        // Chinese (traditional):
+        .{ &.{ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587} , "ihqwctvzc91f659drss3x8bo0yb" },
+
+        // Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
+        .{ &.{ 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074,
+                0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D,
+                0x0065, 0x0073, 0x006B, 0x0079 } , "Proprostnemluvesky-uyb24dma41a" },
+
+        // Hebrew:
+        .{ &.{ 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8,
+                0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2,
+                0x05D1, 0x05E8, 0x05D9, 0x05EA } , "4dbcagdahymbxekheh6e0a7fei0b" },
+
+        // Hindi (Devanagari):
+        .{ &.{ 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D,
+                0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939,
+                0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947,
+                0x0939, 0x0948, 0x0902 } , "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" },
+
+        // Japanese (kanji and hiragana):
+        .{ &.{ 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092,
+                0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B } , "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" },
+
+        // Korean (Hangul syllables):
+        .{ &.{ 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
+                0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74,
+                0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C } , "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" },
+
+        // Russian (Cyrillic):
+        .{ &.{ 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E,
+                0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440,
+                0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A,
+                0x0438 } , "b1abfaaepdrnnbgefbadotcwatmq2g4l" },
+
+        // Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol:
+        .{ &.{ 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070,
+                0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070,
+                0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061,
+                0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070,
+                0x0061, 0x00F1, 0x006F, 0x006C } , "PorqunopuedensimplementehablarenEspaol-fmd56a" },
+
+        // Vietnamese: T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch<ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t:
+        .{ &.{ 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B,
+                0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068,
+                0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067,
+                0x0056, 0x0069, 0x1EC7, 0x0074 } , "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" },
+
+        // 3<nen>B<gumi><kinpachi><sensei>
+        .{ &.{ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F } , "3B-ww4c5e180e575a65lsy2b"},
+
+
+        // <amuro><namie>-with-SUPER-MONKEYS
+        .{ &.{ 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074,
+                0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D,
+                0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 } , "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"},
+
+        // Hello-Another-Way-<sorezore><no><basho>
+        .{ &.{ 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F,
+                0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D,
+                0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240 } , "Hello-Another-Way--fc4qua05auwb3674vfr0b"},
+
+        // <hitotsu><yane><no><shita>2
+        .{ &.{ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 } , "2-u9tlzr9756bt3uc0v"},
+
+        // Maji<de>Koi<suru>5<byou><mae>
+        .{ &.{ 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059,
+                0x308B, 0x0035, 0x79D2, 0x524D } , "MajiKoi5-783gue6qz075azm5e"},
+
+        // <pafii>de<runba>
+        .{ &.{ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 } , "de-jg4avhby1noc0d" },
+
+        // <sono><supiido><de>
+        .{ &.{ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 } , "d9juau41awczczp" },
+
+        // -> $1.00 <-
+        .{ &.{ 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020, 0x003C, 0x002D } , "-> $1.00 <--" }
+
+    };
+
+// Round-trips every fixture in `testValues`: the code points must encode to
+// the expected string, and that encoding must decode back to the code points.
+test {
+    const gpa = std.testing.allocator;
+
+    for (testValues) |pair| {
+        const codepoints = pair[0];
+        const punycode = pair[1];
+
+        const encoded = try encode(gpa, codepoints);
+        defer encoded.deinit();
+        try expectEqualSlices(u8, punycode, encoded.items);
+
+        const decoded = try decode(gpa, encoded.items);
+        defer decoded.deinit();
+        try expectEqualSlices(u21, codepoints, decoded.items);
+    }
+}
+
author	Evgenii Akentev <github@xn--bdkaa.com>
	Fri, 17 Oct 2025 15:04:36 +0000 (19:04 +0400)
committer	Evgenii Akentev <hi@xn--bdkaa.com>
	Tue, 21 Oct 2025 16:18:36 +0000 (20:18 +0400)
.gitignore	[new file with mode: 0644]	patch \| blob
LICENSE	[new file with mode: 0644]	patch \| blob
README.md	[new file with mode: 0644]	patch \| blob
build.zig	[new file with mode: 0644]	patch \| blob
build.zig.zon	[new file with mode: 0644]	patch \| blob
src/punycode.zig	[new file with mode: 0644]	patch \| blob