• huf [he/him]
    ·
    17 days ago

    extremely similar tool if you dont want to open a browser:

    #!/usr/bin/perl
    use strict;
    use warnings;
    
    our $VERSION = '0.4';
    use Encode qw/decode_utf8 encode_utf8/;
    use Unicode::UCD qw/charinfo prop_invmap/;
    use List::Util qw/max sum/;
    use Getopt::Long qw/:config no_ignore_case/;
    
    binmode STDOUT, ':encoding(UTF-8)';
    
    GetOptions
        'char|c' => \my $opt_char,
        'decimal|d!' => \my $opt_decimal,
        'hex|x!' => \my $opt_hex,
        'hex-escape|X!' => \my $opt_hex_escaped,
        'name|n!' => \my $opt_name,
        'ascii|a!' => \my $opt_ascii,
        'help|h!' => \my $opt_help,
        'version|v!' => \my $opt_version
            or die usage();
    
    die __FILE__ =~ s{.*/}{}r . " v$VERSION\n" if $opt_version;
    
    my $set_opt_count = sum map { $_ // 0 } $opt_char, $opt_decimal, $opt_hex, $opt_hex_escaped, $opt_name, $opt_help;
    
    die usage("Given options exclude each other") if $set_opt_count > 1;
    
    die usage() if $opt_help || !@ARGV;
    
    $opt_char = 1 if $set_opt_count == 0;
    
    my @args;
    if ($opt_decimal) {
        @args = [ map { chr } @ARGV ];
    }
    elsif ($opt_hex) {
        @args = [ map { chr hex $_ } @ARGV ];
    }
    elsif ($opt_hex_escaped) {
        @args = map { [ split //, decode_utf8_or_not($opt_ascii, pack 'H*', join '', /(?:\\x)?([0-9a-f]+)/gi) ] } @ARGV;
    }
    elsif ($opt_name) {
        @args = lookup_by_name(@ARGV);
    }
    elsif ($opt_char) {
        @args = map { [ split //, decode_utf8_or_not($opt_ascii, $_) ] } @ARGV;
    }
    else {
        die usage("No option passed, do not know what to do");
    }
    
    my @data;
    my $codepoint_maxlen = 4;
    my $utf8_maxlen = 2;
    my $name_maxlen = 1;
    
    my $nrun = 0;
    for my $run (@args) {
        push @data, undef if $nrun++ > 0;
    
        for my $char (@$run) {
            my $codepoint = ord $char;
            my $charinfo = charinfo $codepoint;
            my $name = $charinfo->{name} || "NONEXISTENT CHAR";
            my $utf8 = join ' ', map { sprintf "%x", ord $_ } split //, encode_utf8 $char;
            $codepoint_maxlen = max $codepoint_maxlen, length sprintf '%X', $codepoint;
            $utf8_maxlen = max $utf8_maxlen, length $utf8;
            $name_maxlen = max $name_maxlen, length $name;
    
            push @data, {
                char => $name eq '<control>'
                    ? join '', map "\\x$_", split ' ', $utf8
                    : $char,
                codepoint => $codepoint,
                utf8 => $utf8,
                name => $name,
            };
        }
    }
    
    for my $line (@data) {
        if ($line) {
            printf
                "U+%0${codepoint_maxlen}X (%-${utf8_maxlen}s): %-${name_maxlen}s [%s]\n",
                    $line->{codepoint},
                    $line->{utf8},
                    $line->{name},
                    $line->{char};
        }
        else {
            print "\n";
        }
    }
    
    sub usage {
        my $name = __FILE__ =~ s{.*/}{}r;
        print "@_\n" if @_;
        <<~"EOS";
        $name [options] [mode] ...
            modes:
            -c <literal string> ... (this is the default)
            -d <decimal code point> ...
            -x <hexadecimal code point> ...
            -X <string containing only hexadecimal escapes \\xHH> ...
            -n <character name fragment> ...
            options:
            -a - treat input as bytes instead of utf8
            -h - this help
            -v - version
        v$VERSION
        EOS
    }
    
    sub lookup_by_name {
        my @search_terms = @_;
    
        my %cp;
        # All codepoints
        for my $cat (qw(Name Name_Alias)) {
            my ($codepoints, $names, $format, $default) = prop_invmap($cat);
            # $format => "n", $default => ""
            for my $i (0 .. @$codepoints - 2) {
                my ($cp, $n) = ($codepoints->[$i], $names->[$i]);
                # If $n is a ref, the same codepoint has multiple names
                for my $name (ref $n ? @$n : $n) {
                    $cp{$name} //= $cp;
                }
            }
        }
    
        my @names = keys %cp;
        for my $term (@search_terms) {
            @names = grep { /$term/i } @names;
        }
    
        return [ map chr, sort { $a <=> $b } @cp{@names} ];
    }
    
    sub decode_utf8_or_not {
        my ($no_dont_do_it, $string) = @_;
        return $no_dont_do_it ? $string : decode_utf8($string);
    }
    
      • huf [he/him]
        ·
        17 days ago

        it's a lexical alias to a package variable. super simple stuff.

    • m_f@midwest.social
      hexagon
      ·
      17 days ago

      Thanks! Now I'm curious, do you write Perl regularly? I don't see too much of it, my sense is that it's still around in sysadmin type work though

      • huf [he/him]
        ·
        17 days ago

        i write little things like the above in it for myself, but i've never used it much at work. but yeah, it's long past its peak and unlikely to ever recover, although it does still have a community using it and writing new libraries for it, maintaining and developing the core language.

        it's a great language, i love it.