If you want only an simple converter, you can use the following filter
perl -CSDA -nle 'printf "\\u%*v04x\n", "\\u",$_'
#or
perl -CSDA -nlE 'printf "\\u%04x",$_ for unpack "U*"'
like:
echo "इस परीक्षण के लिए है" | perl -CSDA -ne 'printf "\\u%*v04x\n", "\\u",$_'
#or
perl -CSDA -ne 'printf "\\u%*v04x\n", "\\u",$_' <<< "इस परीक्षण के लिए है"
prints:
\u0907\u0938\u0020\u092a\u0930\u0940\u0915\u094d\u0937\u0923\u0020\u0915\u0947\u0020\u0932\u093f\u090f\u0020\u0939\u0948\u000a
Unicode with surrogate pairs.
use strict;
use warnings;
use utf8;
use open qw(:std :utf8);
my $str = "if( \N{U+1F42A}+\N{U+1F410} == \N{U+1F41B} ){ \N{U+1F602} = \N{U+1F52B} } # ορισμός ";
print "$str\n";
for my $ch (unpack "U*", $str) {
if( $ch > 0xffff ) {
my $h = ($ch - 0x10000) / 0x400 + 0xD800;
my $l = ($ch - 0x10000) % 0x400 + 0xDC00;
printf "\\u%04x\\u%04x", $h, $l;
}
else {
printf "\\u%04x", $ch;
}
}
print "\n";
prints
if( 🐪+🐐 == 🐛 ){ 😂 = 🔫 } # ορισμός
\u0069\u0066\u0028\u0020\ud83d\udc2a\u002b\ud83d\udc10\u0020\u003d\u003d\u0020\ud83d\udc1b\u0020\u0029\u007b\u0020\ud83d\ude02\u0020\u003d\u0020\ud83d\udd2b\u0020\u007d\u0020\u0023\u0020\u03bf\u03c1\u03b9\u03c3\u03bc\u03cc\u03c2\u0020
Encode,Text::Unidecode).