Decode characters that are ampersanded

So I have text coming from a webhook that contains ampersand-encoded characters (HTML entities) and HTML tags:

- &eacute;
- '
- &nbsp;
- <br/>

And I need it to be decoded somehow, or at least readable!
There are these functions:

  • stripHtml : which is nice, it removes all HTML tags such as the <br/>
  • decodeURL : It’s unfortunately not a URL and it doesn’t decode it
  • ascii : I don’t think it’s the function to use here, the characters are already ascii

Any ideas what I could try to do here?
I guess my last resort is to replace every character I may encounter, but that might be a bit complex!

@Timothe_Viot :raised_hands:

Hi there! I’m an AI that helps with customer support. It sounds like you’re having trouble decoding some encoded text. Can you tell me more about the text you’re trying to decode? What type of encoding is it? Are you using any specific tools or libraries to decode it? Stay tuned for more help!

Please take a screenshot of your scenario along with the relevant module configurations and share the images here so that the community can help.

All of this helps us to get a deeper understanding of the challenge you face. :make:

Hi. Maybe you need to combine all the functions, including writing a regex.


Thanks, Helio!
Wemakefuture
If you have questions reach out :wink:

1 Like

Hello, thank you for your answer! Unfortunately, I guess the regex will remove the characters. The idea is to actually translate them so

&eacute;

Should become

é

I’m not sure this is possible at the moment though.

2 Likes

There is an idea to allow this feature:

Related content:

1 Like

I made an IML function inspired by the project he (for “HTML entities”) (github.com), but unfortunately the limit of 5000 (5K) characters didn’t allow saving the entire code just for decoding (unescaping HTML).

I was allowed to save just a part, not the entire code of 60K lines, as in the example below:

/**
 * Decode HTML character references in a string.
 *
 * Handles the named entities listed in `namedEntities` (Latin-1 plus
 * amp/lt/gt/quot/copy/reg), with or without the trailing semicolon, and
 * decimal (`&#233;`) or hexadecimal (`&#xE9;`) numeric references.
 * Anything that is not a recognised reference is left untouched.
 * Modelled on https://mths.be/he in its default, non-strict mode.
 *
 * @param {string} s - Text containing character references. `null` and
 *   `undefined` are treated as an empty string; other values are stringified.
 * @returns {string} The text with every recognised reference decoded once
 *   (so `&amp;lt;` becomes `&lt;`, not `<`).
 */
function utilDecodeHtml(s) {
  const namedEntities = {
    aacute: '\xE1', Aacute: '\xC1', acirc: '\xE2', Acirc: '\xC2', acute: '\xB4',
    aelig: '\xE6', AElig: '\xC6', agrave: '\xE0', Agrave: '\xC0', amp: '&',
    AMP: '&', aring: '\xE5', Aring: '\xC5', atilde: '\xE3', Atilde: '\xC3',
    auml: '\xE4', Auml: '\xC4', brvbar: '\xA6', ccedil: '\xE7', Ccedil: '\xC7',
    cedil: '\xB8', cent: '\xA2', copy: '\xA9', COPY: '\xA9', curren: '\xA4',
    deg: '\xB0', divide: '\xF7', eacute: '\xE9', Eacute: '\xC9', ecirc: '\xEA',
    Ecirc: '\xCA', egrave: '\xE8', Egrave: '\xC8', eth: '\xF0', ETH: '\xD0',
    euml: '\xEB', Euml: '\xCB', frac12: '\xBD', frac14: '\xBC', frac34: '\xBE',
    gt: '>', GT: '>', iacute: '\xED', Iacute: '\xCD', icirc: '\xEE',
    Icirc: '\xCE', iexcl: '\xA1', igrave: '\xEC', Igrave: '\xCC', iquest: '\xBF',
    iuml: '\xEF', Iuml: '\xCF', laquo: '\xAB', lt: '<', LT: '<',
    macr: '\xAF', micro: '\xB5', middot: '\xB7', nbsp: '\xA0', not: '\xAC',
    ntilde: '\xF1', Ntilde: '\xD1', oacute: '\xF3', Oacute: '\xD3', ocirc: '\xF4',
    Ocirc: '\xD4', ograve: '\xF2', Ograve: '\xD2', ordf: '\xAA', ordm: '\xBA',
    oslash: '\xF8', Oslash: '\xD8', otilde: '\xF5', Otilde: '\xD5', ouml: '\xF6',
    Ouml: '\xD6', para: '\xB6', plusmn: '\xB1', pound: '\xA3', quot: '"',
    QUOT: '"', raquo: '\xBB', reg: '\xAE', REG: '\xAE', sect: '\xA7',
    shy: '\xAD', sup1: '\xB9', sup2: '\xB2', sup3: '\xB3', szlig: '\xDF',
    thorn: '\xFE', THORN: '\xDE', times: '\xD7', uacute: '\xFA', Uacute: '\xDA',
    ucirc: '\xFB', Ucirc: '\xDB', ugrave: '\xF9', Ugrave: '\xD9', uml: '\xA8',
    uuml: '\xFC', Uuml: '\xDC', yacute: '\xFD', Yacute: '\xDD', yen: '\xA5',
    yuml: '\xFF',
  };

  // Numeric references that must not map to their literal code point:
  // 0 becomes U+FFFD and the 128-159 range is remapped to the listed symbols.
  const numericOverrides = {
    0: '\uFFFD', 128: '\u20AC', 130: '\u201A', 131: '\u0192', 132: '\u201E',
    133: '\u2026', 134: '\u2020', 135: '\u2021', 136: '\u02C6', 137: '\u2030',
    138: '\u0160', 139: '\u2039', 140: '\u0152', 142: '\u017D', 145: '\u2018',
    146: '\u2019', 147: '\u201C', 148: '\u201D', 149: '\u2022', 150: '\u2013',
    151: '\u2014', 152: '\u02DC', 153: '\u2122', 154: '\u0161', 155: '\u203A',
    156: '\u0153', 158: '\u017E', 159: '\u0178',
  };
  const hasOwn = Object.prototype.hasOwnProperty;

  // Turn the code point of a numeric reference into the character(s) to emit.
  const codePointToSymbol = function (codePoint) {
    // Lone surrogates and values beyond the Unicode range are not characters.
    if ((codePoint >= 0xD800 && codePoint <= 0xDFFF) || codePoint > 0x10FFFF) {
      return '\uFFFD';
    }
    if (hasOwn.call(numericOverrides, codePoint)) {
      return numericOverrides[codePoint];
    }
    return String.fromCodePoint(codePoint);
  };

  // Build the alternation from the map itself so the regex and the map can
  // never disagree (a name matched by the regex but missing from the map
  // would be replaced by the string "undefined"). Longest names go first so
  // a shorter name can never shadow a longer one.
  const names = Object.keys(namedEntities)
    .sort(function (a, b) { return b.length - a.length; })
    .join('|');
  const referencePattern = new RegExp(
    '&(?:(' + names + ');?|#([0-9]+);?|#[xX]([0-9a-fA-F]+);?)',
    'g'
  );

  const text = s == null ? '' : String(s);
  return text.replace(referencePattern, function (match, name, decimal, hex) {
    if (name !== undefined) {
      return namedEntities[name];
    }
    const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex, 16);
    return codePointToSymbol(codePoint);
  });
}
3 Likes

Hi, any solution to this problem?