Skip to content

Xvezda

Analyzing CloudFlare email protection algorithm

analysis, cloudflare, javascript, cryptography · 2 min read

While doing routine maintenance on my website, CloudFlare's email protection suddenly caught my eye.

1<!-- email -->
2<a href="/cdn-cgi/l/email-protection#bac2ccdfc0dedbfad4dbccdfc894d9d5d7" class="css-12vt1sm"><span class="__cf_email__" data-cfemail="116967746b7570517f706774633f727e7c">[email&#160;protected]</span></a>
3<!-- script -->
4<script data-cfasync="false" src="/cdn-cgi/scripts/5c5dd728/cloudflare-static/email-decode.min.js"></script>

I became curious about how those hash-like hex character sequences work. 🤔

There are surely articles about this somewhere on the internet already, but it is much more helpful and meaningful to figure it out by myself. 😊

So, let's find out!

Prettify minimized code

First, I used beautifier.io to reformat the minified, single-line code. There are alternative programs such as Prettier or JS Beautifier.

Screenshot of beautifier.io

Now the source code goes from this

email-decode.min.js
1// cdn-cgi/scripts/cloudflare-static/email-decode.min.js
2!function(){"use strict";function e(e){try{if("undefined"==typeof console)return;"error"in console?console.error(e):console.log(e)}catch(e){}}function t(e){return d.innerHTML='<a href="'+e.replace(/"/g,"&quot;")+'"></a>',d.childNodes[0].getAttribute("href")||""}function r(e,t){var r=e.substr(t,2);return parseInt(r,16)}function n(n,c){for(var o="",a=r(n,c),i=c+2;i<n.length;i+=2){var l=r(n,i)^a;o+=String.fromCharCode(l)}try{o=decodeURIComponent(escape(o))}catch(u){e(u)}return t(o)}function c(t){for(var r=t.querySelectorAll("a"),c=0;c<r.length;c++)try{var o=r[c],a=o.href.indexOf(l);a>-1&&(o.href="mailto:"+n(o.href,a+l.length))}catch(i){e(i)}}function o(t){for(var r=t.querySelectorAll(u),c=0;c<r.length;c++)try{var o=r[c],a=o.parentNode,i=o.getAttribute(f);if(i){var l=n(i,0),d=document.createTextNode(l);a.replaceChild(d,o)}}catch(h){e(h)}}function a(t){for(var r=t.querySelectorAll("template"),n=0;n<r.length;n++)try{i(r[n].content)}catch(c){e(c)}}function i(t){try{c(t),o(t),a(t)}catch(r){e(r)}}var l="/cdn-cgi/l/email-protection#",u=".__cf_email__",f="data-cfemail",d=document.createElement("div");i(document),function(){var e=document.currentScript||document.scripts[document.scripts.length-1];e.parentNode.removeChild(e)}()}();

to this.

email-decode.js
1// cdn-cgi/scripts/cloudflare-static/email-decode.min.js
2! function() {
3 "use strict";
4
5 function e(e) {
6 try {
7 if ("undefined" == typeof console) return;
8 "error" in console ? console.error(e) : console.log(e)
9 } catch (e) {}
10 }
11
12 function t(e) {
13 return d.innerHTML = '<a href="' + e.replace(/"/g, "&quot;") + '"></a>', d.childNodes[0].getAttribute("href") || ""
14 }
15
16 function r(e, t) {
17 var r = e.substr(t, 2);
18 return parseInt(r, 16)
19 }
20
21 function n(n, c) {
22 for (var o = "", a = r(n, c), i = c + 2; i < n.length; i += 2) {
23 var l = r(n, i) ^ a;
24 o += String.fromCharCode(l)
25 }
26 try {
27 o = decodeURIComponent(escape(o))
28 } catch (u) {
29 e(u)
30 }
31 return t(o)
32 }
33
34 function c(t) {
35 for (var r = t.querySelectorAll("a"), c = 0; c < r.length; c++) try {
36 var o = r[c],
37 a = o.href.indexOf(l);
38 a > -1 && (o.href = "mailto:" + n(o.href, a + l.length))
39 } catch (i) {
40 e(i)
41 }
42 }
43
44 function o(t) {
45 for (var r = t.querySelectorAll(u), c = 0; c < r.length; c++) try {
46 var o = r[c],
47 a = o.parentNode,
48 i = o.getAttribute(f);
49 if (i) {
50 var l = n(i, 0),
51 d = document.createTextNode(l);
52 a.replaceChild(d, o)
53 }
54 } catch (h) {
55 e(h)
56 }
57 }
58
59 function a(t) {
60 for (var r = t.querySelectorAll("template"), n = 0; n < r.length; n++) try {
61 i(r[n].content)
62 } catch (c) {
63 e(c)
64 }
65 }
66
67 function i(t) {
68 try {
69 c(t), o(t), a(t)
70 } catch (r) {
71 e(r)
72 }
73 }
74 var l = "/cdn-cgi/l/email-protection#",
75 u = ".__cf_email__",
76 f = "data-cfemail",
77 d = document.createElement("div");
78 i(document),
79 function() {
80 var e = document.currentScript || document.scripts[document.scripts.length - 1];
81 e.parentNode.removeChild(e)
82 }()
83}();

Much cleaner and easier to read. 😄

Renaming all the symbols

Replacing every character with sed could also work, but I wanted to be sure that each replacement respected its context. This is where VS Code's Rename Symbol feature was super helpful.

Other IDEs have a similar feature, usually found under "Refactoring".

After renaming all the symbols, I removed redundant code and added some comments for readability. Here is the modified result.

1! function() { // IIFE (Immediately Invoked Function Expressions)
2 // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Strict_mode
3 "use strict";
4 var emailProtectionUrl = "/cdn-cgi/l/email-protection#",
5 protectionClassSelector = ".__cf_email__",
6 dataCfmailAttribute = "data-cfemail",
7 virtualDiv = document.createElement("div");
8
9 function logError(message) {
10 try {
11 // Elegantly handle log messages
12 if ("undefined" == typeof console) return;
13 "error" in console ? console.error(message) : console.log(message)
14 } catch (e) {}
15 }
16
17 function getEscapedAddress(address) {
18 // Create link inside div
19 virtualDiv.innerHTML = '<a href="' + address.replace(/"/g, "&quot;") + '"></a>';
20 // Return created link address
21 return virtualDiv.childNodes[0].getAttribute("href") || ""
22 }
23
24 function hexToInt(text, offset) {
25 // Get two characters from offset
26 var hexstr = text.substr(offset, 2);
27 // Pretend to hexadecimal value
28 return parseInt(hexstr, 16)
29 }
30
31 function decodeData(enc, keyOffset) {
32 for (var dec = "", key = hexToInt(enc, keyOffset), i = keyOffset + 2; i < enc.length; i += 2) {
33 // Decode with xor
34 var l = hexToInt(enc, i) ^ key;
35 // Build string
36 dec += String.fromCharCode(l)
37 }
38 try {
39 dec = decodeURIComponent(escape(dec))
40 } catch (e) {
41 logError(e)
42 }
43 return getEscapedAddress(dec)
44 }
45
46 function restoreProtectionEmailLink(element) {
47 // Iterate through links
48 for (var allLinks = element.querySelectorAll("a"), i = 0; i < allLinks.length; i++) {
49 try {
50 var link = allLinks[i],
51 protectionUrlIndex = link.href.indexOf(emailProtectionUrl);
52 if (protectionUrlIndex > -1) { // Has url
53 link.href = "mailto:" + decodeData(link.href,
54 protectionUrlIndex + emailProtectionUrl.length /* Index after protection URL */);
55 }
56 } catch (e) {
57 logError(e)
58 }
59 }
60 }
61
62 function restoreEmailTextNode(element) {
63 for (var protectedEmails = element.querySelectorAll(protectionClassSelector), i = 0; i < protectedEmails.length; i++) {
64 try {
65 var protectedEmail = protectedEmails[i],
66 protectedEmailParent = protectedEmail.parentNode,
67 dataCfemail = protectedEmail.getAttribute(dataCfmailAttribute);
68 if (dataCfemail) {
69 var email = decodeData(dataCfemail, 0),
70 emailText = document.createTextNode(email);
71 // Replace protected email text to origianl
72 protectedEmailParent.replaceChild(emailText, protectedEmail)
73 }
74 } catch (e) {
75 logError(e)
76 }
77 }
78 }
79
80 function templateRecursiveApply(element) {
81 // Find all template tags, initialize from templates content DOM
82 for (var allTemplateTags = element.querySelectorAll("template"), i = 0; i < allTemplateTags.length; i++) {
83 try {
84 initialize(allTemplateTags[i].content)
85 } catch (e) {
86 logError(e)
87 }
88 }
89 }
90
91 function initialize(element) {
92 try {
93 restoreProtectionEmailLink(element);
94 restoreEmailTextNode(element);
95 templateRecursiveApply(element);
96 } catch (e) {
97 logError(e)
98 }
99 }
100
101 initialize(document);
102 // Get this script
103 // https://developer.mozilla.org/en-US/docs/Web/API/Document/currentScript
104 var self = document.currentScript || document.scripts[document.scripts.length - 1];
105 // Remove this script
106 self.parentNode.removeChild(self)
107}();

Let's break it down line by line.

Immediately Invoked Function Expressions

The syntax starting with an exclamation mark is called an IIFE (Immediately Invoked Function Expression).

1!function() {
2// ...
3}()

When you try to call an anonymous function immediately without any prefix, the parser treats it as a function declaration and raises a syntax error.

1function(){}()

Uncaught SyntaxError: Function statements require a function name

But with a leading exclamation mark, the function is parsed as an expression and works just fine. Other unary operators such as + and - also work, but ! is the most common.

There is another common way to write an IIFE: wrapping the function in parentheses.

1(function() {
2// ...
3})()

So why are IIFEs used? Because in a classic script, JavaScript puts top-level variables and function declarations in the global scope by default. This messes up the global scope and can cause many negative side effects, such as name collisions between scripts.

So, many JavaScript libraries use an IIFE to prevent this. Thankfully, there is also a newer HTML feature: giving a script tag the attribute type="module" provides file-based (module) scoping without any explicit IIFE.

logError

logError() is basically a logger for error messages. It does nothing if console is not available; otherwise it logs the message with console.error() if that exists, and falls back to console.log() if it does not.

The body is wrapped in a try {} catch (e) {} block so that an unexpected error while logging cannot abort the script.

1function logError(message) {
2 try {
3 // Elegantly handle log messages
4 if ("undefined" == typeof console) return;
5 "error" in console ? console.error(message) : console.log(message)
6 } catch (e) {}
7}

getEscapedAddress

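This is a very interesting function. It uses a detached element that is never inserted into the page (which I named virtualDiv) to run the address through the browser's HTML parser: the address is written into the href attribute of a temporary link and read back with getAttribute("href"), so any HTML character references in it are decoded. Note that getAttribute() returns the attribute value as written, not a resolved absolute URL.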

1function getEscapedAddress(address) {
2 // Create link inside div
3 virtualDiv.innerHTML = '<a href="' + address.replace(/"/g, "&quot;") + '"></a>';
4 // Return created link address
5 return virtualDiv.childNodes[0].getAttribute("href") || ""
6}

hexToInt

This function takes two characters starting at offset, then parses them as a hexadecimal number (0x00-0xff).

1function hexToInt(text, offset) {
2 // Get two characters from offset
3 var hexstr = text.substr(offset, 2);
4 // Parse as a hexadecimal value
5 return parseInt(hexstr, 16)
6}

decodeData

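Now the main part. It is basically XOR encryption, for which encrypting and decrypting are the same operation. It takes the first two characters as the key, then advances the offset by two on every iteration, decoding each hex value by XORing it with the key. Finally, decodeURIComponent(escape(dec)) reinterprets the resulting bytes as UTF-8.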

1function decodeData(enc, keyOffset) {
2 for (var dec = "", key = hexToInt(enc, keyOffset), i = keyOffset + 2; i < enc.length; i += 2) {
3 // Decode with xor
4 var l = hexToInt(enc, i) ^ key;
5 // Build string
6 dec += String.fromCharCode(l)
7 }
8 try {
9 dec = decodeURIComponent(escape(dec))
10 } catch (e) {
11 logError(e)
12 }
13 return getEscapedAddress(dec)
14}

Here is a simple Python script I wrote to decode hex strings using this algorithm.

decode_cfemail.py
# -*- coding: utf-8 -*-


def decode_cfemail(enc):
    """Decode a CloudFlare ``data-cfemail`` hex string.

    The first two hex digits of ``enc`` are the XOR key; every following
    pair of hex digits is one byte of the address XORed with that key.

    :param enc: hex string taken from a ``data-cfemail`` attribute, or the
        part after ``/cdn-cgi/l/email-protection#`` in a protected link.
    :returns: the decoded text.
    :raises ValueError: if ``enc`` is empty or contains non-hex characters.

    >>> decode_cfemail('422302206c21')
    'a@b.c'
    """
    # Read the input two hex digits at a time, like hexToInt() in the
    # JavaScript decoder does.
    data = bytearray(int(enc[i:i + 2], 16) for i in range(0, len(enc), 2))
    if not data:
        raise ValueError('encoded string must contain at least the key byte')

    key = data[0]
    plain = bytearray(byte ^ key for byte in data[1:])

    # The JavaScript decoder runs decodeURIComponent(escape(...)) over the
    # XORed bytes, i.e. it reads them as UTF-8, and keeps the raw
    # one-character-per-byte string when that fails. Mirror both paths.
    try:
        return plain.decode('utf-8')
    except UnicodeDecodeError:
        return plain.decode('latin-1')


"""
e.g.
    `<span class="__cf_email__" data-cfemail="ee96988b948a8fae808f988b9cc08d8183">[email&#160;protected]</span>`

    >>> decode_cfemail('ee96988b948a8fae808f988b9cc08d8183')
    (returns the address hidden behind the placeholder above)
"""

And here is the encoding script.

encode_cfemail.py
# -*- coding: utf-8 -*-

from __future__ import print_function


def encode_cfemail(email, key=None):
    """Encode ``email`` in the format of CloudFlare's ``data-cfemail``.

    The output is a lowercase hex string: two digits for the XOR key,
    followed by two digits for every UTF-8 byte of ``email`` XORed with
    that key.

    :param email: the text to protect.
    :param key: optional one-byte XOR key (0x00-0xff). When omitted, a
        random key in 0x01-0xff is used, so the output differs per call.
    :returns: the encoded hex string.
    :raises ValueError: if ``key`` does not fit in one byte.

    >>> encode_cfemail('a@b.c', key=0x42)
    '422302206c21'
    """
    from random import randint

    if key is None:
        # randint() is inclusive on both ends, so the key is always one
        # byte. (Scaling random() by 0x100 and adding 1 could yield 0x100,
        # which does not fit in two hex digits and corrupts the output.)
        key = randint(0x01, 0xff)
    elif not 0x00 <= key <= 0xff:
        raise ValueError('key must fit in one byte (0x00-0xff)')

    # Encode to UTF-8 first so that every XORed value is a single byte,
    # matching the UTF-8 decoding step of the JavaScript decoder.
    payload = bytearray(email.encode('utf-8'))

    # Key prefix first, then the XORed bytes, each as exactly two hex digits
    return '%02x' % key + ''.join('%02x' % (byte ^ key) for byte in payload)


"""
e.g. (the output varies with the random key)
    >>> encode_cfemail('[email protected]')
    'cdb5bba8b7a9ac8da3acbba8bfe3aea2a0'
"""

restoreProtectionEmailLink

This function restores every hyperlink that contains CloudFlare's email protection data.

1function restoreProtectionEmailLink(element) {
2 // Iterate through links
3 for (var allLinks = element.querySelectorAll("a"), i = 0; i < allLinks.length; i++) {
4 try {
5 var link = allLinks[i],
6 protectionUrlIndex = link.href.indexOf(emailProtectionUrl);
7 if (protectionUrlIndex > -1) { // Has url
8 link.href = "mailto:" + decodeData(link.href,
9 protectionUrlIndex + emailProtectionUrl.length /* Index after protection URL */);
10 }
11 } catch (e) {
12 logError(e)
13 }
14 }
15}

restoreEmailTextNode

It selects every protected email element; if the data-cfemail attribute exists, it decodes the data and replaces the element with a text node containing the original email.

1function restoreEmailTextNode(element) {
2 for (var protectedEmails = element.querySelectorAll(protectionClassSelector), i = 0; i < protectedEmails.length; i++) {
3 try {
4 var protectedEmail = protectedEmails[i],
5 protectedEmailParent = protectedEmail.parentNode,
6 dataCfemail = protectedEmail.getAttribute(dataCfmailAttribute);
7 if (dataCfemail) {
8 var email = decodeData(dataCfemail, 0),
9 emailText = document.createTextNode(email);
10 // Replace the protected email element with the original text
11 protectedEmailParent.replaceChild(emailText, protectedEmail)
12 }
13 } catch (e) {
14 logError(e)
15 }
16 }
17}

templateRecursiveApply

If the website uses template tags, this function also applies the email decoding to the elements inside each template's content.

1function templateRecursiveApply(element) {
2 // Find all template tags, initialize from templates content DOM
3 for (var allTemplateTags = element.querySelectorAll("template"), i = 0; i < allTemplateTags.length; i++) {
4 try {
5 initialize(allTemplateTags[i].content)
6 } catch (e) {
7 logError(e)
8 }
9 }
10}

initialize

The entry point of the email decoding script. Everything starts from here.

/**
 * Entry point of the decoder: restores every protected email under `element`.
 *
 * Runs the three passes in order: protected links, protected text
 * elements, then the content of any <template> tags (which calls back into
 * this function). Errors are logged instead of thrown.
 *
 * @param {Document|DocumentFragment} element - Root node to search.
 */
function initialize(element) {
  try {
    restoreProtectionEmailLink(element);
    restoreEmailTextNode(element);
    templateRecursiveApply(element);
  } catch (e) {
    logError(e);
  }
}

initialize(document);

Remove itself

And finally, it removes itself from the DOM. It uses document.currentScript to find its own script tag, with document.scripts[document.scripts.length - 1] as a fallback.

1// Get this script
2// https://developer.mozilla.org/en-US/docs/Web/API/Document/currentScript
3var self = document.currentScript || document.scripts[document.scripts.length - 1];
4// Remove this script
5self.parentNode.removeChild(self)

Epilogue

That's it!

It was much easier to analyze than I expected, and I still learned some very useful knowledge and skills from it.

NOTE: I'm not a very good English speaker. If you find any mistakes, typos, awkward grammar, or anything else, please let me know by leaving feedback in the comment section below. It will be really helpful for improving the quality of my blog posts.

Thanks for reading. 😃

:wq