1 module hunt.net.util.UrlEncoded;
2 
3 import hunt.collection;
4 
5 import hunt.text.Charset;
6 import hunt.Exceptions;
7 import hunt.text.Common;
8 import hunt.util.StringBuilder;
9 import hunt.util.ConverterUtils;
10 
11 import hunt.logging.ConsoleLogger;
12 
13 import std.conv;
14 import std.array;
15 
16 
/**
 * Handles coding of MIME "x-www-form-urlencoded".
 * <p>
 * This class handles the encoding and decoding for either the query string of a
 * URL or the content of a POST HTTP request.
 * </p>
 * <b>Notes</b>
 * <p>
 * UTF-8 (see ENCODING) is the charset name used whenever the caller supplies
 * none. The charset arguments are kept for API compatibility: the encoder and
 * decoder in this class work directly on the bytes of the D string and do not
 * transcode to or from any other charset.
 * </p>
 * <p>
 * Each key of the underlying MultiMap maps to a list of string values.
 * </p>
 * <p>
 * This class is only partially synchronised: decodeTo locks the target map
 * while it fills it, whereas the encode methods of this class take no lock.
 * </p>
 *
 */
class UrlEncoded : MultiMap!string {

    /// Charset name used when the caller does not supply one.
    enum string ENCODING = StandardCharsets.UTF_8;

    /// Creates an empty parameter map.
    this() {
    }

    /**
     * Creates a parameter map filled from an url-encoded string.
     *
     * @param query the encoded parameters, e.g. <code>"a=1&amp;b=2"</code>
     */
    this(string query) {
        decodeTo(query, this, ENCODING);
    }

    /**
     * Decodes the given string and adds its parameters to this map.
     *
     * @param query the encoded parameters
     */
    void decode(string query) {
        decodeTo(query, this, ENCODING);
    }

    /**
     * Decodes the given string and adds its parameters to this map.
     *
     * @param query   the encoded parameters
     * @param charset the charset name handed on to decodeTo
     */
    void decode(string query, string charset) {
        decodeTo(query, this, charset);
    }

    /**
     * Encode MultiMap with % encoding for UTF8 sequences.
     * An '=' is written even for parameters without a value. Note that
     * encode(string charset) uses the opposite default.
     *
     * @return the MultiMap as a string with % encoding
     */
    string encode() {
        return encode(ENCODING, true);
    }

    /**
     * Encode MultiMap with % encoding for UTF8 sequences.
     *
     * @param equalsForNullValue if True, then an '=' is always used, even
     *                           for parameters without a value.
     * @return the MultiMap as a string with % encoding
     */
    string encode(bool equalsForNullValue) {
        return encode(ENCODING, equalsForNullValue);
    }

    /**
     * Encode MultiMap with % encoding for arbitrary string sequences.
     * Parameters without a value are written without a trailing '='.
     *
     * @param charset the charset to use for encoding
     * @return the MultiMap as a string encoded with % encodings
     */
    string encode(string charset) {
        return encode(charset, false);
    }

    /**
     * Encode MultiMap with % encoding.
     *
     * @param charset            the charset to encode with
     * @param equalsForNullValue if True, then an '=' is always used, even
     *                           for parameters without a value. e.g. <code>"blah?a=&amp;b=&amp;c="</code>.
     * @return the MultiMap as a string encoded with % encodings
     */
    string encode(string charset, bool equalsForNullValue) {
        return encode(this, charset, equalsForNullValue);
    }

    /**
     * Encode MultiMap with % encoding.
     * Entries are written in the iteration order of the map, separated by
     * '&amp;'; a key with several values is repeated once per value.
     *
     * @param map                the map to encode
     * @param charset            the charset to use for encoding (uses default encoding if null)
     * @param equalsForNullValue if True, then an '=' is always used, even
     *                           for parameters without a value. e.g. <code>"blah?a=&amp;b=&amp;c="</code>.
     * @return the MultiMap as a string encoded with % encodings.
     */
    static string encode(MultiMap!string map, string charset, bool equalsForNullValue) {
        if (charset is null)
            charset = ENCODING;

        StringBuilder result = new StringBuilder(128);
        bool needDelimiter = false;

        foreach (string key, List!string values; map) {
            if (needDelimiter)
                result.append('&');
            needDelimiter = true;

            // The key is identical for every value, so encode it only once.
            string encodedKey = encodeString(key, charset);
            int count = (values is null) ? 0 : values.size();

            if (count == 0) {
                result.append(encodedKey);
                if (equalsForNullValue)
                    result.append('=');
                continue;
            }

            for (int i = 0; i < count; i++) {
                if (i > 0)
                    result.append('&');
                result.append(encodedKey);

                // A null string has length 0, so null and "" are handled alike.
                string val = values.get(i);
                if (val.length > 0) {
                    result.append('=');
                    result.append(encodeString(val, charset));
                } else if (equalsForNullValue) {
                    result.append('=');
                }
            }
        }
        return result.toString();
    }

    /**
     * Decoded parameters to Map.
     * <p>
     * The content is split on '&amp;'; inside each pair the first '=' separates
     * key from value and any further '=' belongs to the value. A non-empty
     * token without '=' is stored as a key with an empty value; empty tokens
     * are skipped. The target map is locked for the duration of the call.
     * </p>
     *
     * @param content the string containing the encoded parameters
     * @param map     the MultiMap to put parsed query parameters into
     * @param charset the charset to use for decoding (ENCODING if empty)
     */
    static void decodeTo(string content, MultiMap!string map, string charset = ENCODING) {
        if (charset.empty)
            charset = ENCODING;

        synchronized (map) {
            string key = null;
            // Tracked separately from `key` so that an empty key ("=v") is
            // never mistaken for "no key seen yet".
            bool haveKey = false;
            int mark = -1;        // index of the last separator consumed
            bool encoded = false; // current token contains '+' or '%'
            immutable int contentLen = cast(int) content.length;

            for (int i = 0; i < contentLen; i++) {
                switch (content[i]) {
                    case '&':
                        string value = extractToken(content, mark + 1, i, encoded, charset);
                        mark = i;
                        encoded = false;
                        if (haveKey) {
                            map.add(key, value);
                        } else if (value.length > 0) {
                            map.add(value, "");
                        }
                        key = null;
                        haveKey = false;
                        break;

                    case '=':
                        if (haveKey)
                            break; // later '=' characters are part of the value
                        key = extractToken(content, mark + 1, i, encoded, charset);
                        haveKey = true;
                        mark = i;
                        encoded = false;
                        break;

                    case '+', '%':
                        encoded = true;
                        break;

                    default:
                        break;
                }
            }

            // Whatever follows the last separator is the final value or a bare key.
            if (haveKey) {
                string value = extractToken(content, mark + 1, contentLen, encoded, charset);
                version(HUNT_HTTP_DEBUG) tracef("key=%s, value=%s", key, value);
                map.add(key, value);
            } else {
                version(HUNT_HTTP_DEBUG) tracef("empty value: content=%s, key=%s", content, key);
                key = extractToken(content, mark + 1, contentLen, encoded, charset);
                if (!key.empty) {
                    map.add(key, "");
                }
            }
        }
    }

    /// Returns content[start .. end], %-decoded when `encoded` is set; "" for an empty range.
    private static string extractToken(string content, int start, int end, bool encoded, string charset) {
        if (end <= start)
            return "";
        return encoded
                ? decodeString(content, start, end - start, charset)
                : content.substring(start, end);
    }

    /**
     * Decode string with % encoding.
     * This method makes the assumption that the majority of calls
     * will need no decoding.
     *
     * @param encoded the encoded string to decode
     * @return the decoded string
     */
    static string decodeString(string encoded) {
        return decodeString(encoded, 0, cast(int) encoded.length);
    }

    /**
     * Decode string with % encoding.
     * This method makes the assumption that the majority of calls
     * will need no decoding.
     * <p>
     * '+' becomes a space and "%XX" becomes the byte with hex value XX. A '%'
     * that is not followed by two more characters inside the range yields '?'
     * and ends decoding. Hex parsing is delegated to ConverterUtils.parseInt,
     * so its behaviour on non-hex digits applies here as well.
     * </p>
     *
     * @param encoded the encoded string to decode
     * @param offset  the offset in the encoded string to decode from
     * @param length  the length of characters in the encoded string to decode
     * @param charset the charset to use for decoding; currently not consulted,
     *                the decoded bytes are returned as they are
     * @return the decoded string
     */
    static string decodeString(string encoded, int offset, int length, string charset = ENCODING) {
        // Fast path: locate the first character that needs decoding.
        int i = 0;
        while (i < length) {
            immutable char c = encoded[offset + i];
            if (c == '+' || c == '%')
                break;
            i++;
        }

        if (i == length) {
            if (offset == 0 && encoded.length == length)
                return encoded;
            return encoded.substring(offset, offset + length);
        }

        // Decoding never makes the text longer, so `length` bytes are enough.
        char[] decoded = new char[length];
        decoded[0 .. i] = encoded[offset .. offset + i];
        int n = i;

        while (i < length) {
            immutable char c = encoded[offset + i];
            if (c == '+') {
                decoded[n++] = ' ';
                i++;
            } else if (c == '%') {
                if (i + 2 < length) {
                    decoded[n++] = cast(char) ConverterUtils.parseInt(encoded, offset + i + 1, 2, 16);
                    i += 3;
                } else {
                    // Truncated escape at the end of the range.
                    decoded[n++] = '?';
                    break;
                }
            } else {
                decoded[n++] = c;
                i++;
            }
        }

        return cast(string) decoded[0 .. n];
    }


    /**
     * Perform URL encoding.
     *
     * @param string the string to encode
     * @return encoded string.
     */
    static string encodeString(string string) {
        return encodeString(string, ENCODING);
    }

    /**
     * Perform URL encoding.
     * <p>
     * ASCII letters and digits are copied, a space becomes '+', and every
     * other byte is written as "%XX" with upper-case hex digits. The input is
     * returned unchanged (no allocation) when nothing needs encoding.
     * </p>
     *
     * @param str     the string to encode
     * @param charset the charset to use for encoding; currently not consulted,
     *                the bytes of the D string are encoded as they are
     * @return encoded string.
     */
    static string encodeString(string str, string charset) {
        static immutable string HEX_DIGITS = "0123456789ABCDEF";

        // Unsigned bytes keep the range checks and nibble extraction sign-free.
        const(ubyte)[] bytes = cast(const(ubyte)[]) str;

        // Fast path: skip the leading run that needs no encoding.
        size_t first = 0;
        while (first < bytes.length && isAsciiAlphaNum(bytes[first]))
            first++;
        if (first == bytes.length)
            return str;

        // Worst case: every remaining byte expands to three characters.
        char[] encoded = new char[first + (bytes.length - first) * 3];
        encoded[0 .. first] = str[0 .. first];
        size_t n = first;

        foreach (ubyte b; bytes[first .. $]) {
            if (b == ' ') {
                encoded[n++] = '+';
            } else if (isAsciiAlphaNum(b)) {
                encoded[n++] = cast(char) b;
            } else {
                encoded[n++] = '%';
                encoded[n++] = HEX_DIGITS[b >> 4];
                encoded[n++] = HEX_DIGITS[b & 0x0f];
            }
        }

        return cast(string) encoded[0 .. n];
    }

    /// True for the bytes that encodeString copies verbatim: a-z, A-Z and 0-9.
    private static bool isAsciiAlphaNum(ubyte b) {
        return (b >= 'a' && b <= 'z')
            || (b >= 'A' && b <= 'Z')
            || (b >= '0' && b <= '9');
    }
}