1 module hunt.net.util.UrlEncoded;
2 
3 import hunt.collection;
4 
5 import hunt.text.Charset;
6 import hunt.Exceptions;
7 import hunt.text.Common;
8 import hunt.util.StringBuilder;
9 import hunt.util.ConverterUtils;
10 
11 import hunt.logging;
12 
13 import std.ascii;
14 import std.conv;
15 import std.array;
16 
17 
18 /* rfc1738:
19 
20    ...The characters ";",
21    "/", "?", ":", "@", "=" and "&" are the characters which may be
22    reserved for special meaning within a scheme...
23 
24    ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
25    reserved characters used for their reserved purposes may be used
26    unencoded within a URL...
27 
28    For added safety, we only leave -_. unencoded.
29  */
/**
 * Percent-encodes a string one UTF-8 code unit at a time.
 *
 * ASCII letters, digits, '-', '.' and '_' are copied through unchanged;
 * every other byte is written as '%' followed by two upper-case hex digits.
 *
 * Params:
 *   s   = the text to encode
 *   raw = when true a space becomes "%20" and '~' is left unescaped;
 *         when false a space becomes '+' and '~' is escaped as "%7E"
 *
 * Returns: the encoded text
 */
private string urlEncode(string s, bool raw) {
    auto output = appender!string();
    // Worst case: every input byte expands to the three characters "%XX".
    output.reserve(s.length * 3);

    foreach (char ch; s) {
        if (ch == ' ' && !raw) {
            output.put('+');
            continue;
        }

        // Bytes >= 0x80 (UTF-8 lead/continuation bytes) are never alphanumeric
        // here, so multi-byte characters are always escaped byte by byte.
        immutable bool keepLiteral = isAlphaNum(ch)
            || ch == '-' || ch == '.' || ch == '_'
            || (raw && ch == '~');

        if (keepLiteral) {
            output.put(ch);
        } else {
            output.put('%');
            output.put(hexDigits[ch >> 4]);
            output.put(hexDigits[ch & 0x0F]);
        }
    }

    return output.data;
}
52 
53 
/**
 * Reverses percent-encoding.
 *
 * A '+' is turned into a space, and a '%' followed by two hexadecimal digits
 * is replaced by the byte those digits denote. A '%' that is not followed by
 * two hexadecimal digits is copied through unchanged, as is every other byte.
 *
 * Params:
 *   str = the encoded text
 *
 * Returns: the decoded text
 */
private string urlDecode(string str) {
    auto decoded = appender!string();
    decoded.reserve(str.length);

    immutable size_t total = str.length;
    size_t pos = 0;

    while (pos < total) {
        char ch = str[pos];

        if (ch == '+') {
            decoded.put(' ');
            pos += 1;
        } else if (ch == '%' && total - pos > 2
                && isHexDigit(str[pos + 1]) && isHexDigit(str[pos + 2])) {
            // Consume the '%' and both hex digits in one step.
            decoded.put(cast(char) to!int(str[pos + 1 .. pos + 3], 16));
            pos += 3;
        } else {
            decoded.put(ch);
            pos += 1;
        }
    }

    return decoded.data;
}
78 
79 
80 // unittest {
81 //     string s = `abcd 1234567890ABCD1234~!@#$%^&*()_+{}<>?:"[]\|';/.,`;
82 
83         // RFC1738
84 //     string r = urlEncode(s, false);
85 //     // abcd+1234567890ABCD1234%7E%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D%3C%3E%3F%3A%22%5B%5D%5C%7C%27%3B%2F.%2C
86 
87 //     r = urlDecode(r);
88 //     assert(r == s);
89 
90         // RFC-3986    
91 //     r = urlEncode(s, true);
92 //     // abcd%201234567890ABCD1234~%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D%3C%3E%3F%3A%22%5B%5D%5C%7C%27%3B%2F.%2C
93     
94 //     r = urlDecode(r);
95 //     writefln("Decode: %s", r);
96 //     assert(r == s);
97 
98 //     r = urlEncode("中 文", true);
99 //     // %E4%B8%AD%20%E6%96%87
100 // }
101 
102 
/**
 * Selects the escaping rules applied by `UrlEncoded.encodeString`.
 */
enum UrlEncodeStyle {
    /// Form style: a space is written as '+' and '~' is percent-encoded.
    HtmlForm,

    /// URI style: a space is written as "%20" and '~' is left unescaped.
    URI
}
107 
/**
 * Handles coding of MIME "x-www-form-urlencoded".
 * <p>
 * This class handles the encoding and decoding for either the query string of a
 * URL or the content of a POST HTTP request.
 * </p>
 * <b>Notes</b>
 * <p>
 * The UTF-8 charset is assumed (see ENCODING). The charset parameter accepted
 * by decode is currently not used.
 * </p>
 * <p>
 * Each key maps to a list of string values (the class extends
 * MultiMap!string).
 * </p>
 * <p>
 * This class adds no synchronisation of its own, so get operations are not
 * protected from concurrent updates.
 * </p>
 * 
 * See_Also:
 *    https://www.w3.org/TR/REC-html40/interact/forms.html#h-17.13.4
 *    https://stackoverflow.com/questions/996139/urlencode-vs-rawurlencode
 */
class UrlEncoded  : MultiMap!string { 
    
    /// Name of the character set this class assumes.
    enum string ENCODING = StandardCharsets.UTF_8;

    /// Escaping rules used by `encode`.
    private UrlEncodeStyle _encodeStyle = UrlEncodeStyle.URI;


    /**
     * Creates an empty parameter map.
     *
     * Params:
     *   encodeStyle = escaping rules used when the map is encoded
     */
    this(UrlEncodeStyle encodeStyle = UrlEncodeStyle.URI) {
        _encodeStyle = encodeStyle;
    }
    

    /**
     * Creates a parameter map and fills it by decoding `query`.
     *
     * Params:
     *   query       = the encoded parameters, e.g. "a=1&b=2"
     *   encodeStyle = escaping rules used when the map is encoded
     */
    this(string query, UrlEncodeStyle encodeStyle = UrlEncodeStyle.URI) {
        _encodeStyle = encodeStyle;
        decode(query);
    }

    /// Returns: the escaping rules this instance encodes with.
    UrlEncodeStyle encodeStyle() {
        return _encodeStyle;
    }

    /**
     * Encodes the map with % encoding, always writing an '=' after a key,
     * even when the key has no value.
     *
     * Returns: the map as an encoded string
     */
    string encode() {
        return encode(true);
    }

    /**
     * Encodes the map with % encoding.
     *
     * Entries are joined with '&'; a key with several values is written once
     * per value.
     *
     * Params:
     *   equalsForNullValue = if true, an '=' is always written, even for
     *                        parameters without a value, e.g. "a=&b=&c="
     *
     * Returns: the map as an encoded string
     */
    string encode(bool equalsForNullValue) {

        StringBuilder result = new StringBuilder(128);

        bool delim = false;
        foreach(string key, List!string list; this)
        {
            if (delim) {
                result.append('&');
            }
            delim = true;

            // The encoded key is identical for every value, so compute it once.
            string encodedKey = encodeString(key, _encodeStyle);
            int count = list is null ? 0 : list.size();

            if (count == 0) {
                result.append(encodedKey);
                if (equalsForNullValue)
                    result.append('=');
            } else {
                for (int i = 0; i < count; i++) {
                    if (i > 0)
                        result.append('&');
                    result.append(encodedKey);

                    string val = list.get(i);
                    if (val.length > 0) {
                        result.append('=');
                        result.append(encodeString(val, _encodeStyle));
                    } else if (equalsForNullValue) {
                        // null and empty values are written the same way
                        result.append('=');
                    }
                }
            }
        }
        return result.toString();        
    }


    /**
     * Decodes `content` and adds every parameter found to this map.
     *
     * Pairs are separated by '&'; within a pair only the first '=' separates
     * the key from the value. A non-empty token without '=' is added as a key
     * with an empty value.
     *
     * Params:
     *   content = the string containing the encoded parameters
     *   charset = accepted for API compatibility; currently not used
     */
    void decode(string content, string charset = ENCODING) {

        string key = null;
        size_t start = 0;       // index of the first character of the current token
        bool encoded = false;   // the current token contains '+' or '%'

        for (size_t i = 0; i < content.length; i++) {
            switch (content[i]) {
                case '&':
                    string value = decodeToken(content, start, i, encoded);
                    start = i + 1;
                    encoded = false;

                    if (key !is null) {
                        version(HUNT_HTTP_DEBUG) tracef("key=%s, value=%s", key, value);
                        this.add(key, value);
                    } else if (value.length > 0) {
                        // A bare token is treated as a key with an empty value.
                        this.add(value, "");
                    }
                    key = null;
                    break;
                case '=':
                    // Any further '=' belongs to the value.
                    if (key !is null)
                        break;
                    key = decodeToken(content, start, i, encoded);
                    start = i + 1;
                    encoded = false;
                    break;
                case '+', '%':
                    encoded = true;
                    break;
                default: break;
            }
        }

        // Whatever follows the last separator is either the value of the
        // pending key or a final bare key. `start` never exceeds
        // content.length, so no other case can occur.
        string rest = decodeToken(content, start, content.length, encoded);

        if (key !is null) {
            version(HUNT_HTTP_DEBUG) tracef("key=%s, value=%s", key, rest);
            this.add(key, rest);
        } else {
            version(HUNT_HTTP_DEBUG) tracef("empty value: content=%s, key=%s", content, key);
            if (!rest.empty) {
                version(HUNT_HTTP_DEBUG) tracef("key=%s, value=", rest);
                this.add(rest, "");
            }
        }
    }

    /**
     * Extracts content[begin .. end], decoding it only when `encoded` is set.
     * An empty range yields "" (an empty, non-null string).
     */
    private static string decodeToken(string content, size_t begin, size_t end, bool encoded) {
        if (begin == end)
            return "";
        string token = content[begin .. end];
        return encoded ? urlDecode(token) : token;
    }

    /**
     * Decodes a string with % encoding.
     *
     * Params:
     *   encoded = the encoded string to decode
     *
     * Returns: the decoded string
     */
    static string decodeString(string encoded) {
        return urlDecode(encoded);
    }

    /**
     * Decodes part of a string with % encoding.
     *
     * Params:
     *   encoded = the encoded string to decode
     *   offset  = the offset in the encoded string to decode from
     *   length  = the number of characters of the encoded string to decode
     *
     * Returns: the decoded string
     */
    static string decodeString(string encoded, size_t offset, size_t length) {
        return urlDecode(encoded[offset .. offset+length]);
    }


    /**
     * Performs URL encoding.
     *
     * Params:
     *   str         = the string to encode
     *   encodeStyle = the escaping rules; UrlEncodeStyle.URI selects raw
     *                 encoding, any other value selects form encoding
     *
     * Returns: the encoded string
     */
    static string encodeString(string str, UrlEncodeStyle encodeStyle = UrlEncodeStyle.URI) {
        return urlEncode(str, encodeStyle == UrlEncodeStyle.URI);       
    }
}