module hunt.net.util.UrlEncoded;

import hunt.collection;

import hunt.text.Charset;
import hunt.Exceptions;
import hunt.text.Common;
import hunt.util.StringBuilder;
import hunt.util.ConverterUtils;

import hunt.logging;

import std.ascii;
import std.conv;
import std.array;


/* rfc1738:

   ...The characters ";",
   "/", "?", ":", "@", "=" and "&" are the characters which may be
   reserved for special meaning within a scheme...

   ...Thus, only alphanumerics, the special characters "$-_.+!*'(),", and
   reserved characters used for their reserved purposes may be used
   unencoded within a URL...

   For added safety, we only leave -_. unencoded.
*/

/**
 * Percent-encodes a string one byte (UTF-8 code unit) at a time.
 *
 * ASCII letters, digits, '-', '.' and '_' are copied unchanged. Every other
 * byte becomes "%XX" with upper-case hex digits, with two exceptions that
 * depend on `raw`:
 *   - raw == false: a space is written as '+', and '~' is escaped;
 *   - raw == true:  a space is written as "%20", and '~' is copied unchanged.
 *
 * @param s   the string to encode
 * @param raw selects the space / '~' handling described above
 * @return the encoded string
 */
private string urlEncode(string s, bool raw) {

    Appender!string sb;
    // Worst case: every byte expands to a three-character "%XX" escape.
    sb.reserve(s.length * 3);

    foreach (char c; s) {
        if (!raw && c == ' ') {
            sb.put('+');
        } else if ((c < '0' && c != '-' && c != '.') ||
                (c < 'A' && c > '9') ||
                (c > 'Z' && c < 'a' && c != '_') ||
                (c > 'z' && (!raw || c != '~'))) {
            sb.put('%');
            sb.put(hexDigits[c >> 4]);   // high nibble
            sb.put(hexDigits[c & 15]);   // low nibble
        } else {
            sb.put(c);
        }
    }

    return sb.data;
}


/**
 * Reverses urlEncode.
 *
 * '+' is always turned into a space, and "%XX" (two hex digits, either case)
 * is turned into the byte it denotes. A '%' that is not followed by two hex
 * digits is copied through unchanged instead of being treated as an error.
 *
 * @param str the encoded string
 * @return the decoded string
 */
private string urlDecode(string str) {
    Appender!string sb;
    // Decoding never makes the text longer.
    sb.reserve(str.length);

    for (size_t i = 0; i < str.length; i++) {
        immutable char c = str[i];

        if (c == '+') {
            sb.put(' ');
        } else if (c == '%' && str.length - i > 2
                && isHexDigit(str[i + 1]) && isHexDigit(str[i + 2])) {
            sb.put(cast(char) to!int(str[i + 1 .. i + 3], 16));
            i += 2; // skip the two hex digits just consumed
        } else {
            sb.put(c);
        }
    }

    return sb.data;
}


unittest {
    string s = `abcd 1234567890ABCD1234~!@#$%^&*()_+{}<>?:"[]\|';/.,`;

    // RFC 1738 / HTML form style: space -> '+', '~' escaped.
    string r = urlEncode(s, false);
    assert(r == "abcd+1234567890ABCD1234%7E%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D%3C%3E%3F%3A%22%5B%5D%5C%7C%27%3B%2F.%2C");
    assert(urlDecode(r) == s);

    // RFC 3986 style: space -> "%20", '~' left alone.
    r = urlEncode(s, true);
    assert(r == "abcd%201234567890ABCD1234~%21%40%23%24%25%5E%26%2A%28%29_%2B%7B%7D%3C%3E%3F%3A%22%5B%5D%5C%7C%27%3B%2F.%2C");
    assert(urlDecode(r) == s);

    // Multi-byte UTF-8 sequences are escaped byte by byte.
    assert(urlEncode("中 文", true) == "%E4%B8%AD%20%E6%96%87");

    // Well-formed and malformed escapes.
    assert(urlDecode("%41") == "A");
    assert(urlDecode("100%") == "100%");
    assert(urlDecode("%4") == "%4");
    assert(urlDecode("%zz") == "%zz");
    assert(urlDecode("") == "");
}


/**
 * Selects how spaces and '~' are written when encoding.
 *
 * HtmlForm writes a space as '+' and escapes '~'; URI writes a space as
 * "%20" and leaves '~' unescaped (see UrlEncoded.encodeString).
 */
enum UrlEncodeStyle {
    HtmlForm,
    URI
}

/**
 * Handles coding of MIME "x-www-form-urlencoded".
 * <p>
 * A multi-valued map of string keys to string values that can be filled from
 * an encoded "key=value&amp;key=value" string (a URL query string or the body
 * of a form POST) and written back out in the same form.
 * </p>
 * <b>Notes</b>
 * <p>
 * Encoding and decoding work on the raw bytes of the D strings involved, so
 * text is treated as UTF-8. The charset argument accepted by decode() is
 * currently ignored.
 * </p>
 *
 * See_Also:
 *  https://www.w3.org/TR/REC-html40/interact/forms.html#h-17.13.4
 *  https://stackoverflow.com/questions/996139/urlencode-vs-rawurlencode
 */
class UrlEncoded : MultiMap!string {

    /// Name of the default charset.
    enum string ENCODING = StandardCharsets.UTF_8;

    private UrlEncodeStyle _encodeStyle = UrlEncodeStyle.URI;


    /**
     * Creates an empty map.
     *
     * @param encodeStyle the style used by encode()
     */
    this(UrlEncodeStyle encodeStyle = UrlEncodeStyle.URI) {
        _encodeStyle = encodeStyle;
    }


    /**
     * Creates a map populated from an encoded query string.
     *
     * @param query the encoded parameters, passed to decode()
     * @param encodeStyle the style used by encode()
     */
    this(string query, UrlEncodeStyle encodeStyle = UrlEncodeStyle.URI) {
        _encodeStyle = encodeStyle;
        decode(query);
    }

    /**
     * @return the style this instance uses when encoding
     */
    UrlEncodeStyle encodeStyle() {
        return _encodeStyle;
    }

    /**
     * Encode MultiMap with % encoding, always writing '=' after a key.
     * Equivalent to <code>encode(true)</code>.
     *
     * @return the MultiMap as a string with % encoding
     */
    string encode() {
        return encode(true);
    }

    /**
     * Encode MultiMap with % encoding.
     * <p>
     * Entries are joined with '&amp;'. A key that has several values is
     * written once per value; a key whose value list is null or empty is
     * written once on its own.
     * </p>
     *
     * @param equalsForNullValue if true, then an '=' is always used, even
     *                           for parameters without a value. e.g. <code>"blah?a=&amp;b=&amp;c="</code>.
     * @return the MultiMap as a string encoded with % encodings
     */
    string encode(bool equalsForNullValue) {

        StringBuilder result = new StringBuilder(128);

        bool delim = false;
        foreach (string key, List!string list; this) {
            immutable int count = (list is null) ? 0 : list.size();

            if (delim) {
                result.append('&');
            }
            delim = true;

            // The key is the same for every value, so encode it only once.
            string encodedKey = encodeString(key, _encodeStyle);

            if (count == 0) {
                result.append(encodedKey);
                if (equalsForNullValue)
                    result.append('=');
            } else {
                for (int i = 0; i < count; i++) {
                    if (i > 0)
                        result.append('&');
                    result.append(encodedKey);

                    string val = list.get(i);
                    // A null string has length 0, so this covers null and "".
                    if (val.length > 0) {
                        result.append('=');
                        result.append(encodeString(val, _encodeStyle));
                    } else if (equalsForNullValue) {
                        result.append('=');
                    }
                }
            }
        }
        return result.toString();
    }


    /**
     * Decodes encoded parameters and adds them to this map.
     * <p>
     * The content is split on '&amp;' into entries and each entry on its
     * first '=' into key and value; later '=' characters belong to the
     * value. An entry without '=' is added as a key with an empty value, and
     * empty entries are skipped. Keys and values are only run through
     * decodeString when they contain '+' or '%'.
     * </p>
     *
     * @param content the string containing the encoded parameters
     * @param charset currently ignored; kept so existing callers compile
     */
    void decode(string content, string charset = ENCODING) {

        // Returns content[from .. to], decoded when the segment held '+' or '%'.
        string segment(size_t from, size_t to, bool escaped) {
            return escaped ? decodeString(content, from, to - from) : content[from .. to];
        }

        string key = null;
        string value = null;
        size_t start = 0;      // index of the first character of the current segment
        bool encoded = false;  // true once the current segment has shown '+' or '%'

        for (size_t i = 0; i < content.length; i++) {
            switch (content[i]) {
                case '&':
                    value = (i == start) ? "" : segment(start, i, encoded);
                    start = i + 1;
                    encoded = false;

                    if (key !is null) {
                        version(HUNT_HTTP_DEBUG) tracef("key=%s, value=%s", key, value);
                        this.add(key, value);
                    } else if (value.length > 0) {
                        // No '=' in this entry: the text is a key without a value.
                        this.add(value, "");
                    }
                    key = null;
                    value = null;
                    break;

                case '=':
                    // Only the first '=' of an entry separates key from value.
                    if (key !is null)
                        break;
                    key = segment(start, i, encoded);
                    start = i + 1;
                    encoded = false;
                    break;

                case '+', '%':
                    encoded = true;
                    break;

                default:
                    break;
            }
        }

        // Whatever follows the last '&' (or the whole content) is still pending.
        immutable size_t end = content.length;

        if (key !is null) {
            value = (start == end) ? "" : segment(start, end, encoded);
            version(HUNT_HTTP_DEBUG) tracef("key=%s, value=%s", key, value);
            this.add(key, value);
        } else {
            version(HUNT_HTTP_DEBUG) tracef("empty value: content=%s, key=%s", content, key);
            key = segment(start, end, encoded);
            if (key.length > 0) {
                version(HUNT_HTTP_DEBUG) tracef("key=%s, value=", key);
                this.add(key, "");
            }
        }
    }

    /**
     * Decode string with % encoding.
     * '+' becomes a space and "%XX" becomes the byte it denotes; a '%' not
     * followed by two hex digits is kept as it is.
     *
     * @param encoded the encoded string to decode
     * @return the decoded string
     */
    static string decodeString(string encoded) {
        return urlDecode(encoded);
    }

    /**
     * Decode part of a string with % encoding.
     * Same rules as decodeString(string), applied to
     * <code>encoded[offset .. offset + length]</code>.
     *
     * @param encoded the encoded string to decode
     * @param offset the offset in the encoded string to decode from
     * @param length the number of characters in the encoded string to decode
     * @return the decoded string
     */
    static string decodeString(string encoded, size_t offset, size_t length) {
        return urlDecode(encoded[offset .. offset + length]);
    }


    /**
     * Perform URL encoding.
     * With UrlEncodeStyle.URI a space is written as "%20" and '~' is left
     * unescaped; with UrlEncodeStyle.HtmlForm a space is written as '+' and
     * '~' is escaped.
     *
     * @param str the string to encode
     * @param encodeStyle the encoding style to apply
     * @return encoded string.
     */
    static string encodeString(string str, UrlEncodeStyle encodeStyle = UrlEncodeStyle.URI) {
        return urlEncode(str, encodeStyle == UrlEncodeStyle.URI);
    }
}