Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | 99e16bb580d60e2ea0f85868dc50dd7ab9921af7 |
|---|---|
| Date: | 2008-09-18 13:22:14 |
| User: | stephan |
| Comment: | added url parser |
Tags And Properties
- branch=trunk inherited from [d45e7467f2]
- sym-trunk inherited from [d45e7467f2]
Changes
Added parsepp_url.hpp
| Old () | New (89f8cbcef8071b5b) | |||
|---|---|---|---|---|
| > | 1 | #ifndef S11N_NET_PARSEPP_URL_H_INCLUDED | ||
| > | 2 | #define S11N_NET_PARSEPP_URL_H_INCLUDED 1 | ||
| > | 3 | |||
| > | 4 | #include "parsepp.hpp" | ||
| > | 5 | #include <map> | ||
| > | 6 | #include <string> | ||
| > | 7 | |||
| > | 8 | #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : " | ||
| > | 9 | #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : " | ||
| > | 10 | #define DOUT if(0) COUT | ||
| > | 11 | |||
| > | 12 | namespace Ps { | ||
| > | 13 | |||
| > | 14 | /** | ||
| > | 15 | Ps::url encapsulates a parser for URLs. It does no escaping at all - that is | ||
| > | 16 | up to client code. | ||
| > | 17 | */ | ||
| > | 18 | namespace url { | ||
| > | 19 | using namespace Ps; | ||
| > | 20 | #define RL rule_list | ||
| > | 21 | #define CL char_list | ||
/**
   Accumulates the results of a URL parse.

   - params: key/value pairs read from the query ("GET") section.
   - parts:  named pieces of the URL, as stored by the a_xxx actions
             (e.g. "scheme", "host", "port", "path", "user", "password").
   - key:    the most recently read parameter key, remembered so that
             the following value can be filed under it.
*/
struct url_state
{
    typedef std::map<std::string,std::string> kvpmap;
    kvpmap params; // key/value pairs for GET params
    kvpmap parts; // key/value pairs for various parts of URL
    std::string key; // last-read key.

    /** Empties both maps and forgets the pending key. */
    void clear()
    {
        params.clear();
        parts.clear();
        key.clear();
    }
};
| > | 38 | |||
| > | 39 | /** | ||
| > | 40 | Sets the url_state's current key value. | ||
| > | 41 | */ | ||
| > | 42 | struct a_kvp_set_key | ||
| > | 43 | { | ||
| > | 44 | static void matched( Ps::parser_state &, const std::string & m, url_state & s ) | ||
| > | 45 | { | ||
| > | 46 | //DOUT << "set_key: " << m << '\n'; | ||
| > | 47 | s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty. | ||
| > | 48 | } | ||
| > | 49 | }; | ||
| > | 50 | |||
| > | 51 | /** Action to set a value. Uses the last-read key as the lookup key. */ | ||
| > | 52 | struct a_kvp_set_val | ||
| > | 53 | { | ||
| > | 54 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 55 | { | ||
| > | 56 | //DOUT << "set_val: " << s.key << '='<<v << '\n'; | ||
| > | 57 | s.params[s.key] = v; | ||
| > | 58 | s.key = ""; | ||
| > | 59 | } | ||
| > | 60 | }; | ||
| > | 61 | |||
| > | 62 | /** | ||
| > | 63 | Matches on a key-name field (an Identifier-style string). On a match | ||
| > | 64 | It activated a_kvp_set_key. | ||
| > | 65 | */ | ||
| > | 66 | struct r_kvp_key | ||
| > | 67 | : r_action< r_identifier, a_kvp_set_key > | ||
| > | 68 | {}; | ||
| > | 69 | |||
| > | 70 | /** | ||
| > | 71 | Matches any char up to a '&'. Calls a_kvp_set_val on match. | ||
| > | 72 | */ | ||
| > | 73 | struct r_kvp_val | ||
| > | 74 | : r_action< r_star< r_notch< '&' > >, a_kvp_set_val > | ||
| > | 75 | {}; | ||
| > | 76 | |||
| > | 77 | /** Reads a single key/val pair. */ | ||
| > | 78 | struct r_kvp | ||
| > | 79 | : r_and< RL< r_kvp_key, r_ch<'='>, r_opt<r_kvp_val> > > | ||
| > | 80 | {}; | ||
| > | 81 | |||
| > | 82 | /** Reads a sequence of key/val pairs, delimited by '&' (http-style). */ | ||
| > | 83 | struct r_kvps | ||
| > | 84 | : r_and< RL< r_kvp, r_star< r_and< RL< r_ch<'&'>, r_opt<r_kvp> > > > > > | ||
| > | 85 | {}; | ||
| > | 86 | |||
| > | 87 | struct a_port | ||
| > | 88 | { | ||
| > | 89 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 90 | { | ||
| > | 91 | DOUT << "port=" << v <<'\n'; | ||
| > | 92 | s.parts["port"] = v; | ||
| > | 93 | } | ||
| > | 94 | }; | ||
| > | 95 | |||
| > | 96 | struct r_port | ||
| > | 97 | : r_and< RL< r_ch<':'>, r_action< r_plus<r_digit>, a_port > > > | ||
| > | 98 | {}; | ||
| > | 99 | |||
| > | 100 | struct a_host | ||
| > | 101 | { | ||
| > | 102 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 103 | { | ||
| > | 104 | DOUT << "host=" << v <<'\n'; | ||
| > | 105 | s.parts["host"] = v; | ||
| > | 106 | } | ||
| > | 107 | }; | ||
| > | 108 | |||
| > | 109 | struct r_hostname | ||
| > | 110 | : r_plus< r_and< RL< r_plus< | ||
| > | 111 | r_and< RL< r_or< RL< r_alnum, r_ch<'-'>, r_ch<'_'> > >, r_opt< r_ch<'.'> > > > | ||
| > | 112 | > > > > | ||
| > | 113 | /* FIXME: matches with a trailing dot (e.g. foo.com.) */ | ||
| > | 114 | /* FIXME: matches many bogus hostnames, like "----" or "foo.c-" */ | ||
| > | 115 | {}; | ||
| > | 116 | |||
| > | 117 | struct r_host | ||
| > | 118 | : r_action< r_hostname, a_host > | ||
| > | 119 | {}; | ||
| > | 120 | |||
| > | 121 | struct a_scheme | ||
| > | 122 | { | ||
| > | 123 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 124 | { | ||
| > | 125 | std::string val = v.substr( 0, v.find(':') ); | ||
| > | 126 | DOUT << "scheme=" << val <<'\n'; | ||
| > | 127 | s.parts["scheme"] = val; | ||
| > | 128 | } | ||
| > | 129 | }; | ||
| > | 130 | |||
| > | 131 | struct r_scheme | ||
| > | 132 | : r_and< RL< r_action< r_identifier, a_scheme >, r_ch<':'>, r_plus< r_ch<'/'> > > > | ||
| > | 133 | // ^^^ FIXME: keep the last / for file:/ and file:/// | ||
| > | 134 | {}; | ||
| > | 135 | |||
| > | 136 | struct a_path | ||
| > | 137 | { | ||
| > | 138 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 139 | { | ||
| > | 140 | DOUT << "path=" << v <<'\n'; | ||
| > | 141 | s.parts["path"] = v; | ||
| > | 142 | } | ||
| > | 143 | }; | ||
| > | 144 | |||
| > | 145 | struct r_params | ||
| > | 146 | : r_and< RL< r_ch<'?'>, r_kvps > > | ||
| > | 147 | {}; | ||
| > | 148 | |||
| > | 149 | struct r_path | ||
| > | 150 | : r_action< r_star< r_notch<'?'> >, a_path > | ||
| > | 151 | {}; | ||
| > | 152 | |||
| > | 153 | struct a_username | ||
| > | 154 | { | ||
| > | 155 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 156 | { | ||
| > | 157 | DOUT << "user=" << v <<'\n'; | ||
| > | 158 | s.parts["user"] = v; | ||
| > | 159 | } | ||
| > | 160 | }; | ||
| > | 161 | |||
| > | 162 | struct r_user | ||
| > | 163 | : r_plus<r_alnum> | ||
| > | 164 | {}; | ||
| > | 165 | |||
| > | 166 | struct a_password | ||
| > | 167 | { | ||
| > | 168 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 169 | { | ||
| > | 170 | DOUT << "password=" << v <<'\n'; | ||
| > | 171 | s.parts["password"] = v; | ||
| > | 172 | } | ||
| > | 173 | }; | ||
| > | 174 | |||
| > | 175 | struct r_password | ||
| > | 176 | : r_action< r_plus< r_notch<'@'> >, a_password > | ||
| > | 177 | {}; | ||
| > | 178 | |||
| > | 179 | struct a_login | ||
| > | 180 | { | ||
| > | 181 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| > | 182 | { | ||
| > | 183 | std::string val = v; | ||
| > | 184 | if( val.empty() ) return; // can't happen? | ||
| > | 185 | // chop trailing '@' (work around unfortunate parser design aspect) | ||
| > | 186 | if( '@' == val[val.size()-1] ) val.resize(val.size()-1); | ||
| > | 187 | std::string::size_type pos = val.find(':'); | ||
| > | 188 | |||
| > | 189 | if( pos != std::string::npos ) | ||
| > | 190 | { | ||
| > | 191 | s.parts["user"] = val.substr( 0, pos ); | ||
| > | 192 | s.parts["password"] = val.substr( pos+1 ); | ||
| > | 193 | DOUT << "user=" << s.parts["user"] <<'\n'; | ||
| > | 194 | DOUT << "password=" << s.parts["password"] <<'\n'; | ||
| > | 195 | } | ||
| > | 196 | else | ||
| > | 197 | { | ||
| > | 198 | s.parts["user"] = val; | ||
| > | 199 | DOUT << "user=" << val <<'\n'; | ||
| > | 200 | } | ||
| > | 201 | } | ||
| > | 202 | }; | ||
| > | 203 | |||
| > | 204 | /** | ||
| > | 205 | r_alnum+(:[^@]))?@ | ||
| > | 206 | */ | ||
| > | 207 | struct r_login | ||
| > | 208 | /** | ||
| > | 209 | action< | ||
| > | 210 | seq< r_user, opt< seq< one<':'>, r_password > >, | ||
| > | 211 | one<'@'> >, a_login> | ||
| > | 212 | */ | ||
| > | 213 | : r_action< | ||
| > | 214 | r_and< RL< | ||
| > | 215 | r_user, | ||
| > | 216 | r_opt< r_and<RL<r_ch<':'>,r_password > > >, | ||
| > | 217 | r_ch<'@'> | ||
| > | 218 | > >, | ||
| > | 219 | a_login> | ||
| > | 220 | {}; | ||
| > | 221 | |||
| > | 222 | struct r_filepath | ||
| > | 223 | : r_plus< r_any > | ||
| > | 224 | {}; | ||
| > | 225 | |||
| > | 226 | /** | ||
| > | 227 | File URLs need special handling, due to the varying conventions regarding | ||
| > | 228 | the number of leading slashes. e.g. file:/etc/hosts vs file:///etc/hosts, | ||
| > | 229 | the second is correct but many implementations allow the first form. | ||
| > | 230 | |||
| > | 231 | seq< action< seq< string<'f','i','l','e'>, one<':'> >, a_scheme >, | ||
| > | 232 | seq< opt< string<'/','/'> >, action< r_filepath, a_path > > > | ||
| > | 233 | |||
| > | 234 | */ | ||
| > | 235 | struct r_fileurl | ||
| > | 236 | : r_and< RL< | ||
| > | 237 | r_action< r_and< RL< | ||
| > | 238 | r_chseq<CL<'f','i','l','e'> >, | ||
| > | 239 | r_ch<':'> | ||
| > | 240 | > >, a_scheme >, | ||
| > | 241 | r_and<RL< r_opt< r_repeat< r_ch<'/'>,2> >, | ||
| > | 242 | r_action< r_filepath, a_path > | ||
| > | 243 | > > | ||
| > | 244 | > > | ||
| > | 245 | {}; | ||
| > | 246 | |||
| > | 247 | struct r_stdurl | ||
| > | 248 | : r_and< RL< r_scheme, | ||
| > | 249 | r_opt<r_login>, | ||
| > | 250 | r_host, | ||
| > | 251 | r_opt< r_and< RL< r_opt<r_port>, r_opt<r_path>, r_opt<r_params> > > > | ||
| > | 252 | > > | ||
| > | 253 | {}; | ||
| > | 254 | |||
| > | 255 | struct r_url | ||
| > | 256 | : r_or< RL< r_fileurl, r_stdurl > > | ||
| > | 257 | {}; | ||
| > | 258 | /** | ||
| > | 259 | Bugs: | ||
| > | 260 | |||
| > | 261 | - http://user:@host... sets the hostname to 'user' and path to ':@host...' | ||
| > | 262 | */ | ||
| > | 263 | |||
| > | 264 | #undef RL | ||
| > | 265 | #undef CL | ||
| > | 266 | |||
| > | 267 | /** | ||
| > | 268 | Convenience function to parse the given URL and stuff the data into | ||
| > | 269 | tgt. | ||
| > | 270 | */ | ||
| > | 271 | inline bool parse_url( std::string const & src, url_state & tgt ) | ||
| > | 272 | { | ||
| > | 273 | return Ps::parse< r_url >( src, tgt ); | ||
| > | 274 | } | ||
| > | 275 | |||
| > | 276 | }} // namespaces | ||
| > | 277 | |||
| > | 278 | #endif // S11N_NET_PARSEPP_URL_H_INCLUDED | ||
Added url.cpp
| Old () | New (097a71e90f68aee7) | |||
|---|---|---|---|---|
| > | 1 | #include "parsepp.hpp" | ||
| > | 2 | #include "parsepp_url.hpp" | ||
| > | 3 | #include <map> | ||
| > | 4 | #include <string> | ||
| > | 5 | |||
| > | 6 | #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : " | ||
| > | 7 | #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : " | ||
| > | 8 | #define DOUT if(0) COUT | ||
| > | 9 | |||
| > | 10 | |||
| > | 11 | int main( int argc, char ** argv ) | ||
| > | 12 | { | ||
| > | 13 | std::string scr = "abc=def&ghi=value #2"; | ||
| > | 14 | std::string line; | ||
| > | 15 | using namespace Ps; | ||
| > | 16 | url::url_state state; | ||
| > | 17 | for ( int arg = 0; arg < argc; ++arg ) { | ||
| > | 18 | if( (argc>1) && (0==arg) ) continue; | ||
| > | 19 | line = (argc>1) ? argv[ arg ] : scr.c_str(); | ||
| > | 20 | COUT << "Trying: " << line << '\n'; | ||
| > | 21 | if ( Ps::url::parse_url( line, state ) ) { | ||
| > | 22 | COUT << "Parsed OK: " << line << '\n'; | ||
| > | 23 | } | ||
| > | 24 | else { | ||
| > | 25 | COUT << "Parsed FAILED: " << line << '\n'; | ||
| > | 26 | return 1; | ||
| > | 27 | } | ||
| > | 28 | COUT << "URL parts:\n"; | ||
| > | 29 | typedef url::url_state::kvpmap MT; | ||
| > | 30 | MT::const_iterator it = state.parts.begin(); | ||
| > | 31 | MT::const_iterator et = state.parts.end(); | ||
| > | 32 | for( ; it != et; ++it ) | ||
| > | 33 | { | ||
| > | 34 | COUT << '\t' << (*it).first << '=' << (*it).second << '\n'; | ||
| > | 35 | } | ||
| > | 36 | COUT << "kvp map entry count: "<<state.params.size()<<std::endl; | ||
| > | 37 | it = state.params.begin(); | ||
| > | 38 | et = state.params.end(); | ||
| > | 39 | for( ; it != et; ++it ) | ||
| > | 40 | { | ||
| > | 41 | COUT << '\t' << (*it).first << '=' << (*it).second << '\n'; | ||
| > | 42 | } | ||
| > | 43 | state.clear(); | ||
| > | 44 | } | ||
| > | 45 | |||
| > | 46 | |||
| > | 47 | return 0; | ||
| > | 48 | } | ||