Check-in [99e16bb580]

Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
SHA1 Hash:99e16bb580d60e2ea0f85868dc50dd7ab9921af7
Date: 2008-09-18 13:22:14
User: stephan
Comment:added url parser
Tags And Properties
Changes
hide diffs unified diffs patch

Added parsepp_url.hpp

Old () New (89f8cbcef8071b5b)
> 1 #ifndef S11N_NET_PARSEPP_URL_H_INCLUDED
> 2 #define S11N_NET_PARSEPP_URL_H_INCLUDED 1
> 3
> 4 #include "parsepp.hpp"
> 5 #include <map>
> 6 #include <string>
> 7
> 8 #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : "
> 9 #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : "
> 10 #define DOUT if(0) COUT
> 11
> 12 namespace Ps {
> 13
> 14 /**
> 15 Ps::url encapsulates a parser for URLs. It does no escaping at all - that is
> 16 up to client code.
> 17 */
> 18 namespace url {
> 19 using namespace Ps;
> 20 #define RL rule_list
> 21 #define CL char_list
> 22 /**
> 23 A holding buffer for the results of a URL parse: the GET key/value pairs plus the named parts of the URL.
> 24 */
> 25 struct url_state
> 26 {
> 27 typedef std::map<std::string,std::string> kvpmap;
> 28 kvpmap params; // key/value pairs for GET params (filled by a_kvp_set_key/a_kvp_set_val)
> 29 kvpmap parts; // parts of the URL; the actions in this file store "scheme", "user", "password", "host", "port" and "path"
> 30 std::string key; // last-read GET key, still waiting for its value
> 31 void clear() // empties both maps and the pending key so the object can be reused
> 32 {
> 33 this->params.clear();
> 34 this->parts.clear();
> 35 this->key.clear();
> 36 }
> 37 };
> 38
> 39 /**
> 40 Action: remembers m as the url_state's current key (s.key) and enters it in s.params with an empty value.
> 41 */
> 42 struct a_kvp_set_key
> 43 {
> 44 static void matched( Ps::parser_state &, const std::string & m, url_state & s )
> 45 {
> 46 //DOUT << "set_key: " << m << '\n';
> 47 s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty (overwrites any earlier value stored under m).
> 48 }
> 49 };
> 50
> 51 /** Action to set a value. Stores v in s.params under the last-read key (s.key), then resets s.key to empty. */
> 52 struct a_kvp_set_val
> 53 {
> 54 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 55 {
> 56 //DOUT << "set_val: " << s.key << '='<<v << '\n';
> 57 s.params[s.key] = v; // if no key is pending, s.key is "" and v lands under the empty key
> 58 s.key = ""; // the pending key has been consumed
> 59 }
> 60 };
> 61
> 62 /**
> 63 Matches on a key-name field (an Identifier-style string). On a match
> 64 it activates a_kvp_set_key.
> 65 */
> 66 struct r_kvp_key
> 67 : r_action< r_identifier, a_kvp_set_key >
> 68 {};
> 69
> 70 /**
> 71 Matches the chars of a value field, i.e. any char up to a '&'. Calls a_kvp_set_val on match.
> 72 */
> 73 struct r_kvp_val
> 74 : r_action< r_star< r_notch< '&' > >, a_kvp_set_val >
> 75 {};
> 76
> 77 /** Reads a single key/val pair: a key (r_kvp_key), then '=', then an optional value (r_kvp_val). */
> 78 struct r_kvp
> 79 : r_and< RL< r_kvp_key, r_ch<'='>, r_opt<r_kvp_val> > >
> 80 {};
> 81
> 82 /** Reads a sequence of key/val pairs, delimited by '&' (http-style). The pair after each '&' is wrapped in r_opt, so it is optional. */
> 83 struct r_kvps
> 84 : r_and< RL< r_kvp, r_star< r_and< RL< r_ch<'&'>, r_opt<r_kvp> > > > > >
> 85 {};
> 86 /** Action: stores v as s.parts["port"]. */
> 87 struct a_port
> 88 {
> 89 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 90 {
> 91 DOUT << "port=" << v <<'\n'; // debug trace; DOUT is defined as 'if(0) COUT', so this never prints
> 92 s.parts["port"] = v;
> 93 }
> 94 };
> 95 /** Matches ':' followed by r_plus<r_digit>. Only the digit part is wrapped in the action, so a_port is not handed the ':'. */
> 96 struct r_port
> 97 : r_and< RL< r_ch<':'>, r_action< r_plus<r_digit>, a_port > > >
> 98 {};
> 99 /** Action: stores v as s.parts["host"]. */
> 100 struct a_host
> 101 {
> 102 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 103 {
> 104 DOUT << "host=" << v <<'\n'; // debug trace; DOUT is defined as 'if(0) COUT', so this never prints
> 105 s.parts["host"] = v;
> 106 }
> 107 };
> 108 /** Matches a host name: a run of chars that are each r_alnum, '-' or '_', each optionally followed by a '.'. Known to over-match; see the FIXMEs. */
> 109 struct r_hostname
> 110 : r_plus< r_and< RL< r_plus<
> 111 r_and< RL< r_or< RL< r_alnum, r_ch<'-'>, r_ch<'_'> > >, r_opt< r_ch<'.'> > > >
> 112 > > > >
> 113 /* FIXME: matches with a trailing dot (e.g. foo.com.) */
> 114 /* FIXME: matches many bogus hostnames, like "----" or "foo.c-" */
> 115 {};
> 116 /** Matches a host name (r_hostname) and calls a_host on match. */
> 117 struct r_host
> 118 : r_action< r_hostname, a_host >
> 119 {};
> 120 /** Action: stores v, cut off at its first ':' if it has one, as s.parts["scheme"]. */
> 121 struct a_scheme
> 122 {
> 123 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 124 {
> 125 std::string val = v.substr( 0, v.find(':') ); // find() yields npos when there is no ':', and substr(0,npos) keeps all of v
> 126 DOUT << "scheme=" << val <<'\n'; // debug trace; DOUT is defined as 'if(0) COUT', so this never prints
> 127 s.parts["scheme"] = val;
> 128 }
> 129 };
> 130 /** Matches r_identifier, then ':', then r_plus< r_ch<'/'> >. Only the identifier part is wrapped in the a_scheme action. */
> 131 struct r_scheme
> 132 : r_and< RL< r_action< r_identifier, a_scheme >, r_ch<':'>, r_plus< r_ch<'/'> > > >
> 133 // ^^^ FIXME: keep the last / for file:/ and file:///
> 134 {};
> 135 /** Action: stores v as s.parts["path"]. */
> 136 struct a_path
> 137 {
> 138 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 139 {
> 140 DOUT << "path=" << v <<'\n'; // debug trace; DOUT is defined as 'if(0) COUT', so this never prints
> 141 s.parts["path"] = v;
> 142 }
> 143 };
> 144 /** Matches the GET-parameter section: '?' followed by the key/val pairs (r_kvps). */
> 145 struct r_params
> 146 : r_and< RL< r_ch<'?'>, r_kvps > >
> 147 {};
> 148 /** Matches the path: any char up to a '?'. Calls a_path on match. */
> 149 struct r_path
> 150 : r_action< r_star< r_notch<'?'> >, a_path >
> 151 {};
> 152 /** Action: stores v as s.parts["user"]. NOTE(review): no rule in this file references it; a_login sets "user" instead. */
> 153 struct a_username
> 154 {
> 155 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 156 {
> 157 DOUT << "user=" << v <<'\n'; // debug trace; DOUT is defined as 'if(0) COUT', so this never prints
> 158 s.parts["user"] = v;
> 159 }
> 160 };
> 161 /** Matches the user-name part of a login (r_alnum+ in regex terms). No action is attached here. */
> 162 struct r_user
> 163 : r_plus<r_alnum>
> 164 {};
> 165 /** Action: stores v as s.parts["password"]. Attached to r_password. */
> 166 struct a_password
> 167 {
> 168 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 169 {
> 170 DOUT << "password=" << v <<'\n'; // debug trace; DOUT is defined as 'if(0) COUT', so this never prints
> 171 s.parts["password"] = v;
> 172 }
> 173 };
> 174 /** Matches the password part of a login ([^@]+ in regex terms). Calls a_password on match. */
> 175 struct r_password
> 176 : r_action< r_plus< r_notch<'@'> >, a_password >
> 177 {};
> 178 /** Action for r_login: splits v ("user" or "user:password", possibly with a trailing '@') into s.parts["user"] and s.parts["password"]. */
> 179 struct a_login
> 180 {
> 181 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
> 182 {
> 183 std::string val = v; // v is const, so trim a copy
> 184 if( val.empty() ) return; // can't happen?
> 185 // chop trailing '@' (work around unfortunate parser design aspect)
> 186 if( '@' == val[val.size()-1] ) val.resize(val.size()-1);
> 187 std::string::size_type pos = val.find(':'); // the first ':' separates user from password
> 188
> 189 if( pos != std::string::npos )
> 190 {
> 191 s.parts["user"] = val.substr( 0, pos );
> 192 s.parts["password"] = val.substr( pos+1 ); // everything after the first ':'
> 193 DOUT << "user=" << s.parts["user"] <<'\n';
> 194 DOUT << "password=" << s.parts["password"] <<'\n';
> 195 }
> 196 else
> 197 {
> 198 s.parts["user"] = val; // no ':' found: "password" is left untouched
> 199 DOUT << "user=" << val <<'\n';
> 200 }
> 201 }
> 202 };
> 203
> 204 /**
> 205 Matches the login prefix of a URL; in regex terms: r_alnum+(:[^@]+)?@ . a_login is called on the whole match.
> 206 */
> 207 struct r_login
> 208 /**
> 209 action<
> 210 seq< r_user, opt< seq< one<':'>, r_password > >,
> 211 one<'@'> >, a_login>
> 212 */
> 213 : r_action<
> 214 r_and< RL<
> 215 r_user,
> 216 r_opt< r_and<RL<r_ch<':'>,r_password > > >,
> 217 r_ch<'@'>
> 218 > >,
> 219 a_login>
> 220 {};
> 221 /** Matches the path of a file URL: r_plus< r_any > (presumably one or more chars of any kind; confirm against parsepp.hpp). */
> 222 struct r_filepath
> 223 : r_plus< r_any >
> 224 {};
> 225
> 226 /**
> 227 File URLs need special handling, due to the varying conventions regarding
> 228 the number of leading slashes, e.g. file:/etc/hosts vs file:///etc/hosts.
> 229 The second is correct, but many implementations allow the first form.
> 230
> 231 seq< action< seq< string<'f','i','l','e'>, one<':'> >, a_scheme >,
> 232 seq< opt< string<'/','/'> >, action< r_filepath, a_path > > >
> 233 The scheme action wraps "file" plus the ':'; a_scheme itself cuts its input at the ':'.
> 234 */
> 235 struct r_fileurl
> 236 : r_and< RL<
> 237 r_action< r_and< RL<
> 238 r_chseq<CL<'f','i','l','e'> >,
> 239 r_ch<':'>
> 240 > >, a_scheme >,
> 241 r_and<RL< r_opt< r_repeat< r_ch<'/'>,2> >,
> 242 r_action< r_filepath, a_path >
> 243 > >
> 244 > >
> 245 {};
> 246 /** Matches a non-file URL: scheme, optional login, host, then an optional tail of optional port, path and params, in that order. */
> 247 struct r_stdurl
> 248 : r_and< RL< r_scheme,
> 249 r_opt<r_login>,
> 250 r_host,
> 251 r_opt< r_and< RL< r_opt<r_port>, r_opt<r_path>, r_opt<r_params> > > >
> 252 > >
> 253 {};
> 254 /** Top-level rule, used by parse_url(): matches either a file URL (r_fileurl) or a standard URL (r_stdurl). */
> 255 struct r_url
> 256 : r_or< RL< r_fileurl, r_stdurl > >
> 257 {};
> 258 /**
> 259 Bugs:
> 260
> 261 - http://user:@host... sets the hostname to 'user' and path to ':@host...'
> 262 */
> 263
> 264 #undef RL
> 265 #undef CL
> 266
> 267 /**
> 268 Convenience function to parse the given URL (src) and stuff the data into
> 269 tgt. Returns the result of Ps::parse<r_url>(). tgt is not cleared first.
> 270 */
> 271 inline bool parse_url( std::string const & src, url_state & tgt )
> 272 {
> 273 return Ps::parse< r_url >( src, tgt );
> 274 }
> 275
> 276 }} // namespaces
> 277
> 278 #endif // S11N_NET_PARSEPP_URL_H_INCLUDED

Added url.cpp

Old () New (097a71e90f68aee7)
> 1 #include "parsepp.hpp"
> 2 #include "parsepp_url.hpp"
> 3 #include <map>
> 4 #include <string>
> 5
> 6 #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : "
> 7 #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : "
> 8 #define DOUT if(0) COUT
> 9
> 10 /** Test driver: parses each command-line argument (or a built-in sample URL when there are none) and dumps the results. Returns 1 on the first parse failure, else 0. */
> 11 int main( int argc, char ** argv )
> 12 {
> 13 std::string scr = "http://user:pass@example.com:8080/some/path?abc=def&ghi=value #2"; // default input: must be a complete URL, a bare key=value string cannot match r_url
> 14 std::string line;
> 15 using namespace Ps;
> 16 url::url_state state;
> 17 for ( int arg = 0; arg < argc; ++arg ) {
> 18 if( (argc>1) && (0==arg) ) continue; // skip the program name when real arguments were given
> 19 line = (argc>1) ? argv[ arg ] : scr.c_str(); // without arguments the single pass (arg 0) uses the default
> 20 COUT << "Trying: " << line << '\n';
> 21 if ( Ps::url::parse_url( line, state ) ) {
> 22 COUT << "Parsed OK: " << line << '\n';
> 23 }
> 24 else {
> 25 COUT << "Parsed FAILED: " << line << '\n';
> 26 return 1;
> 27 }
> 28 COUT << "URL parts:\n";
> 29 typedef url::url_state::kvpmap MT;
> 30 MT::const_iterator it = state.parts.begin();
> 31 MT::const_iterator et = state.parts.end();
> 32 for( ; it != et; ++it )
> 33 {
> 34 COUT << '\t' << (*it).first << '=' << (*it).second << '\n';
> 35 }
> 36 COUT << "kvp map entry count: "<<state.params.size()<<std::endl;
> 37 it = state.params.begin(); // reuse the iterators for the GET parameters
> 38 et = state.params.end();
> 39 for( ; it != et; ++it )
> 40 {
> 41 COUT << '\t' << (*it).first << '=' << (*it).second << '\n';
> 42 }
> 43 state.clear(); // parse_url() does not reset the state, so do it before the next argument
> 44 }
> 45
> 46
> 47 return 0;
> 48 }