Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | 67e91438e664cd39afc8ed818f62a62dc68039f7 |
|---|---|
| Date: | 2008-09-18 14:56:40 |
| User: | stephan |
| Comment: | added unescaping of %XX sequences in GET parameter values |
Tags And Properties
- branch=trunk inherited from [d45e7467f2]
- sym-trunk inherited from [d45e7467f2]
Changes
[hide diffs]Changes to parsepp_url.hpp
@@ -2,16 +2,40 @@
#define S11N_NET_PARSEPP_URL_H_INCLUDED 1
#include "parsepp.hpp"
#include <map>
#include <string>
+#include <sstream>
namespace Ps {
/**
- Ps::url encapsulates a parser for URLs. It does no escaping at all - that is
- up to client code.
+ Ps::url encapsulates a parser for URLs. It unescapes %XX sequences in the
+ value parts of URL parameters, but does no other unescaping.
+
+ For historical reasons, file:// URLs are handled a bit less strictly
+ than other URL schemes. The following are valid for this parser:
+
+ file:relative/path
+
+ file://relative/path
+
+ file:/absolute/path
+
+ file:///absolute/path
+
+
+ Aside from that, it handles normal URLs:
+
+ protocol://[user[:[password]]@]hostname/path?KVP
+
+ Where KVP are key/value pairs in the form KEY[=[VALUE]], separated
+ by a '&' character. The VALUE part of each pair gets unescaped,
+ such that each %XX (where XX is the hexadecimal value of a character)
+ is transformed into the corresponding ASCII character (note that
+ results are undefined here for characters >127).
+
*/
namespace url {
using namespace Ps;
#define RL rule_list
#define CL char_list
@@ -21,11 +45,11 @@
struct url_state
{
typedef std::map<std::string,std::string> kvpmap;
kvpmap params; // key/value pairs for GET params
kvpmap parts; // key/value pairs for various parts of URL
- std::string key; // last-read key.
+ std::string key; // buffer for the last-read key.
void clear()
{
this->params.clear();
this->parts.clear();
this->key.clear();
@@ -42,17 +66,62 @@
//COUT << "set_key: " << m << '\n';
s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty.
}
};
- /** Action to set a value. Uses the last-read key as the lookup key. */
+
+ template <bool IsEsc> // IsEsc selects the branch below: true = m is hex digits to decode, false = m is copied verbatim
+ struct a_esc_append // parse action that appends matched text (decoded or verbatim) to a std::string
+ {
+ static void matched( Ps::parser_state &, const std::string & m, std::string & s ) // m: matched text; s: output buffer appended to; the parser state is unused
+ {
+ CERR << "a_esc_append<"<<IsEsc<<">:["<<m<<"]\n"; // debug trace of the template flag and the matched text (CERR is defined outside this excerpt)
+ if( IsEsc )
+ {
+ std::istringstream is(m); // wrap the matched text so it can be read as a number
+ int x = '?'; // value of x before extraction is attempted
+ is >> std::hex >> x; // extract into an int: extracting straight into a char reads one raw character and ignores std::hex
+ s.push_back(std::string::value_type(x)); // append the parsed value converted to the string's character type
+ }
+ else
+ {
+ s += m; // not an escape: append the matched text unchanged
+ }
+ }
+ };
+
+
+ /**
+ Action to set a value. Uses the last-read key as the lookup key.
+
+ The value is automatically un-percent-escaped (e.g. %20 is converted
+ to a space).
+ */
struct a_kvp_set_val // stores the matched value in s.params under the last-read key (s.key), then clears the key
{
- static void matched( Ps::parser_state &, const std::string & v, url_state & s )
+ static void matched( Ps::parser_state &, const std::string & m, url_state & s ) // m: raw matched value text; s: URL state holding params and the pending key
{
- //COUT << "set_val: " << s.key << '='<<v << '\n';
- s.params[s.key] = v;
+ COUT << "a_kvp_set_val: " << s.key << '='<<m << '\n'; // debug trace of key and raw value (COUT is defined outside this excerpt)
+ if( ! m.empty() ) // only non-empty values are run through the unescaping sub-parser
+ {
+ std::string unesc; // receives the unescaped value built up by the a_esc_append actions
+ typedef r_ch<'%'> PCT; // presumably matches a literal '%' (parsepp rule definitions are not visible here)
+ typedef r_repeat<r_xdigit,2> DIGITS; // presumably two hex digits, judging by the rule names
+ typedef r_and<RL<PCT,DIGITS> > ESC; // a full escape: PCT followed by DIGITS, with no action attached
+ typedef r_and<RL<PCT,
+ r_action< DIGITS, a_esc_append<true> > // the action is attached to DIGITS only, so a_esc_append<true> receives the digits without the '%'
+ > > AESC; // ESC variant that triggers the decoding action
+ typedef r_action< r_plus< r_and<RL<r_notat<ESC>,r_advance<1> > > >, a_esc_append<false> > NESC; // presumably a run of characters not starting an ESC; passed to a_esc_append<false>
+ typedef r_plus< r_or< RL< AESC, NESC > > > START; // top-level rule built from repeated AESC / NESC alternatives
+ Ps::parser_state ps(m); // separate parser state constructed over the value text
+ Ps::parse<START>(ps, unesc); // run the sub-parse with unesc as the action state; the result of parse is not checked
+ s.params[s.key] = unesc; // store whatever the actions accumulated
+ }
+ else
+ {
+ s.params[s.key] = m; // empty value: stored as-is
+ }
s.key = ""; // reset the key buffer after the value has been stored
}
};
/**