Check-in [67e91438e6]

Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

SHA1 Hash:67e91438e664cd39afc8ed818f62a62dc68039f7
Date: 2008-09-18 14:56:40
User: stephan
Comment:added unescaping of %XX sequences in GET parameter values

Tags And Properties
Changes
[hide diffs]

Changes to parsepp_url.hpp

@@ -2,16 +2,40 @@
 #define S11N_NET_PARSEPP_URL_H_INCLUDED 1
 
 #include "parsepp.hpp"
 #include <map>
 #include <string>
+#include <sstream>
 
 namespace Ps {
 
 /**
-   Ps::url encapsulates a parser for URLs. It does no escaping at all - that is
-   up to client code.
+   Ps::url encapsulates a parser for URLs. It unescapes %XX sequences in the
+   values of URL parameters, but does no other unescaping.
+
+   For historical reasons, file:// URLs are handled a bit less strictly
+   than other URL schemes. The following are valid for this parser:
+
+   file:relative/path
+
+   file://relative/path
+
+   file:/absolute/path
+
+   file:///absolute/path
+
+
+   Aside from that, it handles normal URLs:
+
+   protocol://[user[:[password]]@]hostname/path?KVP
+
+   Where KVP are key/value pairs in the form KEY[=[VALUE]], separated
+   by a '&' character. The VALUE part of each pair gets unescaped,
+   such that each %XX sequence (where XX is the hexadecimal value of a
+   character) is transformed into that character (note that results
+   are undefined here for character values >127).
+
 */
 namespace url {
    using namespace Ps;
 #define RL rule_list
 #define CL char_list
@@ -21,11 +45,11 @@
     struct url_state
     {
 	typedef std::map<std::string,std::string> kvpmap;
 	kvpmap params; // key/value pairs for GET params
 	kvpmap parts; // key/value pairs for various parts of URL
-	std::string key; // last-read key.
+	std::string key; // buffer for the last-read key.
 	void clear()
 	{
 	    this->params.clear();
 	    this->parts.clear();
 	    this->key.clear();
@@ -42,17 +66,62 @@
 	   //COUT << "set_key: " << m << '\n';
 	   s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty.
        }
     };
 
-    /** Action to set a value. Uses the last-read key as the lookup key. */
+
+    template <bool IsEsc> // true: m is the XX of a %XX escape; false: m is literal text
+    struct a_esc_append
+    {
+	static void matched( Ps::parser_state &, const std::string & m, std::string & s )
+	{
+	    CERR << "a_esc_append<"<<IsEsc<<">:["<<m<<"]\n"; // trace output; CERR is defined outside this diff
+	   if( IsEsc )
+	   {
+	       std::istringstream is(m);
+	       int x = '?'; // presumably the fallback if extraction fails; NOTE(review): since C++11 a failed extraction stores 0 instead - confirm
+	       is >> std::hex >> x; // extract into an int: extracting straight into a char reads one raw character and ignores std::hex
+	       s.push_back(std::string::value_type(x)); // narrow the parsed value to one character and append it
+	   }
+	   else
+	   {
+	       s += m; // unescaped text is appended verbatim
+	   }
+       }
+    };
+
+
+    /**
+       Action to set a value. Uses the last-read key (s.key) as the lookup
+       key into s.params and clears s.key afterwards.
+
+       The value is un-percent-escaped first (e.g. %20 becomes a space).
+    */
     struct a_kvp_set_val
     {
-       static void matched( Ps::parser_state &, const std::string & v, url_state & s )
+       static void matched( Ps::parser_state &, const std::string & m, url_state & s )
        {
-	   //COUT << "set_val: " << s.key << '='<<v << '\n';
-	   s.params[s.key] = v;
+	   COUT << "a_kvp_set_val: " << s.key << '='<<m << '\n'; // trace output of the key and the raw (still escaped) value
+	   if( ! m.empty() )
+	   {
+	       std::string unesc; // accumulates the unescaped value
+	       typedef r_ch<'%'> PCT; // a literal '%'
+	       typedef r_repeat<r_xdigit,2> DIGITS; // presumably exactly two hex digits - confirm r_repeat in parsepp.hpp
+	       typedef r_and<RL<PCT,DIGITS> > ESC; // a whole %XX escape with no action attached; used only inside r_notat below
+	       typedef r_and<RL<PCT,
+		   r_action< DIGITS, a_esc_append<true> >
+		   > > AESC; // %XX whose two digits are handed to a_esc_append<true>
+	       typedef r_action< r_plus< r_and<RL<r_notat<ESC>,r_advance<1> > > >, a_esc_append<false> > NESC; // presumably a run of characters up to the next escape, appended verbatim - confirm r_notat/r_advance
+	       typedef r_plus< r_or< RL< AESC, NESC > > > START; // presumably one or more escaped/unescaped segments - confirm r_plus/r_or
+	       Ps::parser_state ps(m); // a separate parser state over the value text only
+	       Ps::parse<START>(ps, unesc);
+	       s.params[s.key] = unesc;
+	   }
+	   else
+	   {
+	       s.params[s.key] = m; // empty value: nothing to unescape
+	   }
 	   s.key = ""; // the buffered key has been consumed
        }
     };
 
     /**