Check-in [ef13396f46]

Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
SHA1 Hash:ef13396f4628bcc4554b841ede8bb1de09d4a958
Date: 2008-09-18 17:57:24
User: stephan
Comment:added a few routines and moved others into new files
Tags And Properties
Changes
hide diffs unified diffs patch

Changes to parsepp.hpp

Old (11715259fcc08ebe) New (f67998f8636f5364)
1 #ifndef s11n_net_PARSEPP_HPP_INCLUDED 1 #ifndef s11n_net_PARSEPP_HPP_INCLUDED
2 #define s11n_net_PARSEPP_HPP_INCLUDED 2 #define s11n_net_PARSEPP_HPP_INCLUDED
3 3
4 #include <map> 4 #include <map>
5 #include <string> 5 #include <string>
1061 hidden lines
1067 parse_iterator pos( in.pos() ); 1067 parse_iterator pos( in.pos() );
1068 Action::matched( in, std::string(start,pos), st ); 1068 Action::matched( in, std::string(start,pos), st );
1069 return true; 1069 return true;
1070 } 1070 }
1071 }; 1071 };
> 1072 /**
> 1073 Alias for r_action<Rule,Action>
> 1074 */
> 1075 template <typename Rule, typename Action>
> 1076 struct r_if : r_action< Rule, Action >
> 1077 {};
> 1078
> 1079 /**
> 1080 Similar to r_action, if Rule matches then ActionIf::matched()
> 1081 is called and true is returned, otherwise ActionElse::matched()
> 1082 is called and true is returned.
> 1083
> 1084 There is no guarantee as to whether this function consumes input
> 1085 - that depends entirely on Rule. If Rule does not match then no
> 1086 input is consumed (assuming Rule conforms to the rules).
> 1087 */
> 1088 template <typename Rule, typename ActionIf, typename ActionElse>
> 1089 struct r_ifelse
> 1090 {
> 1091 typedef r_ifelse type;
> 1092 template <typename ClientState>
> 1093 inline static bool matches( parser_state & in, ClientState & st )
> 1094 {
> 1095 if( ! r_action<Rule,ActionIf>::matches(in,st) )
> 1096 {
> 1097 r_action<r_success,ActionElse>::matches(in,st);
> 1098 }
> 1099 return true;
> 1100 }
> 1101 };
1072 1102
1073 1103
1074 namespace Detail { 1104 namespace Detail {
1075 using namespace Ps; 1105 using namespace Ps;
1076 /** Internal implementation detail of a_actions_impl<>. */ 1106 /** Internal implementation detail of a_actions_impl<>. */
236 hidden lines
1313 ++in; 1343 ++in;
1314 } 1344 }
1315 return sentry( i == Count ); 1345 return sentry( i == Count );
1316 } 1346 }
1317 }; 1347 };
1318 <
1319 /** <
1320 Consumes no input and throws an ExceptionType. <
1321 */ <
1322 template <typename ExceptionType> <
1323 struct r_throw_base <
1324 { <
1325 typedef r_throw_base base; <
1326 template <typename State> <
1327 static bool matches( parser_state &, State & ) <
1328 { <
1329 throw ExceptionType(); <
1330 } <
1331 }; <
1332 <
1333 /** <
1334 Specialized to ensure the error point is marked. <
1335 */ <
1336 template <> <
1337 struct r_throw_base<parse_error> <
1338 { <
1339 typedef r_throw_base type; <
1340 template <typename State> <
1341 static bool matches( parser_state & in, State & ) <
1342 { <
1343 throw parse_error(in, "parse error triggered by r_throw" ); <
1344 return false; <
1345 } <
1346 }; <
1347 <
1348 typedef r_throw_base<parse_error> r_throw; <
1349 <
1350 <
1351 /** <
1352 A list of "standard" error IDs, for use with the error_msg<> <
1353 template. <
1354 */ <
1355 enum StandardErrorIDs { <
1356 UnknownError = 0, <
1357 UnexpectedCharacter = 1, <
1358 UserIDs = 1000 /* client-side IDs should start here. */ <
1359 }; <
1360 template <int ErrorNumber = UnknownError> <
1361 struct error_msg <
1362 { <
1363 template <typename State> <
1364 static std::string message( parser_state &, State & ) <
1365 { <
1366 return "Unknown/unspecified parsing error"; <
1367 } <
1368 }; <
1369 <
1370 <
1371 /** <
1372 Specialization for UnexpectedCharacter errors. <
1373 */ <
1374 template <> <
1375 struct error_msg<UnexpectedCharacter> <
1376 { <
1377 template <typename State> <
1378 static std::string message( parser_state & ps, State & ) <
1379 { <
1380 std::string msg("Unexpected character '"); <
1381 msg.push_back(*ps.pos()); <
1382 msg.push_back('\''); <
1383 return msg; <
1384 } <
1385 }; <
1386 <
1387 /** <
1388 Similar to r_throw, this rule throws a parse_error <
1389 exception. The what() text of the exception is the text of <
1390 error_msg<ErrorNumber>, allowing one to specialize error_msg to <
1391 create custom error messages. The where() part of the exception <
1392 <
1393 <
1394 Design note: another alternative to solve this problem would be <
1395 to use a static map<int,string>, but then we'd need to provide <
1396 .cpp files along with the .hpp files for this lib, and i don't <
1397 wanna do that. <
1398 */ <
1399 template <int ErrorNumber> <
1400 struct r_error <
1401 { <
1402 typedef r_error type; <
1403 template <typename State> <
1404 static bool matches( parser_state & in, State & st ) <
1405 { <
1406 throw parse_error( in, error_msg<ErrorNumber>::message(in,st) ); <
1407 return false; <
1408 } <
1409 }; <
1410 <
1411 1348
1412 /** 1349 /**
1413 Matches any characters in the range [Min..Max] 1350 Matches any characters in the range [Min..Max]
1414 */ 1351 */
1415 template< int Min, int Max > 1352 template< int Min, int Max >
366 hidden lines
1782 Matches the combination \r\n or a single \n. 1719 Matches the combination \r\n or a single \n.
1783 */ 1720 */
1784 struct r_eol : 1721 struct r_eol :
1785 r_or< rule_list< r_chseq< char_list<'\r','\n'> >, r_ch<'\n'> > > 1722 r_or< rule_list< r_chseq< char_list<'\r','\n'> >, r_ch<'\n'> > >
1786 {}; 1723 {};
1787 <
1788 <
1789 /** Parser for C++-style comments. */ <
1790 struct r_comment_cpp : <
1791 r_and< rule_list< <
1792 r_chseq< char_list<'/','/'> >, <
1793 r_star< r_notch<'\n'> >, <
1794 r_eol <
1795 > > <
1796 {}; <
1797 <
1798 template <typename R> <
1799 struct r_throw_if : r_and< rule_list< <
1800 r_and< rule_list<R,r_throw> >, <
1801 r_success > > <
1802 {}; <
1803 <
1804 namespace Detail <
1805 { <
1806 /** Inner part of a C++ comment. Consumes until '*' followed by '/'. */ <
1807 struct r_comment_c_inner : r_and< rule_list< <
1808 r_star< r_and< rule_list< <
1809 r_notat< r_chseq< char_list<'*','/'> > >, <
1810 r_advance<1> <
1811 > > >, <
1812 r_throw_if< r_eof > <
1813 > > <
1814 {}; <
1815 } <
1816 /** Parser for C-style comments. */ <
1817 struct r_comment_c : <
1818 r_and< rule_list< <
1819 r_chseq< char_list<'/','*'> >, <
1820 Detail::r_comment_c_inner, <
1821 r_chseq< char_list<'*','/'> > <
1822 > > <
1823 {}; <
1824 <
1825 1724
1826 1725
1827 namespace Detail { 1726 namespace Detail {
1828 using namespace Ps; 1727 using namespace Ps;
1829 struct line_col_state 1728 struct line_col_state
52 hidden lines
1882 1781
1883 1782
1884 } // namespace 1783 } // namespace
1885 1784
1886 #endif // s11n_net_PARSEPP_HPP_INCLUDED 1785 #endif // s11n_net_PARSEPP_HPP_INCLUDED

Added parsepp_cish.hpp

Old () New (a8228ff6b11adbb2)
> 1 #ifndef s11n_net_PARSEPP_CISH_HPP_INCLUDED
> 2 #define s11n_net_PARSEPP_CISH_HPP_INCLUDED
> 3
> 4 #include "parsepp.hpp"
> 5 #include "parsepp_typelist.hpp"
> 6 #include "parsepp_err.hpp"
> 7 namespace Ps {
> 8
> 9
> 10 /** Parser for C++-style comments. */
> 11 struct r_comment_cpp :
> 12 r_and< rule_list<
> 13 r_chseq< char_list<'/','/'> >,
> 14 r_star< r_notch<'\n'> >,
> 15 r_eol
> 16 > >
> 17 {};
> 18
> 19 namespace Detail
> 20 {
> 21 /** Inner part of a C-style comment. Consumes until '*' followed by '/'; triggers Errors::UnclosedComment if EOF is reached first. */
> 22 struct r_comment_c_inner : r_and< rule_list<
> 23 r_star< r_and< rule_list<
> 24 r_notat< r_chseq< char_list<'*','/'> > >,
> 25 r_advance<1>
> 26 > > >,
> 27 r_error_if< r_eof, Errors::UnclosedComment >
> 28 > >
> 29 {};
> 30 }
> 31 /** Parser for C-style comments. */
> 32 struct r_comment_c :
> 33 r_and< rule_list<
> 34 r_chseq< char_list<'/','*'> >,
> 35 Detail::r_comment_c_inner,
> 36 r_chseq< char_list<'*','/'> >
> 37 > >
> 38 {};
> 39
> 40 } // namespace
> 41
> 42 #endif // s11n_net_PARSEPP_CISH_HPP_INCLUDED

Added parsepp_err.hpp

Old () New (c8dbcf42b9e25c15)
> 1 #ifndef s11n_net_PARSEPP_ERRORS_HPP_INCLUDED
> 2 #define s11n_net_PARSEPP_ERRORS_HPP_INCLUDED
> 3 /************************************************************************
> 4 This file contains supplemental error-handling-related code for the
> 5 parsepp toolkit.
> 6
> 7 Author: Stephan Beal (http://wanderinghorse.net/home/stephan)
> 8 License: Public Domain
> 9 ************************************************************************/
> 10 #include <map>
> 11 #include <string>
> 12 #include <cassert>
> 13 #include <iostream>
> 14 #include <sstream>
> 15 #include <stdexcept>
> 16 #include <vector>
> 17 #include <list>
> 18 #include <set>
> 19
> 20 #include "parsepp.hpp"
> 21 #include "parsepp_typelist.hpp"
> 22
> 23 namespace Ps {
> 24 /**
> 25 Consumes no input and throws an ExceptionType.
> 26 */
> 27 template <typename ExceptionType>
> 28 struct r_throw_base
> 29 {
> 30 typedef r_throw_base base;
> 31 template <typename State>
> 32 static bool matches( parser_state &, State & )
> 33 {
> 34 throw ExceptionType();
> 35 }
> 36 };
> 37
> 38 /**
> 39 Specialized to ensure the error point is marked.
> 40 */
> 41 template <>
> 42 struct r_throw_base<parse_error>
> 43 {
> 44 typedef r_throw_base type;
> 45 template <typename State>
> 46 static bool matches( parser_state & in, State & )
> 47 {
> 48 throw parse_error(in, "parse error triggered by r_throw" );
> 49 return false;
> 50 }
> 51 };
> 52
> 53 typedef r_throw_base<parse_error> r_throw;
> 54
> 55
> 56 /**
> 57 A list of "standard" error IDs, for use with the error_msg<>
> 58 template. Client-side parsers which use r_error, a_error,
> 59 r_error_if, r_error_unless and related functions may specialize
> 60 error_msg<int> with their own error number values. Clients
> 61 should not use values less than UserErrorBegin. The values need
> 62 to be unique within the context of an application (including
> 63 its libraries). Thus two parsers may use the same unique IDs if
> 64 there is no chance that they will end up in the same library.
> 65 */
> 66 struct Errors
> 67 {
> 68 enum IDs {
> 69 Unknown = 0,
> 70 UnexpectedCharacter,
> 71 IllegalCharacter,
> 72 UnexpectedEOF,
> 73 UnclosedComment,
> 74 UserErrorBegin = 1000 /* client-side IDs should start here. */
> 75 };
> 76 };
> 77 template <int ErrorNumber = Errors::Unknown>
> 78 struct error_msg
> 79 {
> 80 template <typename State>
> 81 static std::string message( parser_state &, State & )
> 82 {
> 83 return "Unknown (or unspecified) parsing error";
> 84 }
> 85 };
> 86
> 87 /**
> 88 Specialization for UnexpectedCharacter errors.
> 89 */
> 90 template <>
> 91 struct error_msg<Errors::UnexpectedCharacter>
> 92 {
> 93 template <typename State>
> 94 static std::string message( parser_state & ps, State & )
> 95 {
> 96 std::string msg("Unexpected character '");
> 97 if( ps.eof() ) msg.append("EOF");
> 98 else msg.push_back(*ps.pos());
> 99 msg.push_back('\'');
> 100 return msg;
> 101 }
> 102 };
> 103 template <>
> 104 struct error_msg<Errors::UnexpectedEOF>
> 105 {
> 106 template <typename State>
> 107 static std::string message( parser_state & ps, State & )
> 108 {
> 109 return "Unexpected end of input";
> 110 }
> 111 };
> 112 template <>
> 113 struct error_msg<Errors::UnclosedComment>
> 114 {
> 115 template <typename State>
> 116 static std::string message( parser_state & ps, State & )
> 117 {
> 118 return "Reached EOF inside of a multi-line comment";
> 119 }
> 120 };
> 121 /**
> 122 Specialization for IllegalCharacter errors.
> 123 */
> 124 template <>
> 125 struct error_msg<Errors::IllegalCharacter>
> 126 {
> 127 template <typename State>
> 128 static std::string message( parser_state & ps, State & )
> 129 {
> 130 std::string msg("Illegal character '");
> 131 msg.push_back(*ps.pos());
> 132 msg.push_back('\'');
> 133 return msg;
> 134 }
> 135 };
> 136
> 137 /**
> 138 Similar to r_throw, this rule throws a parse_error
> 139 exception. The what() text of the exception is the text of
> 140 error_msg<ErrorNumber>, allowing one to specialize error_msg to
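> 141 create custom error messages. The where() part of the exception presumably reflects the position of the parser_state passed to the parse_error constructor (TODO: confirm against parse_error).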
> 142
> 143 Design note: another alternative to solve this problem would be
> 144 to use a static map<int,string>, but then we'd need to provide
> 145 .cpp files along with the .hpp files for this lib, and i don't
> 146 wanna do that.
> 147 */
> 148 template <int ErrorNumber = Errors::Unknown>
> 149 struct r_error
> 150 {
> 151 typedef r_error type;
> 152 template <typename State>
> 153 static bool matches( parser_state & in, State & st )
> 154 {
> 155 throw parse_error( in, error_msg<ErrorNumber>::message(in,st) );
> 156 return false;
> 157 }
> 158 };
> 159
> 160 /**
> 161 If Rule matches then the effect is the same as calling
> 162 r_error<ErrorNumber>::matches(), otherwise it returns
> 163 true. If Rule matches then the input iterator is moved
> 164 back to the point it was at before the match, to allow
> 165 more accurate error reporting.
> 166 */
> 167 template <typename Rule, int ErrorNumber = Errors::Unknown>
> 168 struct r_error_if
> 169 {
> 170 typedef r_error_if type;
> 171 template <typename State>
> 172 static bool matches( parser_state & in, State & st )
> 173 {
> 174 parse_iterator pos(in.pos());
> 175 if( Rule::matches(in,st) )
> 176 {
> 177 in.pos(pos);
> 178 r_error<ErrorNumber>::matches(in,st);
> 179 }
> 180 return true;
> 181 }
> 182 };
> 183
> 184 template <typename R>
> 185 struct r_throw_if : r_and< rule_list<
> 186 r_and< rule_list<R,r_throw> >,
> 187 r_success > >
> 188 {};
> 189
> 190
> 191
> 192 /**
> 193 The evil twin of r_error_if, this Rule throws an error
> 194 if Rule does NOT match.
> 195 */
> 196 template <typename Rule, int ErrorNumber = Errors::Unknown>
> 197 struct r_error_unless
> 198 {
> 199 typedef r_error_unless type;
> 200 template <typename State>
> 201 static bool matches( parser_state & in, State & st )
> 202 {
> 203 parse_iterator pos(in.pos());
> 204 if( ! Rule::matches(in,st) )
> 205 {
> 206 in.pos(pos);
> 207 r_error<ErrorNumber>::matches(in,st);
> 208 }
> 209 return true;
> 210 }
> 211 };
> 212
> 213 /**
> 214 This is an Action form of the r_error Rule, mainly for use with
> 215 r_ifelse and similar Rules.
> 216 */
> 217 template <int ErrorNumber>
> 218 struct a_error
> 219 {
> 220 typedef a_error type;
> 221 template <typename State>
> 222 static void matched( parser_state & in, std::string const &, State & st )
> 223 {
> 224 throw parse_error( in, error_msg<ErrorNumber>::message(in,st) );
> 225 }
> 226 };
> 227
> 228 } // namespace
> 229
> 230 #endif // s11n_net_PARSEPP_ERRORS_HPP_INCLUDED

Changes to parsepp_url.hpp

Old (23247e29208cbda2) New (7be932681cc889c1)
1 #ifndef S11N_NET_PARSEPP_URL_H_INCLUDED 1 #ifndef S11N_NET_PARSEPP_URL_H_INCLUDED
2 #define S11N_NET_PARSEPP_URL_H_INCLUDED 1 2 #define S11N_NET_PARSEPP_URL_H_INCLUDED 1
3 3
4 #include "parsepp.hpp" 4 #include "parsepp.hpp"
> 5 #include "parsepp_err.hpp"
5 #include <map> 6 #include <map>
6 #include <string> 7 #include <string>
7 #include <sstream> 8 #include <sstream>
8 9
9 namespace Ps { 10 namespace Ps {
23 hidden lines
33 such that %XX (where XX is a hexadecimal value of a character) get 34 such that %XX (where XX is a hexadecimal value of a character) get
34 transformed into their ASCII value (note that results are undefined 35 transformed into their ASCII value (note that results are undefined
35 here with characters >127). 36 here with characters >127).
36 37
37 */ 38 */
38 namespace url { | 39 namespace url {
| 40 enum ParseErrors {
| 41 ErrorStart = Ps::Errors::UserErrorBegin + 1,
| 42 ErrorNoPort,
| 43 ErrorInvalidHost
| 44 };
| 45 }
| 46
| 47 template <>
| 48 struct error_msg<url::ErrorNoPort>
| 49 {
| 50 template <typename State>
| 51 static std::string message( parser_state &, State & )
| 52 {
| 53 return "Port number missing after ':'";
| 54 }
| 55 };
| 56
| 57 template <>
| 58 struct error_msg<url::ErrorInvalidHost>
| 59 {
| 60 template <typename State>
| 61 static std::string message( parser_state &ps, State & )
| 62 {
| 63 return "Invalid host name at: "+std::string(ps.begin().iter(),ps.maxpos().iter());
| 64 }
| 65 };
| 66
| 67 } // namespace Ps
| 68
| 69 namespace Ps { namespace url {
39 using namespace Ps; 70 using namespace Ps;
40 #define RL rule_list 71 #define RL rule_list
41 #define CL char_list 72 #define CL char_list
42 /** 73 /**
43 A holding buffer for parsing key-value pairs. 74 A holding buffer for parsing key-value pairs.
44 */ 75 */
45 struct url_state 76 struct url_state
46 { 77 {
47 typedef std::map<std::string,std::string> kvpmap; 78 typedef std::map<std::string,std::string> kvpmap;
48 kvpmap params; // key/value pairs for GET params | 79 /**
49 kvpmap parts; // key/value pairs for various parts of URL | 80 Key/value pairs for GET params. Parsed URLs will
50 std::string key; // buffer for the last-read key. | 81 have their decoded GET arguments put here.
| 82 */
| 83 kvpmap params;
| 84 /**
| 85 Key/value pairs for various parts of URL. The standard keys include:
| 86
| 87 - scheme (e.g. http, ftp, file)
| 88 - host (e.g. foo.com)
| 89 - path (e.g. /index.html)
| 90 - port (e.g. 8080)
| 91 - user (e.g. myloginname)
| 92 - password (unencrypted plain text which may not contain an '@' character)
| 93
| 94 Those keys match to the parts of this expression:
| 95
| 96 scheme://[user[:[password]]@]host[:port][path]
| 97
| 98 Any GET parameters at the end of a parsed URL will be put in
| 99 this->params.
| 100
| 101 URL parser implementations differ on how they handle the
| 102 leading slash of a URL path, and whether or not that slash is
| 103 included in the path. If i'm not mistaken, WWW standards say
| 104 it is *not* part of the path. Most implementations, if i'm
| 105 also not mistaken, treat a leading slash as part of the path
| 106 (this is also how, e.g. Apache, expects a GET path,
| 107 AFAIK). This implementation treats the leading slash as
| 108 significant and keeps it.
| 109 */
| 110 kvpmap parts;
| 111 /**
| 112 This is a temporary buffer used by the parser. It holds the
| 113 most recently parsed GET parameter key.
| 114 */
| 115 std::string key;
| 116 /**
| 117 Clears the state.
| 118 */
51 void clear() 119 void clear()
52 { 120 {
53 this->params.clear(); 121 this->params.clear();
54 this->parts.clear(); 122 this->parts.clear();
55 this->key.clear(); 123 this->key.clear();
56 } 124 }
57 }; 125 };
58 126
59 /** | 127 namespace Detail {
60 Sets the url_state's current key value. | 128
61 */ | 129 /**
62 struct a_kvp_set_key | 130 Sets the url_state's current key value.
63 { | 131 */
64 static void matched( Ps::parser_state &, const std::string & m, url_state & s ) | 132 struct a_kvp_set_key
65 { | 133 {
66 //COUT << "set_key: " << m << '\n'; | 134 static void matched( Ps::parser_state &, const std::string & m, url_state & s )
67 s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty. | 135 {
68 } | 136 //COUT << "set_key: " << m << '\n';
69 }; | 137 s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty.
70 | 138 }
71 | 139 };
72 /** | 140
73 Internal helper. If IsEsc then all matches are assumed to be | 141
74 %XX escape sequences, otherwise they are assumed to be unescaped | 142 /**
75 text. | 143 Internal helper. If IsEsc then all matches are assumed to be
76 */ | 144 %XX escape sequences, otherwise they are assumed to be unescaped
77 template <bool IsEsc> | 145 text.
78 struct a_esc_append | 146 */
79 { | 147 template <bool IsEsc>
80 static void matched( Ps::parser_state &, const std::string & m, std::string & s ) | 148 struct a_esc_append
| 149 {
| 150 static void matched( Ps::parser_state &, const std::string & m, std::string & s )
| 151 {
| 152 //CERR << "a_esc_append<"<<IsEsc<<">:["<<m<<"]\n";
| 153 if( IsEsc )
| 154 {
| 155 std::istringstream is(m);
| 156 short x = '?';
| 157 is >> std::hex >> x; // weird: reading directly into a char-type x misses the std::hex handling
| 158 s.push_back(std::string::value_type(x));
| 159 }
| 160 else
| 161 {
| 162 s += m;
| 163 }
| 164 }
| 165 };
| 166
| 167
| 168 /**
| 169 Action to set a value. Uses the last-read key as the lookup key.
| 170
| 171 The value is automatically un-percent-escaped (e.g. %20 is converted
| 172 to a space).
| 173 */
| 174 struct a_kvp_set_val
81 { 175 {
82 //CERR << "a_esc_append<"<<IsEsc<<">:["<<m<<"]\n"; | 176 static void matched( Ps::parser_state &, const std::string & m, url_state & s )
83 if( IsEsc ) |
84 { 177 {
85 std::istringstream is(m); | 178 //COUT << "a_kvp_set_val: " << s.key << '='<<m << '\n';
86 short x = '?'; | 179 if( ! m.empty() )
87 is >> std::hex >> x; // weird: reading directly into a char-type x misses the std::hex handling | 180 {
88 s.push_back(std::string::value_type(x)); | 181 std::string unesc;
| 182 typedef r_ch<'%'> PCT;
| 183 typedef r_repeat<r_xdigit,2> DIGITS;
| 184 typedef r_and<RL<PCT,DIGITS> > ESC;
| 185 typedef r_and<RL<PCT,
| 186 r_action< DIGITS, a_esc_append<true> >
| 187 > > AESC;
| 188 typedef r_action< r_plus< r_and<RL<r_notat<ESC>,r_advance<1> > > >, a_esc_append<false> > NESC;
| 189 typedef r_plus< r_or< RL< AESC, NESC > > > START;
| 190 Ps::parser_state ps(m);
| 191 Ps::parse<START>(ps, unesc);
| 192 s.params[s.key] = unesc;
| 193 }
| 194 else
| 195 {
| 196 s.params[s.key] = m;
| 197 }
| 198 s.key = "";
89 } 199 }
90 else | 200 };
| 201
| 202 /**
| 203 Matches on a key-name field (an Identifier-style string). On a match
| 204 it activates a_kvp_set_key.
| 205 */
| 206 struct r_kvp_key
| 207 : r_action< r_identifier, a_kvp_set_key >
| 208 {};
| 209
| 210 /**
| 211 Matches any char up to a '&'. Calls a_kvp_set_val on match.
| 212 */
| 213 struct r_kvp_val
| 214 : r_action< r_star< r_notch< '&' > >, a_kvp_set_val >
| 215 {};
| 216
| 217 /** Reads a single key/val pair. */
| 218 struct r_kvp
| 219 : r_and< RL<
| 220 r_kvp_key,
| 221 r_ch<'='>,
| 222 r_opt<r_kvp_val>
| 223 > >
| 224 {};
| 225
| 226 /** Reads a sequence of key/val pairs, delimited by '&' (http-style). */
| 227 struct r_kvps
| 228 : r_and< RL< r_kvp, r_star< r_and< RL< r_ch<'&'>, r_opt<r_kvp> > > > > >
| 229 {};
| 230
| 231 struct a_port
| 232 {
| 233 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
91 { 234 {
92 s += m; | 235 //COUT << "port=" << v <<'\n';
| 236 s.parts["port"] = v;
93 } 237 }
94 } | 238 };
95 }; | 239
96 | 240 struct r_port
97 | 241 : r_and< RL< r_ch<':'>,
98 /** | 242 r_error_unless< r_action< r_plus<r_digit>, a_port >, ErrorNoPort >
99 Action to set a value. Uses the last-read key as the lookup key. | 243 > >
100 | 244 {};
101 The value is automatically un-percent-escaped (e.g. %20 is converted | 245
102 to a space). | 246 struct a_host
103 */ |
104 struct a_kvp_set_val |
105 { |
106 static void matched( Ps::parser_state &, const std::string & m, url_state & s ) |
107 { |
108 //COUT << "a_kvp_set_val: " << s.key << '='<<m << '\n'; |
109 if( ! m.empty() ) |
110 { |
111 std::string unesc; |
112 typedef r_ch<'%'> PCT; |
113 typedef r_repeat<r_xdigit,2> DIGITS; |
114 typedef r_and<RL<PCT,DIGITS> > ESC; |
115 typedef r_and<RL<PCT, |
116 r_action< DIGITS, a_esc_append<true> > |
117 > > AESC; |
118 typedef r_action< r_plus< r_and<RL<r_notat<ESC>,r_advance<1> > > >, a_esc_append<false> > NESC; |
119 typedef r_plus< r_or< RL< AESC, NESC > > > START; |
120 Ps::parser_state ps(m); |
121 Ps::parse<START>(ps, unesc); |
122 s.params[s.key] = unesc; |
123 } |
124 else |
125 { |
126 s.params[s.key] = m; |
127 } |
128 s.key = ""; |
129 } |
130 }; |
131 |
132 /** |
133 Matches on a key-name field (an Identifier-style string). On a match |
134 It activated a_kvp_set_key. |
135 */ |
136 struct r_kvp_key |
137 : r_action< r_identifier, a_kvp_set_key > |
138 {}; |
139 |
140 /** |
141 Matches any char up to a '&'. Calls a_kvp_set_val on match. |
142 */ |
143 struct r_kvp_val |
144 : r_action< r_star< r_notch< '&' > >, a_kvp_set_val > |
145 {}; |
146 |
147 /** Reads a single key/val pair. */ |
148 struct r_kvp |
149 : r_and< RL< |
150 r_kvp_key, |
151 r_ch<'='>, |
152 r_opt<r_kvp_val> |
153 > > |
154 {}; |
155 |
156 /** Reads a sequence of key/val pairs, delimited by '&' (http-style). */ |
157 struct r_kvps |
158 : r_and< RL< r_kvp, r_star< r_and< RL< r_ch<'&'>, r_opt<r_kvp> > > > > > |
159 {}; |
160 |
161 struct a_port |
162 { |
163 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
164 { 247 {
165 //COUT << "port=" << v <<'\n'; | 248 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
166 s.parts["port"] = v; | 249 {
167 } | 250 //COUT << "host=" << v <<'\n';
168 }; | 251 s.parts["host"] = v;
169 | 252 }
170 struct r_port | 253 };
171 : r_and< RL< r_ch<':'>, r_action< r_plus<r_digit>, a_port > > > | 254 struct r_hostname
172 {}; | 255 : r_plus< r_and< RL< r_plus<
173 |
174 struct a_host |
175 { |
176 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
177 { |
178 //COUT << "host=" << v <<'\n'; |
179 s.parts["host"] = v; |
180 } |
181 }; |
182 |
183 struct r_hostname |
184 : r_plus< r_and< RL< r_plus< |
185 r_and< RL< r_or< RL< r_alnum, r_ch<'-'>, r_ch<'_'> > >, r_opt< r_ch<'.'> > > > 256 r_and< RL< r_or< RL< r_alnum, r_ch<'-'>, r_ch<'_'> > >, r_opt< r_ch<'.'> > > >
186 > > > > 257 > > > >
187 /* FIXME: matches with a trailing dot (e.g. foo.com.) */ | 258 /* FIXME: matches with a trailing dot (e.g. foo.com.) */
188 /* FIXME: matches many bogus hostnames, like "----" or "foo.c-" */ | 259 /* FIXME: matches many bogus hostnames, like "----" or "foo.c-" */
189 {}; | 260 {};
190 261
191 struct r_host | 262 struct r_host
192 : r_action< r_hostname, a_host > | 263 : r_action< r_hostname, a_host >
193 {}; | 264 {};
194 265
195 struct a_scheme | 266 struct a_scheme
196 { | 267 {
197 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | 268 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
| 269 {
| 270 std::string val = v.substr( 0, v.find(':') );
| 271 //COUT << "scheme=" << val <<'\n';
| 272 s.parts["scheme"] = val;
| 273 }
| 274 };
| 275
| 276 struct r_scheme
| 277 : r_and< RL< r_action< r_identifier, a_scheme >, r_ch<':'>, r_plus< r_ch<'/'> > > >
| 278 {};
| 279
| 280 struct a_path
198 { 281 {
199 std::string val = v.substr( 0, v.find(':') ); | 282 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
200 //COUT << "scheme=" << val <<'\n'; | 283 {
201 s.parts["scheme"] = val; | 284 //COUT << "path=" << v <<'\n';
202 } | 285 s.parts["path"] = v;
203 }; | 286 }
| 287 };
204 288
205 struct r_scheme | 289 struct r_params
206 : r_and< RL< r_action< r_identifier, a_scheme >, r_ch<':'>, r_plus< r_ch<'/'> > > > | 290 : r_and< RL< r_ch<'?'>, r_kvps > >
207 {}; | 291 {};
208 292
209 struct a_path | 293 struct r_path
210 { | 294 : r_action< r_star< r_notch<'?'> >, a_path >
211 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | 295 {};
| 296
| 297 struct a_username
212 { 298 {
213 //COUT << "path=" << v <<'\n'; | 299 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
214 s.parts["path"] = v; | 300 {
215 } | 301 //COUT << "user=" << v <<'\n';
216 }; | 302 s.parts["user"] = v;
| 303 }
| 304 };
217 305
218 struct r_params | 306 struct r_user
219 : r_and< RL< r_ch<'?'>, r_kvps > > | 307 : r_plus<r_identifier>
220 {}; | 308 {};
221 309
222 struct r_path | 310 struct a_password
223 : r_action< r_star< r_notch<'?'> >, a_path > |
224 {}; |
225 |
226 struct a_username |
227 { |
228 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
229 { 311 {
230 //COUT << "user=" << v <<'\n'; | 312 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
231 s.parts["user"] = v; | 313 {
232 } | 314 //COUT << "password=" << v <<'\n';
233 }; | 315 s.parts["password"] = v;
| 316 }
| 317 };
234 318
235 struct r_user | 319 struct r_password
236 : r_plus<r_alnum> | 320 : r_action< r_plus< r_notch<'@'> >, a_password >
237 {}; | 321 {};
238 322
239 struct a_password | 323 struct a_login
240 { |
241 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
242 { 324 {
243 //COUT << "password=" << v <<'\n'; | 325 static void matched( Ps::parser_state &, const std::string & v, url_state & s )
244 s.parts["password"] = v; |
245 } |
246 }; |
247 |
248 struct r_password |
249 : r_action< r_plus< r_notch<'@'> >, a_password > |
250 {}; |
251 |
252 struct a_login |
253 { |
254 static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
255 { |
256 std::string val = v; |
257 if( val.empty() ) return; // can't happen? |
258 // chop trailing '@' (work around unfortunate parser design aspect) |
259 if( '@' == val[val.size()-1] ) val.resize(val.size()-1); |
260 std::string::size_type pos = val.find(':'); |
261 |
262 if( pos != std::string::npos ) |
263 { 326 {
264 s.parts["user"] = val.substr( 0, pos ); | 327 std::string val = v;
265 s.parts["password"] = val.substr( pos+1 ); | 328 if( val.empty() ) return; // can't happen?
266 //COUT << "user=" << s.parts["user"] <<'\n'; | 329 // chop trailing '@' (work around unfortunate parser design aspect)
267 //COUT << "password=" << s.parts["password"] <<'\n'; | 330 if( '@' == val[val.size()-1] ) val.resize(val.size()-1);
| 331 std::string::size_type pos = val.find(':');
| 332 if( pos != std::string::npos )
| 333 {
| 334 s.parts["user"] = val.substr( 0, pos );
| 335 s.parts["password"] = val.substr( pos+1 );
| 336 //COUT << "user=" << s.parts["user"] <<'\n';
| 337 //COUT << "password=" << s.parts["password"] <<'\n';
| 338 }
| 339 else
| 340 {
| 341 s.parts["user"] = val;
| 342 //COUT << "user=" << val <<'\n';
| 343 }
268 } 344 }
269 else | 345 };
270 { | 346
271 s.parts["user"] = val; | 347 /**
272 //COUT << "user=" << val <<'\n'; | 348 r_alnum+(:[^@]))?@
273 } | 349 */
274 } | 350 struct r_login
275 }; | 351 : r_action<
276 | 352 r_and< RL<
277 /** | 353 r_user,
278 r_alnum+(:[^@]))?@ | 354 r_opt< r_and<RL<r_ch<':'>,r_opt<r_password> > > >,
279 */ | 355 r_ch<'@'>
280 struct r_login | 356 > >,
281 : r_action< | 357 a_login>
282 r_and< RL< | 358 {};
283 r_user, | 359
284 r_opt< r_and<RL<r_ch<':'>,r_opt<r_password> > > >, | 360 struct r_filepath
285 r_ch<'@'> | 361 : r_plus< r_any >
286 > >, | 362 {};
287 a_login> | 363 } // namespace Detail
288 {}; |
289 |
290 struct r_filepath |
291 : r_plus< r_any > |
292 {}; |
293 364
294 /** 365 /**
295 File URLs need special handling, due to the varying conventions regarding 366 File URLs need special handling, due to the varying conventions regarding
296 the number of leading slashes. e.g. file:/etc/hosts vs file:///etc/hosts, 367 the number of leading slashes. e.g. file:/etc/hosts vs file:///etc/hosts,
297 the second is correct but many implementations allow the first form. 368 the second is correct but many implementations allow the first form.
298 <
299 seq< action< seq< string<'f','i','l','e'>, one<':'> >, a_scheme >, <
300 seq< opt< string<'/','/'> >, action< r_filepath, a_path > > > <
301 <
302 */ 369 */
303 struct r_fileurl : 370 struct r_fileurl :
304 r_and< RL< 371 r_and< RL<
305 r_action< r_and< RL< | 372 r_action< r_and< RL<
306 r_chseq<CL<'f','i','l','e'> >, | 373 r_chseq<CL<'f','i','l','e'> >,
307 r_ch<':'> | 374 r_ch<':'>
308 > >, a_scheme >, | 375 > >, Detail::a_scheme >,
309 r_notat< r_ch<':'> >, | 376 r_error_if< r_ch<':'>, Ps::Errors::IllegalCharacter >,
310 r_and<RL< r_opt< r_repeat< r_ch<'/'>,2> >, | 377 r_opt< r_repeat< r_ch<'/'>,2> >,
311 r_action< r_filepath, a_path > | 378 r_action< Detail::r_filepath, Detail::a_path >
312 > > 379 > >
> 380 {};
> 381
> 382 /**
> 383 Parses a more or less standard URL.
> 384 */
> 385 struct r_stdurl
> 386 : r_and< RL< Detail::r_scheme,
> 387 r_opt<Detail::r_login>,
> 388 r_error_unless< Detail::r_host, ErrorInvalidHost >,
> 389 r_opt<Detail::r_port>,
> 390 r_opt<Detail::r_path>,
> 391 r_opt<Detail::r_params>
313 > > 392 > >
314 {}; 393 {};
315 394
316 struct r_stdurl | 395 /**
317 : r_and< RL< r_scheme, | 396 A rule for parsing file:// and other:// URLs.
318 r_opt<r_login>, | 397 Use a url_state object as the parse state:
319 r_host, | 398
320 r_opt< r_and< RL< r_opt<r_port>, r_opt<r_path>, r_opt<r_params> > > > | 399 \code
321 > > | 400 url_state st;
322 {}; | 401 if( Ps::parse<Ps::url::r_url>( "http://s11n.net", st ) ) { ... }
| 402 \endcode
| 403
| 404 Be aware that some types of syntax errors cause exceptions to
| 405 be thrown. We only throw errors when:
| 406
| 407 - we can easily write the rules handling to do so.
| 408 - we can provide an exact error location and halfway meaningful message.
323 409
> 410 If you don't want exceptions then use parse_url() instead, which
> 411 swallows exceptions.
> 412 */
324 struct r_url 413 struct r_url
325 : r_or< RL< r_fileurl, r_stdurl > > 414 : r_or< RL< r_fileurl, r_stdurl > >
326 {}; 415 {};
327 416
328 #undef RL 417 #undef RL
329 #undef CL 418 #undef CL
330 419
> 420
331 /** 421 /**
332 Convenience function to parse the given URL and stuff the data 422 Convenience function to parse the given URL and stuff the data
333 into tgt. On success, tgt contains valid URL data. On error, 423 into tgt. On success, tgt contains valid URL data. On error,
334 tgt is not modified. 424 tgt is not modified.
> 425
> 426 This function silently swallows all exceptions and translates them
> 427 to a false return value. If you want more precise information about
> 428 certain types of parse errors, use Ps::parse<r_url>(src,tgt) instead,
> 429 as it will propagate the exception back to you.
335 */ 430 */
336 inline bool parse_url( std::string const & src, url_state & tgt ) 431 inline bool parse_url( std::string const & src, url_state & tgt )
337 { 432 {
338 url_state tmp; | 433 try
339 return Ps::parse< r_url >( src, tmp ) | 434 {
340 ? ((tgt=tmp),true) | 435 url_state tmp;
341 : false; | 436 return Ps::parse< r_url >( src, tmp )
| 437 ? ((tgt=tmp),true)
| 438 : false;
| 439 }
| 440 catch(...)
| 441 {
| 442 return false;
| 443 }
342 } 444 }
343 445
344 }} // namespaces 446 }} // namespaces
345 447
346 #endif // S11N_NET_PARSEPP_URL_H_INCLUDED 448 #endif // S11N_NET_PARSEPP_URL_H_INCLUDED

Changes to url.cpp

Old (0632db388b79a9f9) New (c056d13716177953)
1 #include "parsepp.hpp" 1 #include "parsepp.hpp"
2 2
3 #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : " 3 #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : "
4 #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : " 4 #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : "
5 #define DOUT if(0) COUT 5 #define DOUT if(0) COUT
14 hidden lines
20 { 20 {
21 for ( int arg = 0; arg < argc; ++arg ) { 21 for ( int arg = 0; arg < argc; ++arg ) {
22 if( (argc>1) && (0==arg) ) continue; 22 if( (argc>1) && (0==arg) ) continue;
23 line = (argc>1) ? argv[ arg ] : scr.c_str(); 23 line = (argc>1) ? argv[ arg ] : scr.c_str();
24 COUT << "Trying: " << line << '\n'; 24 COUT << "Trying: " << line << '\n';
25 if ( Ps::url::parse_url( line, state ) ) { | 25 if ( parse<Ps::url::r_url>( line, state ) ) {
26 COUT << "Parsed OK: " << line << '\n'; 26 COUT << "Parsed OK: " << line << '\n';
27 } 27 }
28 else { 28 else {
29 COUT << "Parsed FAILED: " << line << '\n'; 29 COUT << "Parsed FAILED: " << line << '\n';
30 return 1; 30 return 1;
21 hidden lines
52 CERR << "Parse exception:\n" << ex.what() << '\n'; 52 CERR << "Parse exception:\n" << ex.what() << '\n';
53 return 1; 53 return 1;
54 } 54 }
55 return 0; 55 return 0;
56 } 56 }