Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | ef13396f4628bcc4554b841ede8bb1de09d4a958 |
|---|---|
| Date: | 2008-09-18 17:57:24 |
| User: | stephan |
| Comment: | added a few routines and moved others into new files |
Tags And Properties
- branch=trunk inherited from [d45e7467f2]
- sym-trunk inherited from [d45e7467f2]
Changes
Changes to parsepp.hpp
| Old (11715259fcc08ebe) | New (f67998f8636f5364) | |||
|---|---|---|---|---|
| 1 | #ifndef s11n_net_PARSEPP_HPP_INCLUDED | 1 | #ifndef s11n_net_PARSEPP_HPP_INCLUDED | |
| 2 | #define s11n_net_PARSEPP_HPP_INCLUDED | 2 | #define s11n_net_PARSEPP_HPP_INCLUDED | |
| 3 | 3 | |||
| 4 | #include <map> | 4 | #include <map> | |
| 5 | #include <string> | 5 | #include <string> | |
| 1061 hidden lines | ||||
| 1067 | parse_iterator pos( in.pos() ); | 1067 | parse_iterator pos( in.pos() ); | |
| 1068 | Action::matched( in, std::string(start,pos), st ); | 1068 | Action::matched( in, std::string(start,pos), st ); | |
| 1069 | return true; | 1069 | return true; | |
| 1070 | } | 1070 | } | |
| 1071 | }; | 1071 | }; | |
| > | 1072 | /** | ||
| > | 1073 | Alias for r_action<Rule,Action> | ||
| > | 1074 | */ | ||
| > | 1075 | template <typename Rule, typename Action> | ||
| > | 1076 | struct r_if : r_action< Rule, Action > | ||
| > | 1077 | {}; | ||
| > | 1078 | |||
| > | 1079 | /** | ||
| > | 1080 | Similar to r_action, if Rule matches then ActionIf::matched() | ||
| > | 1081 | is called and true is returned, otherwise ActionElse::matched() | ||
| > | 1082 | is called and true is returned. | ||
| > | 1083 | |||
| > | 1084 | There is no guarantee as to whether this function consumes input | ||
| > | 1085 | - that depends entirely on Rule. If Rule does not match then no | ||
| > | 1086 | input is consumed (assuming Rule conforms to the rules). | ||
| > | 1087 | */ | ||
| > | 1088 | template <typename Rule, typename ActionIf, typename ActionElse> | ||
| > | 1089 | struct r_ifelse | ||
| > | 1090 | { | ||
| > | 1091 | typedef r_ifelse type; | ||
| > | 1092 | template <typename ClientState> | ||
| > | 1093 | inline static bool matches( parser_state & in, ClientState & st ) | ||
| > | 1094 | { | ||
| > | 1095 | if( ! r_action<Rule,ActionIf>::matches(in,st) ) | ||
| > | 1096 | { | ||
| > | 1097 | r_action<r_success,ActionElse>::matches(in,st); | ||
| > | 1098 | } | ||
| > | 1099 | return true; | ||
| > | 1100 | } | ||
| > | 1101 | }; | ||
| 1072 | 1102 | |||
| 1073 | 1103 | |||
| 1074 | namespace Detail { | 1104 | namespace Detail { | |
| 1075 | using namespace Ps; | 1105 | using namespace Ps; | |
| 1076 | /** Internal implementation detail of a_actions_impl<>. */ | 1106 | /** Internal implementation detail of a_actions_impl<>. */ | |
| 236 hidden lines | ||||
| 1313 | ++in; | 1343 | ++in; | |
| 1314 | } | 1344 | } | |
| 1315 | return sentry( i == Count ); | 1345 | return sentry( i == Count ); | |
| 1316 | } | 1346 | } | |
| 1317 | }; | 1347 | }; | |
| 1318 | < | |||
| 1319 | /** | < | ||
| 1320 | Consumes no input and throws an ExceptionType. | < | ||
| 1321 | */ | < | ||
| 1322 | template <typename ExceptionType> | < | ||
| 1323 | struct r_throw_base | < | ||
| 1324 | { | < | ||
| 1325 | typedef r_throw_base base; | < | ||
| 1326 | template <typename State> | < | ||
| 1327 | static bool matches( parser_state &, State & ) | < | ||
| 1328 | { | < | ||
| 1329 | throw ExceptionType(); | < | ||
| 1330 | } | < | ||
| 1331 | }; | < | ||
| 1332 | < | |||
| 1333 | /** | < | ||
| 1334 | Specialized to ensure the error point is marked. | < | ||
| 1335 | */ | < | ||
| 1336 | template <> | < | ||
| 1337 | struct r_throw_base<parse_error> | < | ||
| 1338 | { | < | ||
| 1339 | typedef r_throw_base type; | < | ||
| 1340 | template <typename State> | < | ||
| 1341 | static bool matches( parser_state & in, State & ) | < | ||
| 1342 | { | < | ||
| 1343 | throw parse_error(in, "parse error triggered by r_throw" ); | < | ||
| 1344 | return false; | < | ||
| 1345 | } | < | ||
| 1346 | }; | < | ||
| 1347 | < | |||
| 1348 | typedef r_throw_base<parse_error> r_throw; | < | ||
| 1349 | < | |||
| 1350 | < | |||
| 1351 | /** | < | ||
| 1352 | A list of "standard" error IDs, for use with the error_msg<> | < | ||
| 1353 | template. | < | ||
| 1354 | */ | < | ||
| 1355 | enum StandardErrorIDs { | < | ||
| 1356 | UnknownError = 0, | < | ||
| 1357 | UnexpectedCharacter = 1, | < | ||
| 1358 | UserIDs = 1000 /* client-side IDs should start here. */ | < | ||
| 1359 | }; | < | ||
| 1360 | template <int ErrorNumber = UnknownError> | < | ||
| 1361 | struct error_msg | < | ||
| 1362 | { | < | ||
| 1363 | template <typename State> | < | ||
| 1364 | static std::string message( parser_state &, State & ) | < | ||
| 1365 | { | < | ||
| 1366 | return "Unknown/unspecified parsing error"; | < | ||
| 1367 | } | < | ||
| 1368 | }; | < | ||
| 1369 | < | |||
| 1370 | < | |||
| 1371 | /** | < | ||
| 1372 | Specialization for UnexpectedCharacter errors. | < | ||
| 1373 | */ | < | ||
| 1374 | template <> | < | ||
| 1375 | struct error_msg<UnexpectedCharacter> | < | ||
| 1376 | { | < | ||
| 1377 | template <typename State> | < | ||
| 1378 | static std::string message( parser_state & ps, State & ) | < | ||
| 1379 | { | < | ||
| 1380 | std::string msg("Unexpected character '"); | < | ||
| 1381 | msg.push_back(*ps.pos()); | < | ||
| 1382 | msg.push_back('\''); | < | ||
| 1383 | return msg; | < | ||
| 1384 | } | < | ||
| 1385 | }; | < | ||
| 1386 | < | |||
| 1387 | /** | < | ||
| 1388 | Similar to r_throw, this rule throws a parse_error | < | ||
| 1389 | exception. The what() text of the exception is the text of | < | ||
| 1390 | error_msg<ErrorNumber>, allowing one to specialize error_msg to | < | ||
| 1391 | create custom error messages. The where() part of the exception | < | ||
| 1392 | < | |||
| 1393 | < | |||
| 1394 | Design note: another alternative to solve this problem would be | < | ||
| 1395 | to use a static map<int,string>, but then we'd need to provide | < | ||
| 1396 | .cpp files along with the .hpp files for this lib, and i don't | < | ||
| 1397 | wanna do that. | < | ||
| 1398 | */ | < | ||
| 1399 | template <int ErrorNumber> | < | ||
| 1400 | struct r_error | < | ||
| 1401 | { | < | ||
| 1402 | typedef r_error type; | < | ||
| 1403 | template <typename State> | < | ||
| 1404 | static bool matches( parser_state & in, State & st ) | < | ||
| 1405 | { | < | ||
| 1406 | throw parse_error( in, error_msg<ErrorNumber>::message(in,st) ); | < | ||
| 1407 | return false; | < | ||
| 1408 | } | < | ||
| 1409 | }; | < | ||
| 1410 | < | |||
| 1411 | 1348 | |||
| 1412 | /** | 1349 | /** | |
| 1413 | Matches any characters in the range [Min..Max] | 1350 | Matches any characters in the range [Min..Max] | |
| 1414 | */ | 1351 | */ | |
| 1415 | template< int Min, int Max > | 1352 | template< int Min, int Max > | |
| 366 hidden lines | ||||
| 1782 | Matches the combination \r\n or a single \n. | 1719 | Matches the combination \r\n or a single \n. | |
| 1783 | */ | 1720 | */ | |
| 1784 | struct r_eol : | 1721 | struct r_eol : | |
| 1785 | r_or< rule_list< r_chseq< char_list<'\r','\n'> >, r_ch<'\n'> > > | 1722 | r_or< rule_list< r_chseq< char_list<'\r','\n'> >, r_ch<'\n'> > > | |
| 1786 | {}; | 1723 | {}; | |
| 1787 | < | |||
| 1788 | < | |||
| 1789 | /** Parser for C++-style comments. */ | < | ||
| 1790 | struct r_comment_cpp : | < | ||
| 1791 | r_and< rule_list< | < | ||
| 1792 | r_chseq< char_list<'/','/'> >, | < | ||
| 1793 | r_star< r_notch<'\n'> >, | < | ||
| 1794 | r_eol | < | ||
| 1795 | > > | < | ||
| 1796 | {}; | < | ||
| 1797 | < | |||
| 1798 | template <typename R> | < | ||
| 1799 | struct r_throw_if : r_and< rule_list< | < | ||
| 1800 | r_and< rule_list<R,r_throw> >, | < | ||
| 1801 | r_success > > | < | ||
| 1802 | {}; | < | ||
| 1803 | < | |||
| 1804 | namespace Detail | < | ||
| 1805 | { | < | ||
| 1806 | /** Inner part of a C++ comment. Consumes until '*' followed by '/'. */ | < | ||
| 1807 | struct r_comment_c_inner : r_and< rule_list< | < | ||
| 1808 | r_star< r_and< rule_list< | < | ||
| 1809 | r_notat< r_chseq< char_list<'*','/'> > >, | < | ||
| 1810 | r_advance<1> | < | ||
| 1811 | > > >, | < | ||
| 1812 | r_throw_if< r_eof > | < | ||
| 1813 | > > | < | ||
| 1814 | {}; | < | ||
| 1815 | } | < | ||
| 1816 | /** Parser for C-style comments. */ | < | ||
| 1817 | struct r_comment_c : | < | ||
| 1818 | r_and< rule_list< | < | ||
| 1819 | r_chseq< char_list<'/','*'> >, | < | ||
| 1820 | Detail::r_comment_c_inner, | < | ||
| 1821 | r_chseq< char_list<'*','/'> > | < | ||
| 1822 | > > | < | ||
| 1823 | {}; | < | ||
| 1824 | < | |||
| 1825 | 1724 | |||
| 1826 | 1725 | |||
| 1827 | namespace Detail { | 1726 | namespace Detail { | |
| 1828 | using namespace Ps; | 1727 | using namespace Ps; | |
| 1829 | struct line_col_state | 1728 | struct line_col_state | |
| 52 hidden lines | ||||
| 1882 | 1781 | |||
| 1883 | 1782 | |||
| 1884 | } // namespace | 1783 | } // namespace | |
| 1885 | 1784 | |||
| 1886 | #endif // s11n_net_PARSEPP_HPP_INCLUDED | 1785 | #endif // s11n_net_PARSEPP_HPP_INCLUDED | |
Added parsepp_cish.hpp
| Old () | New (a8228ff6b11adbb2) | |||
|---|---|---|---|---|
| > | 1 | #ifndef s11n_net_PARSEPP_CISH_HPP_INCLUDED | ||
| > | 2 | #define s11n_net_PARSEPP_CISH_HPP_INCLUDED | ||
| > | 3 | |||
| > | 4 | #include "parsepp.hpp" | ||
| > | 5 | #include "parsepp_typelist.hpp" | ||
| > | 6 | #include "parsepp_err.hpp" | ||
| > | 7 | namespace Ps { | ||
| > | 8 | |||
| > | 9 | |||
| > | 10 | /** Parser for C++-style comments. */ | ||
| > | 11 | struct r_comment_cpp : | ||
| > | 12 | r_and< rule_list< | ||
| > | 13 | r_chseq< char_list<'/','/'> >, | ||
| > | 14 | r_star< r_notch<'\n'> >, | ||
| > | 15 | r_eol | ||
| > | 16 | > > | ||
| > | 17 | {}; | ||
| > | 18 | |||
| > | 19 | namespace Detail | ||
| > | 20 | { | ||
| > | 21 | /** Inner part of a C-style comment. Consumes until '*' followed by '/'. */ | ||
| > | 22 | struct r_comment_c_inner : r_and< rule_list< | ||
| > | 23 | r_star< r_and< rule_list< | ||
| > | 24 | r_notat< r_chseq< char_list<'*','/'> > >, | ||
| > | 25 | r_advance<1> | ||
| > | 26 | > > >, | ||
| > | 27 | r_error_if< r_eof, Errors::UnclosedComment > | ||
| > | 28 | > > | ||
| > | 29 | {}; | ||
| > | 30 | } | ||
| > | 31 | /** Parser for C-style comments. */ | ||
| > | 32 | struct r_comment_c : | ||
| > | 33 | r_and< rule_list< | ||
| > | 34 | r_chseq< char_list<'/','*'> >, | ||
| > | 35 | Detail::r_comment_c_inner, | ||
| > | 36 | r_chseq< char_list<'*','/'> > | ||
| > | 37 | > > | ||
| > | 38 | {}; | ||
| > | 39 | |||
| > | 40 | } // namespace | ||
| > | 41 | |||
| > | 42 | #endif // s11n_net_PARSEPP_CISH_HPP_INCLUDED | ||
Added parsepp_err.hpp
| Old () | New (c8dbcf42b9e25c15) | |||
|---|---|---|---|---|
| > | 1 | #ifndef s11n_net_PARSEPP_ERRORS_HPP_INCLUDED | ||
| > | 2 | #define s11n_net_PARSEPP_ERRORS_HPP_INCLUDED | ||
| > | 3 | /************************************************************************ | ||
| > | 4 | This file contains supplemental error-handling-related code for the | ||
| > | 5 | parsepp toolkit. | ||
| > | 6 | |||
| > | 7 | Author: Stephan Beal (http://wanderinghorse.net/home/stephan) | ||
| > | 8 | License: Public Domain | ||
| > | 9 | ************************************************************************/ | ||
| > | 10 | #include <map> | ||
| > | 11 | #include <string> | ||
| > | 12 | #include <cassert> | ||
| > | 13 | #include <iostream> | ||
| > | 14 | #include <sstream> | ||
| > | 15 | #include <stdexcept> | ||
| > | 16 | #include <vector> | ||
| > | 17 | #include <list> | ||
| > | 18 | #include <set> | ||
| > | 19 | |||
| > | 20 | #include "parsepp.hpp" | ||
| > | 21 | #include "parsepp_typelist.hpp" | ||
| > | 22 | |||
| > | 23 | namespace Ps { | ||
| > | 24 | /** | ||
| > | 25 | Consumes no input and throws an ExceptionType. | ||
| > | 26 | */ | ||
| > | 27 | template <typename ExceptionType> | ||
| > | 28 | struct r_throw_base | ||
| > | 29 | { | ||
| > | 30 | typedef r_throw_base base; | ||
| > | 31 | template <typename State> | ||
| > | 32 | static bool matches( parser_state &, State & ) | ||
| > | 33 | { | ||
| > | 34 | throw ExceptionType(); | ||
| > | 35 | } | ||
| > | 36 | }; | ||
| > | 37 | |||
| > | 38 | /** | ||
| > | 39 | Specialized to ensure the error point is marked. | ||
| > | 40 | */ | ||
| > | 41 | template <> | ||
| > | 42 | struct r_throw_base<parse_error> | ||
| > | 43 | { | ||
| > | 44 | typedef r_throw_base type; | ||
| > | 45 | template <typename State> | ||
| > | 46 | static bool matches( parser_state & in, State & ) | ||
| > | 47 | { | ||
| > | 48 | throw parse_error(in, "parse error triggered by r_throw" ); | ||
| > | 49 | return false; | ||
| > | 50 | } | ||
| > | 51 | }; | ||
| > | 52 | |||
| > | 53 | typedef r_throw_base<parse_error> r_throw; | ||
| > | 54 | |||
| > | 55 | |||
| > | 56 | /** | ||
| > | 57 | A list of "standard" error IDs, for use with the error_msg<> | ||
| > | 58 | template. Client-side parsers which use r_error, a_error, | ||
| > | 59 | r_error_if, r_error_unless and related functions may specialize | ||
| > | 60 | error_msg<int> with their own error number values. Clients | ||
| > | 61 | should not use values less than UserErrorBegin. The values need | ||
| > | 62 | to be unique within the context of an application (including | ||
| > | 63 | its libraries). Thus two parsers may use the same unique IDs if | ||
| > | 64 | there is no chance that they will end up in the same library. | ||
| > | 65 | */ | ||
| > | 66 | struct Errors | ||
| > | 67 | { | ||
| > | 68 | enum IDs { | ||
| > | 69 | Unknown = 0, | ||
| > | 70 | UnexpectedCharacter, | ||
| > | 71 | IllegalCharacter, | ||
| > | 72 | UnexpectedEOF, | ||
| > | 73 | UnclosedComment, | ||
| > | 74 | UserErrorBegin = 1000 /* client-side IDs should start here. */ | ||
| > | 75 | }; | ||
| > | 76 | }; | ||
| > | 77 | template <int ErrorNumber = Errors::Unknown> | ||
| > | 78 | struct error_msg | ||
| > | 79 | { | ||
| > | 80 | template <typename State> | ||
| > | 81 | static std::string message( parser_state &, State & ) | ||
| > | 82 | { | ||
| > | 83 | return "Unknown (or unspecified) parsing error"; | ||
| > | 84 | } | ||
| > | 85 | }; | ||
| > | 86 | |||
| > | 87 | /** | ||
| > | 88 | Specialization for UnexpectedCharacter errors. | ||
| > | 89 | */ | ||
| > | 90 | template <> | ||
| > | 91 | struct error_msg<Errors::UnexpectedCharacter> | ||
| > | 92 | { | ||
| > | 93 | template <typename State> | ||
| > | 94 | static std::string message( parser_state & ps, State & ) | ||
| > | 95 | { | ||
| > | 96 | std::string msg("Unexpected character '"); | ||
| > | 97 | if( ps.eof() ) msg.append("EOF"); | ||
| > | 98 | else msg.push_back(*ps.pos()); | ||
| > | 99 | msg.push_back('\''); | ||
| > | 100 | return msg; | ||
| > | 101 | } | ||
| > | 102 | }; | ||
| > | 103 | template <> | ||
| > | 104 | struct error_msg<Errors::UnexpectedEOF> | ||
| > | 105 | { | ||
| > | 106 | template <typename State> | ||
| > | 107 | static std::string message( parser_state & ps, State & ) | ||
| > | 108 | { | ||
| > | 109 | return "Unexpected end of input"; | ||
| > | 110 | } | ||
| > | 111 | }; | ||
| > | 112 | template <> | ||
| > | 113 | struct error_msg<Errors::UnclosedComment> | ||
| > | 114 | { | ||
| > | 115 | template <typename State> | ||
| > | 116 | static std::string message( parser_state & ps, State & ) | ||
| > | 117 | { | ||
| > | 118 | return "Reached EOF inside of a multi-line comment"; | ||
| > | 119 | } | ||
| > | 120 | }; | ||
| > | 121 | /** | ||
| > | 122 | Specialization for IllegalCharacter errors. | ||
| > | 123 | */ | ||
| > | 124 | template <> | ||
| > | 125 | struct error_msg<Errors::IllegalCharacter> | ||
| > | 126 | { | ||
| > | 127 | template <typename State> | ||
| > | 128 | static std::string message( parser_state & ps, State & ) | ||
| > | 129 | { | ||
| > | 130 | std::string msg("Illegal character '"); | ||
| > | 131 | msg.push_back(*ps.pos()); | ||
| > | 132 | msg.push_back('\''); | ||
| > | 133 | return msg; | ||
| > | 134 | } | ||
| > | 135 | }; | ||
| > | 136 | |||
| > | 137 | /** | ||
| > | 138 | Similar to r_throw, this rule throws a parse_error | ||
| > | 139 | exception. The what() text of the exception is the text of | ||
| > | 140 | error_msg<ErrorNumber>, allowing one to specialize error_msg to | ||
| > | 141 | create custom error messages. The where() part of the exception | ||
| > | 142 | |||
| > | 143 | Design note: another alternative to solve this problem would be | ||
| > | 144 | to use a static map<int,string>, but then we'd need to provide | ||
| > | 145 | .cpp files along with the .hpp files for this lib, and i don't | ||
| > | 146 | wanna do that. | ||
| > | 147 | */ | ||
| > | 148 | template <int ErrorNumber = Errors::Unknown> | ||
| > | 149 | struct r_error | ||
| > | 150 | { | ||
| > | 151 | typedef r_error type; | ||
| > | 152 | template <typename State> | ||
| > | 153 | static bool matches( parser_state & in, State & st ) | ||
| > | 154 | { | ||
| > | 155 | throw parse_error( in, error_msg<ErrorNumber>::message(in,st) ); | ||
| > | 156 | return false; | ||
| > | 157 | } | ||
| > | 158 | }; | ||
| > | 159 | |||
| > | 160 | /** | ||
| > | 161 | If Rule matches then the effect is the same as calling | ||
| > | 162 | r_error<ErrorNumber>::matches(), otherwise it returns | ||
| > | 163 | true. If Rule matches then the input iterator is moved | ||
| > | 164 | back to the point it was at before the match, to allow | ||
| > | 165 | more accurate error reporting. | ||
| > | 166 | */ | ||
| > | 167 | template <typename Rule, int ErrorNumber = Errors::Unknown> | ||
| > | 168 | struct r_error_if | ||
| > | 169 | { | ||
| > | 170 | typedef r_error_if type; | ||
| > | 171 | template <typename State> | ||
| > | 172 | static bool matches( parser_state & in, State & st ) | ||
| > | 173 | { | ||
| > | 174 | parse_iterator pos(in.pos()); | ||
| > | 175 | if( Rule::matches(in,st) ) | ||
| > | 176 | { | ||
| > | 177 | in.pos(pos); | ||
| > | 178 | r_error<ErrorNumber>::matches(in,st); | ||
| > | 179 | } | ||
| > | 180 | return true; | ||
| > | 181 | } | ||
| > | 182 | }; | ||
| > | 183 | |||
| > | 184 | template <typename R> | ||
| > | 185 | struct r_throw_if : r_and< rule_list< | ||
| > | 186 | r_and< rule_list<R,r_throw> >, | ||
| > | 187 | r_success > > | ||
| > | 188 | {}; | ||
| > | 189 | |||
| > | 190 | |||
| > | 191 | |||
| > | 192 | /** | ||
| > | 193 | The evil twin of r_error_if, this Rule throws an error | ||
| > | 194 | if Rule does NOT match. | ||
| > | 195 | */ | ||
| > | 196 | template <typename Rule, int ErrorNumber = Errors::Unknown> | ||
| > | 197 | struct r_error_unless | ||
| > | 198 | { | ||
| > | 199 | typedef r_error_unless type; | ||
| > | 200 | template <typename State> | ||
| > | 201 | static bool matches( parser_state & in, State & st ) | ||
| > | 202 | { | ||
| > | 203 | parse_iterator pos(in.pos()); | ||
| > | 204 | if( ! Rule::matches(in,st) ) | ||
| > | 205 | { | ||
| > | 206 | in.pos(pos); | ||
| > | 207 | r_error<ErrorNumber>::matches(in,st); | ||
| > | 208 | } | ||
| > | 209 | return true; | ||
| > | 210 | } | ||
| > | 211 | }; | ||
| > | 212 | |||
| > | 213 | /** | ||
| > | 214 | This is an Action form of the r_error Rule, mainly for use with | ||
| > | 215 | r_ifelse and similar Rules. | ||
| > | 216 | */ | ||
| > | 217 | template <int ErrorNumber> | ||
| > | 218 | struct a_error | ||
| > | 219 | { | ||
| > | 220 | typedef a_error type; | ||
| > | 221 | template <typename State> | ||
| > | 222 | static void matched( parser_state & in, std::string const &, State & st ) | ||
| > | 223 | { | ||
| > | 224 | throw parse_error( in, error_msg<ErrorNumber>::message(in,st) ); | ||
| > | 225 | } | ||
| > | 226 | }; | ||
| > | 227 | |||
| > | 228 | } // namespace | ||
| > | 229 | |||
| > | 230 | #endif // s11n_net_PARSEPP_ERRORS_HPP_INCLUDED | ||
Changes to parsepp_url.hpp
| Old (23247e29208cbda2) | New (7be932681cc889c1) | |||
|---|---|---|---|---|
| 1 | #ifndef S11N_NET_PARSEPP_URL_H_INCLUDED | 1 | #ifndef S11N_NET_PARSEPP_URL_H_INCLUDED | |
| 2 | #define S11N_NET_PARSEPP_URL_H_INCLUDED 1 | 2 | #define S11N_NET_PARSEPP_URL_H_INCLUDED 1 | |
| 3 | 3 | |||
| 4 | #include "parsepp.hpp" | 4 | #include "parsepp.hpp" | |
| > | 5 | #include "parsepp_err.hpp" | ||
| 5 | #include <map> | 6 | #include <map> | |
| 6 | #include <string> | 7 | #include <string> | |
| 7 | #include <sstream> | 8 | #include <sstream> | |
| 8 | 9 | |||
| 9 | namespace Ps { | 10 | namespace Ps { | |
| 23 hidden lines | ||||
| 33 | such that %XX (where XX is a hexadecimal value of a character) get | 34 | such that %XX (where XX is a hexadecimal value of a character) get | |
| 34 | transformed into their ASCII value (note that results are undefined | 35 | transformed into their ASCII value (note that results are undefined | |
| 35 | here with characters >127). | 36 | here with characters >127). | |
| 36 | 37 | |||
| 37 | */ | 38 | */ | |
| 38 | namespace url { | | | 39 | namespace url { |
| | | 40 | enum ParseErrors { | ||
| | | 41 | ErrorStart = Ps::Errors::UserErrorBegin + 1, | ||
| | | 42 | ErrorNoPort, | ||
| | | 43 | ErrorInvalidHost | ||
| | | 44 | }; | ||
| | | 45 | } | ||
| | | 46 | |||
| | | 47 | template <> | ||
| | | 48 | struct error_msg<url::ErrorNoPort> | ||
| | | 49 | { | ||
| | | 50 | template <typename State> | ||
| | | 51 | static std::string message( parser_state &, State & ) | ||
| | | 52 | { | ||
| | | 53 | return "Port number missing after ':'"; | ||
| | | 54 | } | ||
| | | 55 | }; | ||
| | | 56 | |||
| | | 57 | template <> | ||
| | | 58 | struct error_msg<url::ErrorInvalidHost> | ||
| | | 59 | { | ||
| | | 60 | template <typename State> | ||
| | | 61 | static std::string message( parser_state &ps, State & ) | ||
| | | 62 | { | ||
| | | 63 | return "Invalid host name at: "+std::string(ps.begin().iter(),ps.maxpos().iter()); | ||
| | | 64 | } | ||
| | | 65 | }; | ||
| | | 66 | |||
| | | 67 | } // namespace Ps | ||
| | | 68 | |||
| | | 69 | namespace Ps { namespace url { | ||
| 39 | using namespace Ps; | 70 | using namespace Ps; | |
| 40 | #define RL rule_list | 71 | #define RL rule_list | |
| 41 | #define CL char_list | 72 | #define CL char_list | |
| 42 | /** | 73 | /** | |
| 43 | A holding buffer for parsing key-value pairs. | 74 | A holding buffer for parsing key-value pairs. | |
| 44 | */ | 75 | */ | |
| 45 | struct url_state | 76 | struct url_state | |
| 46 | { | 77 | { | |
| 47 | typedef std::map<std::string,std::string> kvpmap; | 78 | typedef std::map<std::string,std::string> kvpmap; | |
| 48 | kvpmap params; // key/value pairs for GET params | | | 79 | /** |
| 49 | kvpmap parts; // key/value pairs for various parts of URL | | | 80 | Key/value pairs for GET params. Parsed URLs will |
| 50 | std::string key; // buffer for the last-read key. | | | 81 | have their decoded GET arguments put here. |
| | | 82 | */ | ||
| | | 83 | kvpmap params; | ||
| | | 84 | /** | ||
| | | 85 | Key/value pairs for various parts of URL. The standard keys include: | ||
| | | 86 | |||
| | | 87 | - scheme (e.g. http, ftp, file) | ||
| | | 88 | - host (e.g. foo.com) | ||
| | | 89 | - path (e.g. /index.html) | ||
| | | 90 | - port (e.g. 8080) | ||
| | | 91 | - user (e.g. myloginname) | ||
| | | 92 | - password (unencrypted plain text which may not contain an '@' character) | ||
| | | 93 | |||
| | | 94 | Those keys match to the parts of this expression: | ||
| | | 95 | |||
| | | 96 | scheme://[user[:[password]]@]host[:port][path] | ||
| | | 97 | |||
| | | 98 | Any GET parameters at the end of a parsed URL will be put in | ||
| | | 99 | this->params. | ||
| | | 100 | |||
| | | 101 | URL parser implementations differ on how they handle the | ||
| | | 102 | leading slash of a URL path, and whether or not that slash is | ||
| | | 103 | included in the path. If i'm not mistaken, WWW standards say | ||
| | | 104 | it is *not* part of the path. Most implementations, if i'm | ||
| | | 105 | also not mistaken, treat a leading slash as part of the path | ||
| | | 106 | (this is also how, e.g. Apache, expects a GET path, | ||
| | | 107 | AFAIK). This implementation treats the leading slash as | ||
| | | 108 | significant and keeps it. | ||
| | | 109 | */ | ||
| | | 110 | kvpmap parts; | ||
| | | 111 | /** | ||
| | | 112 | This is a temporary buffer used by the parser. It holds the | ||
| | | 113 | most recently parsed GET parameter key. | ||
| | | 114 | */ | ||
| | | 115 | std::string key; | ||
| | | 116 | /** | ||
| | | 117 | Clears the state. | ||
| | | 118 | */ | ||
| 51 | void clear() | 119 | void clear() | |
| 52 | { | 120 | { | |
| 53 | this->params.clear(); | 121 | this->params.clear(); | |
| 54 | this->parts.clear(); | 122 | this->parts.clear(); | |
| 55 | this->key.clear(); | 123 | this->key.clear(); | |
| 56 | } | 124 | } | |
| 57 | }; | 125 | }; | |
| 58 | 126 | |||
| 59 | /** | | | 127 | namespace Detail { |
| 60 | Sets the url_state's current key value. | | | 128 | |
| 61 | */ | | | 129 | /** |
| 62 | struct a_kvp_set_key | | | 130 | Sets the url_state's current key value. |
| 63 | { | | | 131 | */ |
| 64 | static void matched( Ps::parser_state &, const std::string & m, url_state & s ) | | | 132 | struct a_kvp_set_key |
| 65 | { | | | 133 | { |
| 66 | //COUT << "set_key: " << m << '\n'; | | | 134 | static void matched( Ps::parser_state &, const std::string & m, url_state & s ) |
| 67 | s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty. | | | 135 | { |
| 68 | } | | | 136 | //COUT << "set_key: " << m << '\n'; |
| 69 | }; | | | 137 | s.params[s.key = m] = ""; // ensure an empty value in case the val field is empty. |
| 70 | | | 138 | } | |
| 71 | | | 139 | }; | |
| 72 | /** | | | 140 | |
| 73 | Internal helper. If IsEsc then all matches are assumed to be | | | 141 | |
| 74 | %XX escape sequences, otherwise they are assumed to be unescaped | | | 142 | /** |
| 75 | text. | | | 143 | Internal helper. If IsEsc then all matches are assumed to be |
| 76 | */ | | | 144 | %XX escape sequences, otherwise they are assumed to be unescaped |
| 77 | template <bool IsEsc> | | | 145 | text. |
| 78 | struct a_esc_append | | | 146 | */ |
| 79 | { | | | 147 | template <bool IsEsc> |
| 80 | static void matched( Ps::parser_state &, const std::string & m, std::string & s ) | | | 148 | struct a_esc_append |
| | | 149 | { | ||
| | | 150 | static void matched( Ps::parser_state &, const std::string & m, std::string & s ) | ||
| | | 151 | { | ||
| | | 152 | //CERR << "a_esc_append<"<<IsEsc<<">:["<<m<<"]\n"; | ||
| | | 153 | if( IsEsc ) | ||
| | | 154 | { | ||
| | | 155 | std::istringstream is(m); | ||
| | | 156 | short x = '?'; | ||
| | | 157 | is >> std::hex >> x; // weird: reading directly into a char-type x misses the std::hex handling | ||
| | | 158 | s.push_back(std::string::value_type(x)); | ||
| | | 159 | } | ||
| | | 160 | else | ||
| | | 161 | { | ||
| | | 162 | s += m; | ||
| | | 163 | } | ||
| | | 164 | } | ||
| | | 165 | }; | ||
| | | 166 | |||
| | | 167 | |||
| | | 168 | /** | ||
| | | 169 | Action to set a value. Uses the last-read key as the lookup key. | ||
| | | 170 | |||
| | | 171 | The value is automatically un-percent-escaped (e.g. %20 is converted | ||
| | | 172 | to a space). | ||
| | | 173 | */ | ||
| | | 174 | struct a_kvp_set_val | ||
| 81 | { | 175 | { | |
| 82 | //CERR << "a_esc_append<"<<IsEsc<<">:["<<m<<"]\n"; | | | 176 | static void matched( Ps::parser_state &, const std::string & m, url_state & s ) |
| 83 | if( IsEsc ) | | | ||
| 84 | { | 177 | { | |
| 85 | std::istringstream is(m); | | | 178 | //COUT << "a_kvp_set_val: " << s.key << '='<<m << '\n'; |
| 86 | short x = '?'; | | | 179 | if( ! m.empty() ) |
| 87 | is >> std::hex >> x; // weird: reading directly into a char-type x misses the std::hex handling | | | 180 | { |
| 88 | s.push_back(std::string::value_type(x)); | | | 181 | std::string unesc; |
| | | 182 | typedef r_ch<'%'> PCT; | ||
| | | 183 | typedef r_repeat<r_xdigit,2> DIGITS; | ||
| | | 184 | typedef r_and<RL<PCT,DIGITS> > ESC; | ||
| | | 185 | typedef r_and<RL<PCT, | ||
| | | 186 | r_action< DIGITS, a_esc_append<true> > | ||
| | | 187 | > > AESC; | ||
| | | 188 | typedef r_action< r_plus< r_and<RL<r_notat<ESC>,r_advance<1> > > >, a_esc_append<false> > NESC; | ||
| | | 189 | typedef r_plus< r_or< RL< AESC, NESC > > > START; | ||
| | | 190 | Ps::parser_state ps(m); | ||
| | | 191 | Ps::parse<START>(ps, unesc); | ||
| | | 192 | s.params[s.key] = unesc; | ||
| | | 193 | } | ||
| | | 194 | else | ||
| | | 195 | { | ||
| | | 196 | s.params[s.key] = m; | ||
| | | 197 | } | ||
| | | 198 | s.key = ""; | ||
| 89 | } | 199 | } | |
| 90 | else | | | 200 | }; |
| | | 201 | |||
| | | 202 | /** | ||
| | | 203 | Matches on a key-name field (an Identifier-style string). On a match | ||
| | | 204 | it activates a_kvp_set_key. | ||
| | | 205 | */ | ||
| | | 206 | struct r_kvp_key | ||
| | | 207 | : r_action< r_identifier, a_kvp_set_key > | ||
| | | 208 | {}; | ||
| | | 209 | |||
| | | 210 | /** | ||
| | | 211 | Matches any char up to a '&'. Calls a_kvp_set_val on match. | ||
| | | 212 | */ | ||
| | | 213 | struct r_kvp_val | ||
| | | 214 | : r_action< r_star< r_notch< '&' > >, a_kvp_set_val > | ||
| | | 215 | {}; | ||
| | | 216 | |||
| | | 217 | /** Reads a single key/val pair. */ | ||
| | | 218 | struct r_kvp | ||
| | | 219 | : r_and< RL< | ||
| | | 220 | r_kvp_key, | ||
| | | 221 | r_ch<'='>, | ||
| | | 222 | r_opt<r_kvp_val> | ||
| | | 223 | > > | ||
| | | 224 | {}; | ||
| | | 225 | |||
| | | 226 | /** Reads a sequence of key/val pairs, delimited by '&' (http-style). */ | ||
| | | 227 | struct r_kvps | ||
| | | 228 | : r_and< RL< r_kvp, r_star< r_and< RL< r_ch<'&'>, r_opt<r_kvp> > > > > > | ||
| | | 229 | {}; | ||
| | | 230 | |||
| | | 231 | struct a_port | ||
| | | 232 | { | ||
| | | 233 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | ||
| 91 | { | 234 | { | |
| 92 | s += m; | | | 235 | //COUT << "port=" << v <<'\n'; |
| | | 236 | s.parts["port"] = v; | ||
| 93 | } | 237 | } | |
| 94 | } | | | 238 | }; |
| 95 | }; | | | 239 | |
| 96 | | | 240 | struct r_port | |
| 97 | | | 241 | : r_and< RL< r_ch<':'>, | |
| 98 | /** | | | 242 | r_error_unless< r_action< r_plus<r_digit>, a_port >, ErrorNoPort > |
| 99 | Action to set a value. Uses the last-read key as the lookup key. | | | 243 | > > |
| 100 | | | 244 | {}; | |
| 101 | The value is automatically un-percent-escaped (e.g. %20 is converted | | | 245 | |
| 102 | to a space). | | | 246 | struct a_host |
| 103 | */ | | | ||
| 104 | struct a_kvp_set_val | | | ||
| 105 | { | | | ||
| 106 | static void matched( Ps::parser_state &, const std::string & m, url_state & s ) | | | ||
| 107 | { | | | ||
| 108 | //COUT << "a_kvp_set_val: " << s.key << '='<<m << '\n'; | | | ||
| 109 | if( ! m.empty() ) | | | ||
| 110 | { | | | ||
| 111 | std::string unesc; | | | ||
| 112 | typedef r_ch<'%'> PCT; | | | ||
| 113 | typedef r_repeat<r_xdigit,2> DIGITS; | | | ||
| 114 | typedef r_and<RL<PCT,DIGITS> > ESC; | | | ||
| 115 | typedef r_and<RL<PCT, | | | ||
| 116 | r_action< DIGITS, a_esc_append<true> > | | | ||
| 117 | > > AESC; | | | ||
| 118 | typedef r_action< r_plus< r_and<RL<r_notat<ESC>,r_advance<1> > > >, a_esc_append<false> > NESC; | | | ||
| 119 | typedef r_plus< r_or< RL< AESC, NESC > > > START; | | | ||
| 120 | Ps::parser_state ps(m); | | | ||
| 121 | Ps::parse<START>(ps, unesc); | | | ||
| 122 | s.params[s.key] = unesc; | | | ||
| 123 | } | | | ||
| 124 | else | | | ||
| 125 | { | | | ||
| 126 | s.params[s.key] = m; | | | ||
| 127 | } | | | ||
| 128 | s.key = ""; | | | ||
| 129 | } | | | ||
| 130 | }; | | | ||
| 131 | | | |||
| 132 | /** | | | ||
| 133 | Matches on a key-name field (an Identifier-style string). On a match | | | ||
| 134 | It activated a_kvp_set_key. | | | ||
| 135 | */ | | | ||
| 136 | struct r_kvp_key | | | ||
| 137 | : r_action< r_identifier, a_kvp_set_key > | | | ||
| 138 | {}; | | | ||
| 139 | | | |||
| 140 | /** | | | ||
| 141 | Matches any char up to a '&'. Calls a_kvp_set_val on match. | | | ||
| 142 | */ | | | ||
| 143 | struct r_kvp_val | | | ||
| 144 | : r_action< r_star< r_notch< '&' > >, a_kvp_set_val > | | | ||
| 145 | {}; | | | ||
| 146 | | | |||
| 147 | /** Reads a single key/val pair. */ | | | ||
| 148 | struct r_kvp | | | ||
| 149 | : r_and< RL< | | | ||
| 150 | r_kvp_key, | | | ||
| 151 | r_ch<'='>, | | | ||
| 152 | r_opt<r_kvp_val> | | | ||
| 153 | > > | | | ||
| 154 | {}; | | | ||
| 155 | | | |||
| 156 | /** Reads a sequence of key/val pairs, delimited by '&' (http-style). */ | | | ||
| 157 | struct r_kvps | | | ||
| 158 | : r_and< RL< r_kvp, r_star< r_and< RL< r_ch<'&'>, r_opt<r_kvp> > > > > > | | | ||
| 159 | {}; | | | ||
| 160 | | | |||
| 161 | struct a_port | | | ||
| 162 | { | | | ||
| 163 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | ||
| 164 | { | 247 | { | |
| 165 | //COUT << "port=" << v <<'\n'; | | | 248 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
| 166 | s.parts["port"] = v; | | | 249 | { |
| 167 | } | | | 250 | //COUT << "host=" << v <<'\n'; |
| 168 | }; | | | 251 | s.parts["host"] = v; |
| 169 | | | 252 | } | |
| 170 | struct r_port | | | 253 | }; |
| 171 | : r_and< RL< r_ch<':'>, r_action< r_plus<r_digit>, a_port > > > | | | 254 | struct r_hostname |
| 172 | {}; | | | 255 | : r_plus< r_and< RL< r_plus< |
| 173 | | | |||
| 174 | struct a_host | | | ||
| 175 | { | | | ||
| 176 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | ||
| 177 | { | | | ||
| 178 | //COUT << "host=" << v <<'\n'; | | | ||
| 179 | s.parts["host"] = v; | | | ||
| 180 | } | | | ||
| 181 | }; | | | ||
| 182 | | | |||
| 183 | struct r_hostname | | | ||
| 184 | : r_plus< r_and< RL< r_plus< | | | ||
| 185 | r_and< RL< r_or< RL< r_alnum, r_ch<'-'>, r_ch<'_'> > >, r_opt< r_ch<'.'> > > > | 256 | r_and< RL< r_or< RL< r_alnum, r_ch<'-'>, r_ch<'_'> > >, r_opt< r_ch<'.'> > > > | |
| 186 | > > > > | 257 | > > > > | |
| 187 | /* FIXME: matches with a trailing dot (e.g. foo.com.) */ | | | 258 | /* FIXME: matches with a trailing dot (e.g. foo.com.) */ |
| 188 | /* FIXME: matches many bogus hostnames, like "----" or "foo.c-" */ | | | 259 | /* FIXME: matches many bogus hostnames, like "----" or "foo.c-" */ |
| 189 | {}; | | | 260 | {}; |
| 190 | 261 | |||
| 191 | struct r_host | | | 262 | struct r_host |
| 192 | : r_action< r_hostname, a_host > | | | 263 | : r_action< r_hostname, a_host > |
| 193 | {}; | | | 264 | {}; |
| 194 | 265 | |||
| 195 | struct a_scheme | | | 266 | struct a_scheme |
| 196 | { | | | 267 | { |
| 197 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | 268 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
| | | 269 | { | ||
| | | 270 | std::string val = v.substr( 0, v.find(':') ); | ||
| | | 271 | //COUT << "scheme=" << val <<'\n'; | ||
| | | 272 | s.parts["scheme"] = val; | ||
| | | 273 | } | ||
| | | 274 | }; | ||
| | | 275 | |||
| | | 276 | struct r_scheme | ||
| | | 277 | : r_and< RL< r_action< r_identifier, a_scheme >, r_ch<':'>, r_plus< r_ch<'/'> > > > | ||
| | | 278 | {}; | ||
| | | 279 | |||
| | | 280 | struct a_path | ||
| 198 | { | 281 | { | |
| 199 | std::string val = v.substr( 0, v.find(':') ); | | | 282 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
| 200 | //COUT << "scheme=" << val <<'\n'; | | | 283 | { |
| 201 | s.parts["scheme"] = val; | | | 284 | //COUT << "path=" << v <<'\n'; |
| 202 | } | | | 285 | s.parts["path"] = v; |
| 203 | }; | | | 286 | } |
| | | 287 | }; | ||
| 204 | 288 | |||
| 205 | struct r_scheme | | | 289 | struct r_params |
| 206 | : r_and< RL< r_action< r_identifier, a_scheme >, r_ch<':'>, r_plus< r_ch<'/'> > > > | | | 290 | : r_and< RL< r_ch<'?'>, r_kvps > > |
| 207 | {}; | | | 291 | {}; |
| 208 | 292 | |||
| 209 | struct a_path | | | 293 | struct r_path |
| 210 | { | | | 294 | : r_action< r_star< r_notch<'?'> >, a_path > |
| 211 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | 295 | {}; |
| | | 296 | |||
| | | 297 | struct a_username | ||
| 212 | { | 298 | { | |
| 213 | //COUT << "path=" << v <<'\n'; | | | 299 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
| 214 | s.parts["path"] = v; | | | 300 | { |
| 215 | } | | | 301 | //COUT << "user=" << v <<'\n'; |
| 216 | }; | | | 302 | s.parts["user"] = v; |
| | | 303 | } | ||
| | | 304 | }; | ||
| 217 | 305 | |||
| 218 | struct r_params | | | 306 | struct r_user |
| 219 | : r_and< RL< r_ch<'?'>, r_kvps > > | | | 307 | : r_plus<r_identifier> |
| 220 | {}; | | | 308 | {}; |
| 221 | 309 | |||
| 222 | struct r_path | | | 310 | struct a_password |
| 223 | : r_action< r_star< r_notch<'?'> >, a_path > | | | ||
| 224 | {}; | | | ||
| 225 | | | |||
| 226 | struct a_username | | | ||
| 227 | { | | | ||
| 228 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | ||
| 229 | { | 311 | { | |
| 230 | //COUT << "user=" << v <<'\n'; | | | 312 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
| 231 | s.parts["user"] = v; | | | 313 | { |
| 232 | } | | | 314 | //COUT << "password=" << v <<'\n'; |
| 233 | }; | | | 315 | s.parts["password"] = v; |
| | | 316 | } | ||
| | | 317 | }; | ||
| 234 | 318 | |||
| 235 | struct r_user | | | 319 | struct r_password |
| 236 | : r_plus<r_alnum> | | | 320 | : r_action< r_plus< r_notch<'@'> >, a_password > |
| 237 | {}; | | | 321 | {}; |
| 238 | 322 | |||
| 239 | struct a_password | | | 323 | struct a_login |
| 240 | { | | | ||
| 241 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | ||
| 242 | { | 324 | { | |
| 243 | //COUT << "password=" << v <<'\n'; | | | 325 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) |
| 244 | s.parts["password"] = v; | | | ||
| 245 | } | | | ||
| 246 | }; | | | ||
| 247 | | | |||
| 248 | struct r_password | | | ||
| 249 | : r_action< r_plus< r_notch<'@'> >, a_password > | | | ||
| 250 | {}; | | | ||
| 251 | | | |||
| 252 | struct a_login | | | ||
| 253 | { | | | ||
| 254 | static void matched( Ps::parser_state &, const std::string & v, url_state & s ) | | | ||
| 255 | { | | | ||
| 256 | std::string val = v; | | | ||
| 257 | if( val.empty() ) return; // can't happen? | | | ||
| 258 | // chop trailing '@' (work around unfortunate parser design aspect) | | | ||
| 259 | if( '@' == val[val.size()-1] ) val.resize(val.size()-1); | | | ||
| 260 | std::string::size_type pos = val.find(':'); | | | ||
| 261 | | | |||
| 262 | if( pos != std::string::npos ) | | | ||
| 263 | { | 326 | { | |
| 264 | s.parts["user"] = val.substr( 0, pos ); | | | 327 | std::string val = v; |
| 265 | s.parts["password"] = val.substr( pos+1 ); | | | 328 | if( val.empty() ) return; // can't happen? |
| 266 | //COUT << "user=" << s.parts["user"] <<'\n'; | | | 329 | // chop trailing '@' (work around unfortunate parser design aspect) |
| 267 | //COUT << "password=" << s.parts["password"] <<'\n'; | | | 330 | if( '@' == val[val.size()-1] ) val.resize(val.size()-1); |
| | | 331 | std::string::size_type pos = val.find(':'); | ||
| | | 332 | if( pos != std::string::npos ) | ||
| | | 333 | { | ||
| | | 334 | s.parts["user"] = val.substr( 0, pos ); | ||
| | | 335 | s.parts["password"] = val.substr( pos+1 ); | ||
| | | 336 | //COUT << "user=" << s.parts["user"] <<'\n'; | ||
| | | 337 | //COUT << "password=" << s.parts["password"] <<'\n'; | ||
| | | 338 | } | ||
| | | 339 | else | ||
| | | 340 | { | ||
| | | 341 | s.parts["user"] = val; | ||
| | | 342 | //COUT << "user=" << val <<'\n'; | ||
| | | 343 | } | ||
| 268 | } | 344 | } | |
| 269 | else | | | 345 | }; |
| 270 | { | | | 346 | |
| 271 | s.parts["user"] = val; | | | 347 | /** |
| 272 | //COUT << "user=" << val <<'\n'; | | | 348 | r_user(:[^@]*)?@ |
| 273 | } | | | 349 | */ |
| 274 | } | | | 350 | struct r_login |
| 275 | }; | | | 351 | : r_action< |
| 276 | | | 352 | r_and< RL< | |
| 277 | /** | | | 353 | r_user, |
| 278 | r_alnum+(:[^@]*)?@ | | | 354 | r_opt< r_and<RL<r_ch<':'>,r_opt<r_password> > > >, |
| 279 | */ | | | 355 | r_ch<'@'> |
| 280 | struct r_login | | | 356 | > >, |
| 281 | : r_action< | | | 357 | a_login> |
| 282 | r_and< RL< | | | 358 | {}; |
| 283 | r_user, | | | 359 | |
| 284 | r_opt< r_and<RL<r_ch<':'>,r_opt<r_password> > > >, | | | 360 | struct r_filepath |
| 285 | r_ch<'@'> | | | 361 | : r_plus< r_any > |
| 286 | > >, | | | 362 | {}; |
| 287 | a_login> | | | 363 | } // namespace Detail |
| 288 | {}; | | | ||
| 289 | | | |||
| 290 | struct r_filepath | | | ||
| 291 | : r_plus< r_any > | | | ||
| 292 | {}; | | | ||
| 293 | 364 | |||
| 294 | /** | 365 | /** | |
| 295 | File URLs need special handling, due to the varying conventions regarding | 366 | File URLs need special handling, due to the varying conventions regarding | |
| 296 | the number of leading slashes. e.g. file:/etc/hosts vs file:///etc/hosts, | 367 | the number of leading slashes. e.g. file:/etc/hosts vs file:///etc/hosts, | |
| 297 | the second is correct but many implementations allow the first form. | 368 | the second is correct but many implementations allow the first form. | |
| 298 | < | |||
| 299 | seq< action< seq< string<'f','i','l','e'>, one<':'> >, a_scheme >, | < | ||
| 300 | seq< opt< string<'/','/'> >, action< r_filepath, a_path > > > | < | ||
| 301 | < | |||
| 302 | */ | 369 | */ | |
| 303 | struct r_fileurl : | 370 | struct r_fileurl : | |
| 304 | r_and< RL< | 371 | r_and< RL< | |
| 305 | r_action< r_and< RL< | | | 372 | r_action< r_and< RL< |
| 306 | r_chseq<CL<'f','i','l','e'> >, | | | 373 | r_chseq<CL<'f','i','l','e'> >, |
| 307 | r_ch<':'> | | | 374 | r_ch<':'> |
| 308 | > >, a_scheme >, | | | 375 | > >, Detail::a_scheme >, |
| 309 | r_notat< r_ch<':'> >, | | | 376 | r_error_if< r_ch<':'>, Ps::Errors::IllegalCharacter >, |
| 310 | r_and<RL< r_opt< r_repeat< r_ch<'/'>,2> >, | | | 377 | r_opt< r_repeat< r_ch<'/'>,2> >, |
| 311 | r_action< r_filepath, a_path > | | | 378 | r_action< Detail::r_filepath, Detail::a_path > |
| 312 | > > | 379 | > > | |
| > | 380 | {}; | ||
| > | 381 | |||
| > | 382 | /** | ||
| > | 383 | Parses a more or less standard URL. | ||
| > | 384 | */ | ||
| > | 385 | struct r_stdurl | ||
| > | 386 | : r_and< RL< Detail::r_scheme, | ||
| > | 387 | r_opt<Detail::r_login>, | ||
| > | 388 | r_error_unless< Detail::r_host, ErrorInvalidHost >, | ||
| > | 389 | r_opt<Detail::r_port>, | ||
| > | 390 | r_opt<Detail::r_path>, | ||
| > | 391 | r_opt<Detail::r_params> | ||
| 313 | > > | 392 | > > | |
| 314 | {}; | 393 | {}; | |
| 315 | 394 | |||
| 316 | struct r_stdurl | | | 395 | /** |
| 317 | : r_and< RL< r_scheme, | | | 396 | A rule for parsing file:// and other:// URLs. |
| 318 | r_opt<r_login>, | | | 397 | Use a url_state object as the parse state: |
| 319 | r_host, | | | 398 | |
| 320 | r_opt< r_and< RL< r_opt<r_port>, r_opt<r_path>, r_opt<r_params> > > > | | | 399 | \code |
| 321 | > > | | | 400 | url_state st; |
| 322 | {}; | | | 401 | if( Ps::parse<Ps::url::r_url>( "http://s11n.net", st ) ) { ... } |
| | | 402 | \endcode | ||
| | | 403 | |||
| | | 404 | Be aware that some types of syntax errors cause exceptions to | ||
| | | 405 | be thrown. We only throw errors when: | ||
| | | 406 | |||
| | | 407 | - we can easily write the rules handling to do so. | ||
| | | 408 | - we can provide an exact error location and halfway meaningful message. | ||
| 323 | 409 | |||
| > | 410 | If you don't want exceptions then use parse_url() instead, which | ||
| > | 411 | swallows exceptions. | ||
| > | 412 | */ | ||
| 324 | struct r_url | 413 | struct r_url | |
| 325 | : r_or< RL< r_fileurl, r_stdurl > > | 414 | : r_or< RL< r_fileurl, r_stdurl > > | |
| 326 | {}; | 415 | {}; | |
| 327 | 416 | |||
| 328 | #undef RL | 417 | #undef RL | |
| 329 | #undef CL | 418 | #undef CL | |
| 330 | 419 | |||
| > | 420 | |||
| 331 | /** | 421 | /** | |
| 332 | Convenience function to parse the given URL and stuff the data | 422 | Convenience function to parse the given URL and stuff the data | |
| 333 | into tgt. On success, tgt contains valid URL data. On error, | 423 | into tgt. On success, tgt contains valid URL data. On error, | |
| 334 | tgt is not modified. | 424 | tgt is not modified. | |
| > | 425 | |||
| > | 426 | This function silently swallows all exceptions and translates them | ||
| > | 427 | to a false return value. If you want more precise information about | ||
| > | 428 | certain types of parse errors, use Ps::parse<r_url>(src,tgt) instead, | ||
| > | 429 | as it will propagate the exception back to you. | ||
| 335 | */ | 430 | */ | |
| 336 | inline bool parse_url( std::string const & src, url_state & tgt ) | 431 | inline bool parse_url( std::string const & src, url_state & tgt ) | |
| 337 | { | 432 | { | |
| 338 | url_state tmp; | | | 433 | try |
| 339 | return Ps::parse< r_url >( src, tmp ) | | | 434 | { |
| 340 | ? ((tgt=tmp),true) | | | 435 | url_state tmp; |
| 341 | : false; | | | 436 | return Ps::parse< r_url >( src, tmp ) |
| | | 437 | ? ((tgt=tmp),true) | ||
| | | 438 | : false; | ||
| | | 439 | } | ||
| | | 440 | catch(...) | ||
| | | 441 | { | ||
| | | 442 | return false; | ||
| | | 443 | } | ||
| 342 | } | 444 | } | |
| 343 | 445 | |||
| 344 | }} // namespaces | 446 | }} // namespaces | |
| 345 | 447 | |||
| 346 | #endif // S11N_NET_PARSEPP_URL_H_INCLUDED | 448 | #endif // S11N_NET_PARSEPP_URL_H_INCLUDED | |
Changes to url.cpp
| Old (0632db388b79a9f9) | New (c056d13716177953) | |||
|---|---|---|---|---|
| 1 | #include "parsepp.hpp" | 1 | #include "parsepp.hpp" | |
| 2 | 2 | |||
| 3 | #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : " | 3 | #define CERR std::cerr << __FILE__ << ":" << std::dec << __LINE__ << " : " | |
| 4 | #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : " | 4 | #define COUT std::cout << __FILE__ << ":" << std::dec << __LINE__ << " : " | |
| 5 | #define DOUT if(0) COUT | 5 | #define DOUT if(0) COUT | |
| 14 hidden lines | ||||
| 20 | { | 20 | { | |
| 21 | for ( int arg = 0; arg < argc; ++arg ) { | 21 | for ( int arg = 0; arg < argc; ++arg ) { | |
| 22 | if( (argc>1) && (0==arg) ) continue; | 22 | if( (argc>1) && (0==arg) ) continue; | |
| 23 | line = (argc>1) ? argv[ arg ] : scr.c_str(); | 23 | line = (argc>1) ? argv[ arg ] : scr.c_str(); | |
| 24 | COUT << "Trying: " << line << '\n'; | 24 | COUT << "Trying: " << line << '\n'; | |
| 25 | if ( Ps::url::parse_url( line, state ) ) { | | | 25 | if ( parse<Ps::url::r_url>( line, state ) ) { |
| 26 | COUT << "Parsed OK: " << line << '\n'; | 26 | COUT << "Parsed OK: " << line << '\n'; | |
| 27 | } | 27 | } | |
| 28 | else { | 28 | else { | |
| 29 | COUT << "Parsed FAILED: " << line << '\n'; | 29 | COUT << "Parsed FAILED: " << line << '\n'; | |
| 30 | return 1; | 30 | return 1; | |
| 21 hidden lines | ||||
| 52 | CERR << "Parse exception:\n" << ex.what() << '\n'; | 52 | CERR << "Parse exception:\n" << ex.what() << '\n'; | |
| 53 | return 1; | 53 | return 1; | |
| 54 | } | 54 | } | |
| 55 | return 0; | 55 | return 0; | |
| 56 | } | 56 | } | |