Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | 7cffcb3d6a6feb3fdb1662d251b2cc2f56ea2932 |
|---|---|
| Date: | 2008-09-18 16:08:32 |
| User: | stephan |
| Comment: | added experimental r_until and r_error |
Tags And Properties
- branch=trunk inherited from [d45e7467f2]
- sym-trunk inherited from [d45e7467f2]
Changes
Changes to parsepp.hpp
| Old (b7440c804d4244fc) | New (11715259fcc08ebe) | |||
|---|---|---|---|---|
| 1 | #ifndef s11n_net_PARSEPP_HPP_INCLUDED | 1 | #ifndef s11n_net_PARSEPP_HPP_INCLUDED | |
| 2 | #define s11n_net_PARSEPP_HPP_INCLUDED | 2 | #define s11n_net_PARSEPP_HPP_INCLUDED | |
| 3 | 3 | |||
| 4 | #include <map> | 4 | #include <map> | |
| 5 | #include <string> | 5 | #include <string> | |
| 885 hidden lines | ||||
| 891 | parser_state ps(in); | 891 | parser_state ps(in); | |
| 892 | return parse<Rule>( ps, st ); | 892 | return parse<Rule>( ps, st ); | |
| 893 | } | 893 | } | |
| 894 | catch( parse_error const & ex ) | 894 | catch( parse_error const & ex ) | |
| 895 | { | 895 | { | |
| 896 | // recreate exception without the state, | | | 896 | // Recreate exception without the state, |
| 897 | // which won't be valid once this function returns. | 897 | // which won't be valid once this function returns. | |
| 898 | std::string msg( ex.what() ); | 898 | std::string msg( ex.what() ); | |
| 899 | msg += "\n Near "+ex.where(); | | | 899 | msg += "\nNear "+ex.where(); |
| 900 | throw std::runtime_error( msg ); | 900 | throw std::runtime_error( msg ); | |
| 901 | } | 901 | } | |
| 902 | return false; | 902 | return false; | |
| 903 | } | 903 | } | |
| 904 | 904 | |||
| 3 hidden lines | ||||
| 908 | immutability. | 908 | immutability. | |
| 909 | */ | 909 | */ | |
| 910 | template <typename Rule, typename ClientState> | 910 | template <typename Rule, typename ClientState> | |
| 911 | bool parse( std::string const & in, ClientState & st) | 911 | bool parse( std::string const & in, ClientState & st) | |
| 912 | { | 912 | { | |
| 913 | parse_iterator it(in); | | | 913 | return parse<Rule>( parse_iterator(in), st ); |
| 914 | return parse<Rule>( it, st ); | | | ||
| 915 | } | 914 | } | |
| 916 | 915 | |||
| 917 | /** | 916 | /** | |
| 918 | A non-consuming parser which returns Val. | 917 | A non-consuming parser which returns Val. | |
| 919 | */ | 918 | */ | |
| 254 hidden lines | ||||
| 1174 | Matches on eof or if Rule matches. It always returns true, but | 1173 | Matches on eof or if Rule matches. It always returns true, but | |
| 1175 | only advances the input if Rule consumes and we're not at | 1174 | only advances the input if Rule consumes and we're not at | |
| 1176 | eof. Note that actions triggered as part of Rule cannot be | 1175 | eof. Note that actions triggered as part of Rule cannot be | |
| 1177 | un-done if the rule later fails to match and the input is | 1176 | un-done if the rule later fails to match and the input is | |
| 1178 | rewound. | 1177 | rewound. | |
| 1179 | < | |||
| 1180 | Note that a break_exception thrown from Rule | < | ||
| 1181 | will propagate out of this class, which means | < | ||
| 1182 | that a Break changes the meaning of "optional" | < | ||
| 1183 | (because "break" trumps "optional"). | < | ||
| 1184 | */ | 1178 | */ | |
| 1185 | template <typename Rule> | 1179 | template <typename Rule> | |
| 1186 | struct r_opt | 1180 | struct r_opt | |
| 1187 | { | 1181 | { | |
| 1188 | typedef r_opt type; | 1182 | typedef r_opt type; | |
| 60 hidden lines | ||||
| 1249 | }; | 1243 | }; | |
| 1250 | 1244 | |||
| 1251 | /** | 1245 | /** | |
| 1252 | Matches Rule at least Min times and at most Max times. If it | 1246 | Matches Rule at least Min times and at most Max times. If it | |
| 1253 | does not match it does not consume, but if forward parsing | 1247 | does not match it does not consume, but if forward parsing | |
| 1254 | caused Actions to be triggered then they are not un-done by | | | 1248 | causes Actions to be triggered then they are not un-done by |
| 1255 | rewinding. | 1249 | rewinding. | |
| 1256 | 1250 | |||
| 1257 | Note that once Max is reached, checking stops. That means | 1251 | Note that once Max is reached, checking stops. That means | |
| 1258 | that Max is not a hard-limit - there may be more than Max | 1252 | that Max is not a hard-limit - there may be more than Max | |
| 1259 | matches in the input. | 1253 | matches in the input. | |
| 92 hidden lines | ||||
| 1352 | }; | 1346 | }; | |
| 1353 | 1347 | |||
| 1354 | typedef r_throw_base<parse_error> r_throw; | 1348 | typedef r_throw_base<parse_error> r_throw; | |
| 1355 | 1349 | |||
| 1356 | 1350 | |||
| > | 1351 | /** | ||
| > | 1352 | A list of "standard" error IDs, for use with the error_msg<> | ||
| > | 1353 | template. | ||
| > | 1354 | */ | ||
| > | 1355 | enum StandardErrorIDs { | ||
| > | 1356 | UnknownError = 0, | ||
| > | 1357 | UnexpectedCharacter = 1, | ||
| > | 1358 | UserIDs = 1000 /* client-side IDs should start here. */ | ||
| > | 1359 | }; | ||
| > | 1360 | template <int ErrorNumber = UnknownError> | ||
| > | 1361 | struct error_msg | ||
| > | 1362 | { | ||
| > | 1363 | template <typename State> | ||
| > | 1364 | static std::string message( parser_state &, State & ) | ||
| > | 1365 | { | ||
| > | 1366 | return "Unknown/unspecified parsing error"; | ||
| > | 1367 | } | ||
| > | 1368 | }; | ||
| > | 1369 | |||
| > | 1370 | |||
| > | 1371 | /** | ||
| > | 1372 | Specialization for UnexpectedCharacter errors. | ||
| > | 1373 | */ | ||
| > | 1374 | template <> | ||
| > | 1375 | struct error_msg<UnexpectedCharacter> | ||
| > | 1376 | { | ||
| > | 1377 | template <typename State> | ||
| > | 1378 | static std::string message( parser_state & ps, State & ) | ||
| > | 1379 | { | ||
| > | 1380 | std::string msg("Unexpected character '"); | ||
| > | 1381 | msg.push_back(*ps.pos()); | ||
| > | 1382 | msg.push_back('\''); | ||
| > | 1383 | return msg; | ||
| > | 1384 | } | ||
| > | 1385 | }; | ||
| > | 1386 | |||
| > | 1387 | /** | ||
| > | 1388 | Similar to r_throw, this rule throws a parse_error | ||
| > | 1389 | exception. The what() text of the exception is the text of | ||
| > | 1390 | error_msg<ErrorNumber>, allowing one to specialize error_msg to | ||
| > | 1391 | create custom error messages. The where() part of the exception is derived from the parser_state passed to matches(). | ||
| > | 1392 | |||
| > | 1393 | |||
| > | 1394 | Design note: another alternative to solve this problem would be | ||
| > | 1395 | to use a static map<int,string>, but then we'd need to provide | ||
| > | 1396 | .cpp files along with the .hpp files for this lib, and I don't | ||
| > | 1397 | want to do that. | ||
| > | 1398 | */ | ||
| > | 1399 | template <int ErrorNumber> | ||
| > | 1400 | struct r_error | ||
| > | 1401 | { | ||
| > | 1402 | typedef r_error type; | ||
| > | 1403 | template <typename State> | ||
| > | 1404 | static bool matches( parser_state & in, State & st ) | ||
| > | 1405 | { | ||
| > | 1406 | throw parse_error( in, error_msg<ErrorNumber>::message(in,st) ); | ||
| > | 1407 | return false; | ||
| > | 1408 | } | ||
| > | 1409 | }; | ||
| > | 1410 | |||
| 1357 | 1411 | |||
| 1358 | /** | 1412 | /** | |
| 1359 | Matches any characters in the range [Min..Max] | 1413 | Matches any characters in the range [Min..Max] | |
| 1360 | */ | 1414 | */ | |
| 1361 | template< int Min, int Max > | 1415 | template< int Min, int Max > | |
| 13 hidden lines | ||||
| 1375 | } | 1429 | } | |
| 1376 | } | 1430 | } | |
| 1377 | return false; | 1431 | return false; | |
| 1378 | } | 1432 | } | |
| 1379 | }; | 1433 | }; | |
| > | 1434 | |||
| > | 1435 | /** | ||
| > | 1436 | EXPERIMENTAL! | ||
| > | 1437 | |||
| > | 1438 | Matches all input up to the point where Rule::matches() | ||
| > | 1439 | returns true. Each time Rule does not match, the input | ||
| > | 1440 | iterator is bumped up by one and we try again. | ||
| > | 1441 | |||
| > | 1442 | This rule returns true only if Rule is ever matched, but it may | ||
| > | 1443 | or may not consume input. If Rule matches immediately, no input | ||
| > | 1444 | is consumed, otherwise input is consumed up to the point where | ||
| > | 1445 | Rule will match. Thus when this rule finishes, either Rule | ||
| > | 1446 | *will* match the next input or, if Rule never matched before eof, the input has been rewound to where it started and false is returned. | ||
| > | 1447 | |||
| > | 1448 | Caveats: | ||
| > | 1449 | |||
| > | 1450 | [In theory] the given Rule should not normally call actions | ||
| > | 1451 | because it would then be easy to accidentally trigger the | ||
| > | 1452 | action twice in down-stream parse rules. So... | ||
| > | 1453 | |||
| > | 1454 | Instead of this: | ||
| > | 1455 | |||
| > | 1456 | r_until< r_action<MyRule, MyAction > > | ||
| > | 1457 | |||
| > | 1458 | Use: | ||
| > | 1459 | |||
| > | 1460 | r_action< r_until<MyRule>, MyAction > | ||
| > | 1461 | |||
| > | 1462 | Additionally: | ||
| > | 1463 | |||
| > | 1464 | - Rule should not be a rule which doesn't consume (e.g. r_at). | ||
| > | 1465 | |||
| > | 1466 | - DO NOT use r_eof (or equivalent) as Rule - it won't behave as | ||
| > | 1467 | expected because this routine also has to do its own eof checks | ||
| > | 1468 | and we run into an ambiguity. | ||
| > | 1469 | */ | ||
| > | 1470 | template <typename Rule> | ||
| > | 1471 | struct r_until | ||
| > | 1472 | { | ||
| > | 1473 | typedef r_until type; | ||
| > | 1474 | template <typename ClientState> | ||
| > | 1475 | inline static bool matches( parser_state & in, ClientState & st ) | ||
| > | 1476 | { | ||
| > | 1477 | parse_iterator thepos(in.pos()); | ||
| > | 1478 | parse_iterator begin(thepos); | ||
| > | 1479 | bool gotMatch = Rule::matches( in, st ); | ||
| > | 1480 | if( gotMatch ) | ||
| > | 1481 | { | ||
| > | 1482 | in.pos(thepos); | ||
| > | 1483 | return true; | ||
| > | 1484 | } | ||
| > | 1485 | while( ! gotMatch ) | ||
| > | 1486 | { | ||
| > | 1487 | thepos = ++in; | ||
| > | 1488 | if( in.eof() ) | ||
| > | 1489 | { | ||
| > | 1490 | break; | ||
| > | 1491 | } | ||
| > | 1492 | gotMatch = Rule::matches( in, st ); | ||
| > | 1493 | } | ||
| > | 1494 | in.pos( gotMatch ? thepos : begin ); | ||
| > | 1495 | return gotMatch; | ||
| > | 1496 | } | ||
| > | 1497 | }; | ||
| > | 1498 | |||
| > | 1499 | |||
| 1380 | 1500 | |||
| 1381 | /** | 1501 | /** | |
| 1382 | Matches only the character CH. If CaseSensitive is true then CH | 1502 | Matches only the character CH. If CaseSensitive is true then CH | |
| 1383 | must match exactly, otherwise the upper- or lower-case form of | 1503 | must match exactly, otherwise the upper- or lower-case form of | |
| 1384 | CH will also match. Also, if CaseSensitive is true then CH MUST | 1504 | CH will also match. Also, if CaseSensitive is true then CH MUST | |
| 372 hidden lines | ||||
| 1757 | Detail::line_col_state st; | 1877 | Detail::line_col_state st; | |
| 1758 | parse<Detail::r_linecol>( inp, st ); | 1878 | parse<Detail::r_linecol>( inp, st ); | |
| 1759 | line = st.line; | 1879 | line = st.line; | |
| 1760 | col = st.col; | 1880 | col = st.col; | |
| 1761 | } | 1881 | } | |
| > | 1882 | |||
| > | 1883 | |||
| 1762 | } // namespace | 1884 | } // namespace | |
| 1763 | 1885 | |||
| 1764 | #endif // s11n_net_PARSEPP_HPP_INCLUDED | 1886 | #endif // s11n_net_PARSEPP_HPP_INCLUDED | |