Check-in [7cffcb3d6a]

Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
SHA1 Hash:7cffcb3d6a6feb3fdb1662d251b2cc2f56ea2932
Date: 2008-09-18 16:08:32
User: stephan
Comment:added experimental r_until and r_error
Tags And Properties
Changes
hide diffs unified diffs patch

Changes to parsepp.hpp

Old (b7440c804d4244fc) New (11715259fcc08ebe)
1 #ifndef s11n_net_PARSEPP_HPP_INCLUDED 1 #ifndef s11n_net_PARSEPP_HPP_INCLUDED
2 #define s11n_net_PARSEPP_HPP_INCLUDED 2 #define s11n_net_PARSEPP_HPP_INCLUDED
3 3
4 #include <map> 4 #include <map>
5 #include <string> 5 #include <string>
885 hidden lines
891 parser_state ps(in); 891 parser_state ps(in);
892 return parse<Rule>( ps, st ); 892 return parse<Rule>( ps, st );
893 } 893 }
894 catch( parse_error const & ex ) 894 catch( parse_error const & ex )
895 { 895 {
896 // recreate exception without the state, | 896 // Recreate exception without the state,
897 // which won't be valid once this function returns. 897 // which won't be valid once this function returns.
898 std::string msg( ex.what() ); 898 std::string msg( ex.what() );
899 msg += "\n Near "+ex.where(); | 899 msg += "\nNear "+ex.where();
900 throw std::runtime_error( msg ); 900 throw std::runtime_error( msg );
901 } 901 }
902 return false; 902 return false;
903 } 903 }
904 904
3 hidden lines
908 immutability. 908 immutability.
909 */ 909 */
910 template <typename Rule, typename ClientState> 910 template <typename Rule, typename ClientState>
911 bool parse( std::string const & in, ClientState & st) 911 bool parse( std::string const & in, ClientState & st)
912 { 912 {
913 parse_iterator it(in); | 913 return parse<Rule>( parse_iterator(in), st );
914 return parse<Rule>( it, st ); |
915 } 914 }
916 915
917 /** 916 /**
918 A non-consuming parser which returns Val. 917 A non-consuming parser which returns Val.
919 */ 918 */
254 hidden lines
1174 Matches on eof or if Rule matches. It always returns true, but 1173 Matches on eof or if Rule matches. It always returns true, but
1175 only advances the input if Rule consumes and we're not at 1174 only advances the input if Rule consumes and we're not at
1176 eof. Note that actions triggered as part of Rule cannot be 1175 eof. Note that actions triggered as part of Rule cannot be
1177 un-done if the rule later fails to match and the input is 1176 un-done if the rule later fails to match and the input is
1178 rewound. 1177 rewound.
1179 <
1180 Note that a break_exception thrown from Rule <
1181 will propagate out of this class, which means <
1182 that a Break changes the meaning of "optional" <
1183 (because "break" trumps "optional"). <
1184 */ 1178 */
1185 template <typename Rule> 1179 template <typename Rule>
1186 struct r_opt 1180 struct r_opt
1187 { 1181 {
1188 typedef r_opt type; 1182 typedef r_opt type;
60 hidden lines
1249 }; 1243 };
1250 1244
1251 /** 1245 /**
1252 Matches Rule at least Min times and at most Max times. If it 1246 Matches Rule at least Min times and at most Max times. If it
1253 does not match it does not consume, but if forward parsing 1247 does not match it does not consume, but if forward parsing
1254 caused Actions to be triggered then they are not un-done by | 1248 causes Actions to be triggered then they are not un-done by
1255 rewinding. 1249 rewinding.
1256 1250
1257 Note that once Max is reached, checking stops. That means 1251 Note that once Max is reached, checking stops. That means
1258 that Max is not a hard-limit - there may be more than Max 1252 that Max is not a hard-limit - there may be more than Max
1259 matches in the input. 1253 matches in the input.
92 hidden lines
1352 }; 1346 };
1353 1347
1354 typedef r_throw_base<parse_error> r_throw; 1348 typedef r_throw_base<parse_error> r_throw;
1355 1349
1356 1350
> 1351 /**
> 1352 A list of "standard" error IDs, for use as the ErrorNumber argument of
> 1353 the error_msg<> template (and of rules built on it, such as r_error<>).
> 1354 */
> 1355 enum StandardErrorIDs {
> 1356 UnknownError = 0, // default ErrorNumber of error_msg<>; produces the generic message
> 1357 UnexpectedCharacter = 1, // has a dedicated error_msg<> specialization
> 1358 UserIDs = 1000 /* client-side IDs should start here. */
> 1359 };
> 1360 template <int ErrorNumber = UnknownError> // primary template: used for every ErrorNumber that has no specialization
> 1361 struct error_msg
> 1362 {
> 1363 template <typename State>
> 1364 static std::string message( parser_state &, State & ) // both arguments are ignored by this fallback
> 1365 {
> 1366 return "Unknown/unspecified parsing error";
> 1367 }
> 1368 };
> 1369
> 1370
> 1371 /**
> 1372 Specialization for UnexpectedCharacter errors: message() returns the text "Unexpected character" followed by the character at the current parse position in single quotes. The client state argument is unused.
> 1373 */
> 1374 template <>
> 1375 struct error_msg<UnexpectedCharacter>
> 1376 {
> 1377 template <typename State>
> 1378 static std::string message( parser_state & ps, State & )
> 1379 {
> 1380 std::string msg("Unexpected character '");
> 1381 msg.push_back(*ps.pos()); // dereferences the current position; NOTE(review): presumably requires that ps is not at eof - confirm
> 1382 msg.push_back('\'');
> 1383 return msg;
> 1384 }
> 1385 };
> 1386
> 1387 /**
> 1388 Similar to r_throw, this rule throws a parse_error
> 1389 exception. The what() text of the exception is the result of
> 1390 error_msg<ErrorNumber>::message(), allowing one to specialize error_msg
> 1391 to create custom error messages. The where() part of the exception
> 1392 is presumably derived from the parser_state handed to the parse_error constructor (TODO: confirm against parse_error).
> 1393
> 1394 Design note: another alternative to solve this problem would be
> 1395 to use a static map<int,string>, but then we'd need to provide
> 1396 .cpp files along with the .hpp files for this lib, and i don't
> 1397 wanna do that.
> 1398 */
> 1399 template <int ErrorNumber>
> 1400 struct r_error
> 1401 {
> 1402 typedef r_error type;
> 1403 template <typename State>
> 1404 static bool matches( parser_state & in, State & st )
> 1405 {
> 1406 throw parse_error( in, error_msg<ErrorNumber>::message(in,st) ); // always throws; this rule never returns normally
> 1407 return false; // unreachable; NOTE(review): presumably kept to silence missing-return warnings
> 1408 }
> 1409 };
> 1410
1357 1411
1358 /** 1412 /**
1359 Matches any characters in the range [Min..Max] 1413 Matches any characters in the range [Min..Max]
1360 */ 1414 */
1361 template< int Min, int Max > 1415 template< int Min, int Max >
13 hidden lines
1375 } 1429 }
1376 } 1430 }
1377 return false; 1431 return false;
1378 } 1432 }
1379 }; 1433 };
> 1434
> 1435 /**
> 1436 EXPERIMENTAL!
> 1437
> 1438 Matches all input up to the point where Rule::matches()
> 1439 returns true. Each time Rule does not match, the input
> 1440 iterator is bumped up by one and we try again.
> 1441
> 1442 This rule returns true only if Rule is ever matched, but it may
> 1443 or may not consume input. If Rule matches immediately, no input
> 1444 is consumed, otherwise input is consumed up to the point where
> 1445 Rule will match. If eof is reached without a match, the input is
> 1446 reset to the position it had on entry and false is returned.
> 1447
> 1448 Caveats:
> 1449
> 1450 [In theory] the given Rule should not normally call actions
> 1451 because it would then be easy to accidentally trigger the
> 1452 action twice in down-stream parse rules. So...
> 1453
> 1454 Instead of this:
> 1455
> 1456 r_until< r_action<MyRule, MyAction > >
> 1457
> 1458 Use:
> 1459
> 1460 r_action< r_until<MyRule>, MyAction >
> 1461
> 1462 Additionally:
> 1463
> 1464 - Rule should not be a rule which doesn't consume (e.g. r_at).
> 1465
> 1466 - DO NOT use r_eof (or equivalent) as Rule - it won't behave as
> 1467 expected because this routine also has to do its own eof checks
> 1468 and we run into an ambiguity.
> 1469 */
> 1470 template <typename Rule>
> 1471 struct r_until
> 1472 {
> 1473 typedef r_until type;
> 1474 template <typename ClientState>
> 1475 inline static bool matches( parser_state & in, ClientState & st )
> 1476 {
> 1477 parse_iterator thepos(in.pos()); // position at which the most recent match attempt starts
> 1478 parse_iterator begin(thepos); // entry position, restored if Rule never matches
> 1479 bool gotMatch = Rule::matches( in, st ); // first attempt, at the entry position
> 1480 if( gotMatch )
> 1481 {
> 1482 in.pos(thepos); // immediate match: undo whatever Rule consumed so that nothing is consumed
> 1483 return true;
> 1484 }
> 1485 while( ! gotMatch )
> 1486 {
> 1487 thepos = ++in; // bump the input by one and remember the new attempt position
> 1488 if( in.eof() )
> 1489 {
> 1490 break; // ran out of input; gotMatch is still false here
> 1491 }
> 1492 gotMatch = Rule::matches( in, st );
> 1493 }
> 1494 in.pos( gotMatch ? thepos : begin ); // on success stop just before the matching input, otherwise restore the entry position
> 1495 return gotMatch;
> 1496 }
> 1497 };
> 1498
> 1499
1380 1500
1381 /** 1501 /**
1382 Matches only the character CH. If CaseSensitive is true then CH 1502 Matches only the character CH. If CaseSensitive is true then CH
1383 must match exactly, otherwise the upper- or lower-case form of 1503 must match exactly, otherwise the upper- or lower-case form of
1384 CH will also match. Also, if CaseSensitive is true then CH MUST 1504 CH will also match. Also, if CaseSensitive is true then CH MUST
372 hidden lines
1757 Detail::line_col_state st; 1877 Detail::line_col_state st;
1758 parse<Detail::r_linecol>( inp, st ); 1878 parse<Detail::r_linecol>( inp, st );
1759 line = st.line; 1879 line = st.line;
1760 col = st.col; 1880 col = st.col;
1761 } 1881 }
> 1882
> 1883
1762 } // namespace 1884 } // namespace
1763 1885
1764 #endif // s11n_net_PARSEPP_HPP_INCLUDED 1886 #endif // s11n_net_PARSEPP_HPP_INCLUDED