Check-in [7cffcb3d6a]

Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

SHA1 Hash:7cffcb3d6a6feb3fdb1662d251b2cc2f56ea2932
Date: 2008-09-18 16:08:32
User: stephan
Comment:added experimental r_until and r_error

Tags And Properties
Changes
[hide diffs]

Changes to parsepp.hpp

@@ -891,14 +891,14 @@
 	    parser_state ps(in);
 	    return parse<Rule>( ps, st );
 	}
 	catch( parse_error const & ex )
 	{
-	    // recreate exception without the state,
+	    // Recreate exception without the state,
 	    // which won't be valid once this function returns.
 	    std::string msg( ex.what() );
-	    msg += "\n Near "+ex.where();
+	    msg += "\nNear "+ex.where();
 	    throw std::runtime_error( msg );
 	}
 	return false;
     }
 
@@ -908,12 +908,11 @@
        immutability.
     */
     template <typename Rule, typename ClientState>
     bool parse( std::string const & in, ClientState & st)
     {
-	parse_iterator it(in);
-	return parse<Rule>( it, st );
+	 return parse<Rule>( parse_iterator(in), st );
     }
 
     /**
        A non-consuming parser which returns Val.
     */
@@ -1174,15 +1173,10 @@
        Matches on eof or if Rule matches. It always returns true, but
        only advances the input if Rule consumes and we're not at
        eof. Note that actions triggered as part of Rule cannot be
        un-done if the rule later fails to match and the input is
        rewound.
-
-       Note that a break_exception thrown from Rule
-       will propagate out of this class, which means
-       that a Break changes the meaning of "optional"
-       (because "break" trumps "optional").
     */
     template <typename Rule>
     struct r_opt
     {
 	typedef r_opt type;
@@ -1249,11 +1243,11 @@
     };
 
     /**
        Matches Rule at least Min times and at most Max times. If it
        does not match it does not consume, but if forward parsing
-       caused Actions to be triggered then they are not un-done by
+       causes Actions to be triggered then they are not un-done by
        rewinding.
 
        Note that once Max is reached, checking stops. That means
        that Max is not a hard-limit - there may be more than Max
        matches in the input.
@@ -1352,10 +1346,70 @@
     };
 
     typedef r_throw_base<parse_error> r_throw;
 
 
+    /**
+       A list of "standard" error IDs, for use with the error_msg<>
+       template.
+    */
+    enum StandardErrorIDs {
+    UnknownError = 0,
+    UnexpectedCharacter = 1,
+    UserIDs = 1000 /* client-side IDs should start here. */
+    };
+    template <int ErrorNumber = UnknownError>
+    struct error_msg
+    {
+	template <typename State>
+	static std::string message( parser_state &, State & )
+	{
+	    return "Unknown/unspecified parsing error";
+	}
+    };
+
+
+    /**
+       Specialization for UnexpectedCharacter errors.
+    */
+    template <>
+    struct error_msg<UnexpectedCharacter>
+    {
+	template <typename State>
+	static std::string message( parser_state & ps, State & )
+	{
+	    std::string msg("Unexpected character '");
+	    msg.push_back(*ps.pos());
+	    msg.push_back('\'');
+	    return msg;
+	}
+    };
+
+    /**
+       Similar to r_throw, this rule throws a parse_error
+       exception. The what() text of the exception is the text of
+       error_msg<ErrorNumber>, allowing one to specialize error_msg to
+       create custom error messages. The where() part of the exception
+       is derived from the parser state passed to matches().
+
+       Design note: an alternative solution would be a static
+       map<int,string>, but then we would need to ship .cpp files
+       along with the .hpp files for this library, and I would
+       rather not do that.
+    */
+    template <int ErrorNumber>
+    struct r_error
+    {
+	typedef r_error type;
+	template <typename State>
+	static bool matches( parser_state & in, State & st )
+	{
+	    throw parse_error( in, error_msg<ErrorNumber>::message(in,st) );
+	    return false;
+	}
+    };
+
 
     /**
        Matches any characters in the range [Min..Max]
     */
     template< int Min, int Max >
@@ -1375,10 +1429,76 @@
 		}
 	    }
 	    return false;
 	}
     };
+
+    /**
+       EXPERIMENTAL!
+
+       Matches all input up to the point where Rule::matches()
+       returns true. Each time Rule does not match, the input
+       iterator is bumped up by one and we try again.
+
+       This rule returns true only if Rule matches at some point, but
+       it may or may not consume input. If Rule matches immediately, no
+       input is consumed, otherwise input is consumed up to the point
+       where Rule will match. Thus when this rule returns true, Rule
+       *will* match the next input. If eof is reached without a match,
+       the input is rewound to where this rule started and false is
+       returned.
+
+       Caveats:
+
+       [In theory] the given Rule should not normally call actions
+       because it would then be easy to accidentally trigger the
+       action twice in down-stream parse rules. So...
+
+       Instead of this:
+
+       r_until< r_action<MyRule, MyAction > >
+
+       Use:
+
+       r_action< r_until<MyRule>, MyAction >
+
+       Additionally:
+
+       - Rule should not be a rule which doesn't consume (e.g. r_at).
+
+       - DO NOT use r_eof (or equivalent) as Rule - it won't behave as
+       expected because this routine also has to do its own eof checks
+       and we run into an ambiguity.
+    */
+    template <typename Rule>
+    struct r_until
+    {
+	typedef r_until type;
+	template <typename ClientState>
+	inline static bool matches( parser_state & in, ClientState & st )
+	{
+	    parse_iterator thepos(in.pos());
+	    parse_iterator begin(thepos);
+	    bool gotMatch = Rule::matches( in, st );
+	    if( gotMatch )
+	    {
+		in.pos(thepos);
+		return true;
+	    }
+	    while( ! gotMatch )
+	    {
+		thepos = ++in;
+ 		if( in.eof() )
+ 		{
+ 		    break;
+ 		}
+		gotMatch = Rule::matches( in, st );
+	    }
+	    in.pos( gotMatch ? thepos : begin );
+	    return gotMatch;
+	}
+    };
+
+
 
     /**
        Matches only the character CH. If CaseSensitive is true then CH
        must match exactly, otherwise the upper- or lower-case form of
        CH will also match. Also, if CaseSensitive is true then CH MUST
@@ -1757,8 +1877,10 @@
 	Detail::line_col_state st;
 	parse<Detail::r_linecol>( inp, st );
 	line = st.line;
 	col = st.col;
     }
+
+
 } // namespace
 
 #endif // s11n_net_PARSEPP_HPP_INCLUDED