Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
| SHA1 Hash: | 3f173c77014b5a2cc9adee7306ca7aec828ae6fb |
|---|---|
| Date: | 2008-12-24 13:07:02 |
| User: | stephan |
| Comment: | documentation cleanups |
Tags And Properties
- branch=trunk inherited from [a870fea998]
- sym-trunk inherited from [a870fea998]
Changes
Changes to src/pegc.c
| Old (9f995621906b4f54) | New (184024d0cb32a8d9) | |||
|---|---|---|---|---|
| 1 | #include <stdio.h> | 1 | #include <stdio.h> | |
| 2 | #include <stdlib.h> | 2 | #include <stdlib.h> | |
| 3 | #include <string.h> | 3 | #include <string.h> | |
| 4 | #include <ctype.h> | 4 | #include <ctype.h> | |
| 5 | 5 | |||
| 16 hidden lines | ||||
| 22 | #include "pegc.h" | 22 | #include "pegc.h" | |
| 23 | #include "whclob.h" | 23 | #include "whclob.h" | |
| 24 | #include "whgc.h" | 24 | #include "whgc.h" | |
| 25 | 25 | |||
| 26 | 26 | |||
| 27 | < | |||
| 28 | const pegc_cursor pegc_cursor_init = PEGC_CURSOR_INIT; | 27 | const pegc_cursor pegc_cursor_init = PEGC_CURSOR_INIT; | |
| 29 | const PegcRule PegcRule_init = PEGCRULE_INIT; | 28 | const PegcRule PegcRule_init = PEGCRULE_INIT; | |
| 30 | const PegcRule PegcRule_invalid = PEGCRULE_INIT; | 29 | const PegcRule PegcRule_invalid = PEGCRULE_INIT; | |
| 31 | 30 | |||
| 32 | size_t pegc_strnlen( size_t n, pegc_const_iterator c ) | 31 | size_t pegc_strnlen( size_t n, pegc_const_iterator c ) | |
| 22 hidden lines | ||||
| 55 | pegc_match_listener func; | 54 | pegc_match_listener func; | |
| 56 | void * data; | 55 | void * data; | |
| 57 | struct pegc_match_listener_data * next; | 56 | struct pegc_match_listener_data * next; | |
| 58 | }; | 57 | }; | |
| 59 | typedef struct pegc_match_listener_data pegc_match_listener_data; | 58 | typedef struct pegc_match_listener_data pegc_match_listener_data; | |
| > | 59 | /** | ||
| > | 60 | Empty initializer for pegc_match_listener_data. | ||
| > | 61 | */ | ||
| 60 | const static pegc_match_listener_data pegc_match_listener_data_init = {0,0,0}; | 62 | const static pegc_match_listener_data pegc_match_listener_data_init = {0,0,0}; | |
| 61 | 63 | |||
| 62 | 64 | |||
| 63 | 65 | |||
| 64 | /** | 66 | /** | |
| 514 hidden lines | ||||
| 579 | r.begin = r.pos = mark; | 581 | r.begin = r.pos = mark; | |
| 580 | r.end = c; | 582 | r.end = c; | |
| 581 | return r; | 583 | return r; | |
| 582 | } | 584 | } | |
| 583 | 585 | |||
| 584 | char * pegc_cursor_tostring( pegc_cursor const cur ) | | | 586 | pegc_iterator pegc_cursor_tostring( pegc_cursor const cur ) |
| 585 | { | 587 | { | |
| 586 | if( !cur.begin | 588 | if( !cur.begin | |
| 587 | ||!*(cur.begin) | 589 | ||!*(cur.begin) | |
| 588 | ||(cur.end<=cur.begin) | 590 | ||(cur.end<=cur.begin) | |
| 589 | ) | 591 | ) | |
| 1779 hidden lines | ||||
| 2369 | #undef PEGCACTION_INIT | 2371 | #undef PEGCACTION_INIT | |
| 2370 | 2372 | |||
| 2371 | #if defined(__cplusplus) | 2373 | #if defined(__cplusplus) | |
| 2372 | } /* extern "C" */ | 2374 | } /* extern "C" */ | |
| 2373 | #endif | 2375 | #endif | |
Changes to src/pegc.h
| Old (4258eed48c1eb307) | New (64f76882f9be00f1) | |||
|---|---|---|---|---|
| 1 | #ifndef WANDERINGHORSE_NET_PEGC_H_INCLUDED | 1 | #ifndef WANDERINGHORSE_NET_PEGC_H_INCLUDED | |
| 2 | #define WANDERINGHORSE_NET_PEGC_H_INCLUDED | 2 | #define WANDERINGHORSE_NET_PEGC_H_INCLUDED | |
| 3 | /*! | 3 | /*! | |
| 4 | @page pegc_page_main pegc: PEG parser generation library | 4 | @page pegc_page_main pegc: PEG parser generation library | |
| 5 | 5 | |||
| 190 hidden lines | ||||
| 196 | suffixes like <tt>_vv</tt>, and <tt>_ep</tt>. Since C does not allow | 196 | suffixes like <tt>_vv</tt>, and <tt>_ep</tt>. Since C does not allow | |
| 197 | function overloading, we have to add suffixes to functions which have | 197 | function overloading, we have to add suffixes to functions which have | |
| 198 | the same functionality but take different argument types. The | 198 | the same functionality but take different argument types. The | |
| 199 | conventions are: | 199 | conventions are: | |
| 200 | 200 | |||
| 201 | - _a = the argument is a pointer to a null-terminated list. e.g. pegc_r_list_a() | | | 201 | - _a = the argument is a pointer to a null-terminated array. e.g. pegc_r_list_a() |
| 202 | - _p = the argument is a non-null pointer. e.g. pegc_copy_r_p() | 202 | - _p = the argument is a non-null pointer. e.g. pegc_copy_r_p() | |
| 203 | - _v = the argument is a va_list. e.g. pegc_set_error_v() | 203 | - _v = the argument is a va_list. e.g. pegc_set_error_v() | |
| 204 | - _e = the argument is an ellipsis list. e.g. pegc_set_error_v() | | | 204 | - _e = the argument is an ellipsis list. e.g. pegc_set_error_e() |
| 205 | - _vv = the argument is a va_list containing full-fledged VALUES of | 205 | - _vv = the argument is a va_list containing full-fledged VALUES of | |
| 206 | the type documented for the function. e.g. pegc_r_list_vv(). | 206 | the type documented for the function. e.g. pegc_r_list_vv(). | |
| 207 | - _vp = the argument is a va_list containing POINTERS to object of | | | 207 | - _vp = the argument is a va_list containing POINTERS to objects of |
| 208 | the type documented for the function. e.g. pegc_r_list_vp() | 208 | the type documented for the function. e.g. pegc_r_list_vp() | |
| 209 | - _ev = as _vv but an ellipsis list instead of a va_list. e.g. pegc_r_or_ev(). | 209 | - _ev = as _vv but an ellipsis list instead of a va_list. e.g. pegc_r_or_ev(). | |
| 210 | - _ep = as _vp but an ellipsis list instead of a va_list. e.g. pegc_r_and_ep(). | 210 | - _ep = as _vp but an ellipsis list instead of a va_list. e.g. pegc_r_and_ep(). | |
| 211 | 211 | |||
| 212 | These seem a little unwieldy at first, but one gets used to them. | 212 | These seem a little unwieldy at first, but one gets used to them. | |
| 151 hidden lines | ||||
| 364 | error handling. | 364 | error handling. | |
| 365 | 365 | |||
| 366 | Example: | 366 | Example: | |
| 367 | 367 | |||
| 368 | \code | 368 | \code | |
| 369 | pegc_parser * p = pegc_create_parser( &p, "...", -1 ); | | | 369 | pegc_parser * p = pegc_create_parser( "...", -1 ); |
| 370 | if( ! p ) { ... error... } | 370 | if( ! p ) { ... error... } | |
| 371 | ... | 371 | ... | |
| 372 | pegc_destroy_parser(p); | 372 | pegc_destroy_parser(p); | |
| 373 | \endcode | 373 | \endcode | |
| 374 | 374 | |||
| 27 hidden lines | ||||
| 402 | If length is less than 0 then pegc_strlen(begin) will be used to | 402 | If length is less than 0 then pegc_strlen(begin) will be used to | |
| 403 | calculate the end point. | 403 | calculate the end point. | |
| 404 | 404 | |||
| 405 | If (!st) then false is returned. null input is legal (but not | 405 | If (!st) then false is returned. null input is legal (but not | |
| 406 | parseable). | 406 | parseable). | |
| 407 | < | |||
| 408 | When re-mapping a parser to a different input source than | < | ||
| 409 | previously used, be sure to call pegc_set_error_e() to clear the | < | ||
| 410 | error state, or most parse operations will fail. | < | ||
| 411 | */ | 407 | */ | |
| 412 | bool pegc_set_input( pegc_parser * st, pegc_const_iterator begin, long length ); | 408 | bool pegc_set_input( pegc_parser * st, pegc_const_iterator begin, long length ); | |
| 413 | 409 | |||
| 414 | /** | 410 | /** | |
| 415 | Sets a descriptive name for the parser. Intended for debugging | 411 | Sets a descriptive name for the parser. Intended for debugging | |
| 14 hidden lines | ||||
| 430 | ever refactored to support string types other than (char *). | 426 | ever refactored to support string types other than (char *). | |
| 431 | 427 | |||
| 432 | Returns the length of c, stopping when a literal null or a null | 428 | Returns the length of c, stopping when a literal null or a null | |
| 433 | character, or n characters have been traversed. A value of 0 for n | 429 | character, or n characters have been traversed. A value of 0 for n | |
| 434 | means "unlimited" (i.e. only stop at a null). | 430 | means "unlimited" (i.e. only stop at a null). | |
| 435 | < | |||
| 436 | */ | 431 | */ | |
| 437 | size_t pegc_strnlen( size_t n, pegc_const_iterator c ); | 432 | size_t pegc_strnlen( size_t n, pegc_const_iterator c ); | |
| 438 | 433 | |||
| 439 | /** | 434 | /** | |
| 440 | Equivalent to pegc_strnlen(0,c). | 435 | Equivalent to pegc_strnlen(0,c). | |
| 62 hidden lines | ||||
| 503 | */ | 498 | */ | |
| 504 | bool pegc_line_col( pegc_parser const * st, size_t * line, size_t * col ); | 499 | bool pegc_line_col( pegc_parser const * st, size_t * line, size_t * col ); | |
| 505 | 500 | |||
| 506 | /** | 501 | /** | |
| 507 | Gets the current error string (which may be 0), line, and | 502 | Gets the current error string (which may be 0), line, and | |
| 508 | column. The string is owned by the parser and will be invalided | | | 503 | column. |
| 509 | the next time pegc_set_error_e() is called. | | | ||
| 510 | 504 | |||
| 511 | Any of the integer pointers may be 0. | 505 | Any of the integer pointers may be 0. | |
| 512 | 506 | |||
| 513 | It returns 0 if: | 507 | It returns 0 if: | |
| 514 | 508 | |||
| 11 hidden lines | ||||
| 526 | 520 | |||
| 527 | /** | 521 | /** | |
| 528 | Copies the given null-terminated string as the current error | 522 | Copies the given null-terminated string as the current error | |
| 529 | message for the parser. Also sets the line/column position. | 523 | message for the parser. Also sets the line/column position. | |
| 530 | The error can be fetched with pegc_get_error(). | 524 | The error can be fetched with pegc_get_error(). | |
| 531 | < | |||
| 532 | The clientNumber parameter is a client-determined number which | < | ||
| 533 | is not used by this library but is returned by pegc_get_error(). | < | ||
| 534 | 525 | |||
| 535 | If msg is NULL then the error state is cleared. | 526 | If msg is NULL then the error state is cleared. | |
| 536 | 527 | |||
| 537 | Returns false if: | 528 | Returns false if: | |
| 538 | 529 | |||
| 112 hidden lines | ||||
| 651 | /** | 642 | /** | |
| 652 | Returns a copy of the string delimited by curs, or 0 if there | 643 | Returns a copy of the string delimited by curs, or 0 if there | |
| 653 | is no match or there is a length-zero match. The caller is | 644 | is no match or there is a length-zero match. The caller is | |
| 654 | responsible for deallocating the returned string using free(). | 645 | responsible for deallocating the returned string using free(). | |
| 655 | */ | 646 | */ | |
| 656 | char * pegc_cursor_tostring( pegc_cursor const curs ); | | | 647 | pegc_iterator pegc_cursor_tostring( pegc_cursor const curs ); |
| 657 | 648 | |||
| 658 | 649 | |||
| 659 | /** | 650 | /** | |
| 660 | Equivalent to pegc_cursor_tostring( pegc_get_match_cursor(st) ). | 651 | Equivalent to pegc_cursor_tostring( pegc_get_match_cursor(st) ). | |
| 661 | */ | 652 | */ | |
| 67 hidden lines | ||||
| 729 | 720 | |||
| 730 | 721 | |||
| 731 | struct PegcRule; | 722 | struct PegcRule; | |
| 732 | /*! @typedef bool (*PegcRule_mf) ( struct PegcRule const * self, pegc_parser * state ) | 723 | /*! @typedef bool (*PegcRule_mf) ( struct PegcRule const * self, pegc_parser * state ) | |
| 733 | 724 | |||
| 734 | A typedef for "member functions" of PegcRule objects. | | | 725 | A typedef for "member functions" of PegcRule objects. These represent |
| | | 726 | the implementations of parsing rules. | ||
| 735 | 727 | |||
| 736 | Conventions: | 728 | Conventions: | |
| 737 | 729 | |||
| 738 | If the rule can match then true is returned and st is advanced | 730 | If the rule can match then true is returned and st is advanced | |
| 739 | to one place after the last consumed token. It is legal to not | | | 731 | to one place after the last consumed token. |
| 740 | consume even on a match, but this is best reserved for certain | | | ||
| 741 | cases, and it should be well documented in the API docs. | | | ||
| 742 | 732 | |||
| 743 | If the rule cannot match it must not consume input. That is, if | 733 | If the rule cannot match it must not consume input. That is, if | |
| 744 | it doesn't match then it must ensure that pegc_pos(st) returns | 734 | it doesn't match then it must ensure that pegc_pos(st) returns | |
| 745 | the same value after this call as it does before this call. It | 735 | the same value after this call as it does before this call. It | |
| 746 | should use pegc_set_pos() to force the position back to the | 736 | should use pegc_set_pos() to force the position back to the | |
| 747 | pre-call starting point if needed. | | | 737 | pre-call starting point if needed. It is legal to not consume |
| | | 738 | even on a match, but this is best reserved for certain cases | ||
| | | 739 | and it must be well documented in the API docs. | ||
| 748 | 740 | |||
| 749 | The self pointer is the "this" object - the object context in | 741 | The self pointer is the "this" object - the object context in | |
| 750 | which this function is called. Implementations may (and | 742 | which this function is called. Implementations may (and | |
| 751 | probably do) require a certain type of data to be set in | 743 | probably do) require a certain type of data to be set in | |
| 752 | self->data (e.g. a string to match against). The exact type of | 744 | self->data (e.g. a string to match against). The exact type of | |
| 2 hidden lines | ||||
| 755 | */ | 747 | */ | |
| 756 | typedef bool (*PegcRule_mf) ( struct PegcRule const * self, pegc_parser * state ); | 748 | typedef bool (*PegcRule_mf) ( struct PegcRule const * self, pegc_parser * state ); | |
| 757 | 749 | |||
| 758 | /** | 750 | /** | |
| 759 | PegcRule objects hold data used to implement parsing rules. | 751 | PegcRule objects hold data used to implement parsing rules. | |
| > | 752 | These are the core objects for implementing grammars. Each Rule | ||
| > | 753 | can be as "small" or as "big" as necessary, and rules can be | ||
| > | 754 | combined to form grammars of arbitrary complexity. | ||
| 760 | 755 | |||
| 761 | Each object holds an PegcRule_mf "member function" and a void | | | 756 | Each object holds a PegcRule_mf "member function" and a void |
| 762 | data pointer. The data pointer holds information used by the | 757 | data pointer. The data pointer holds information used by the | |
| 763 | member function. Some rules hold a (char const *) here and | 758 | member function. Some rules hold a (char const *) here and | |
| 764 | match against a string or the characters in the string. | 759 | match against a string or the characters in the string. | |
| 765 | Non-string rules may have other uses for the data pointer. | 760 | Non-string rules may have other uses for the data pointer. | |
| 766 | 761 | |||
| 10 hidden lines | ||||
| 777 | PegcRule.data key. That approach ensures that copies of such | 772 | PegcRule.data key. That approach ensures that copies of such | |
| 778 | PegcRule object end up using the same shared data. While it | 773 | PegcRule object end up using the same shared data. While it | |
| 779 | might be tempting to use a rule's address as the key, this is | 774 | might be tempting to use a rule's address as the key, this is | |
| 780 | only useful if the rule is created on the heap (and then | 775 | only useful if the rule is created on the heap (and then | |
| 781 | (rule->data=rule) should be set so that copies of the object | 776 | (rule->data=rule) should be set so that copies of the object | |
| 782 | get the same key address. | | | 777 | get the same key address). |
| 783 | 778 | |||
| 784 | 779 | |||
| 785 | PegcRules must comply with a few guidelines if they want to | 780 | PegcRules must comply with a few guidelines if they want to | |
| 786 | be sure to work with the core rules: | 781 | be sure to work with the core rules: | |
| 787 | 782 | |||
| 10 hidden lines | ||||
| 798 | The origin need not outlive the copies, as long as ownership | 793 | The origin need not outlive the copies, as long as ownership | |
| 799 | of any shared data is well defined and the referenced data | 794 | of any shared data is well defined and the referenced data | |
| 800 | outlives all copies of the rule. Again, see the code for some | 795 | outlives all copies of the rule. Again, see the code for some | |
| 801 | of the core rules, and this will become clear. | 796 | of the core rules, and this will become clear. | |
| 802 | 797 | |||
| 803 | - Rules should considered const after creation. Ideally they | | | 798 | - Rules should be considered const after creation. Ideally they |
| 804 | are only configurable via factory functions (e.g. the | 799 | are only configurable via factory functions (e.g. the | |
| 805 | pegc_r_xxx() functions). Once the factory is done configuring | 800 | pegc_r_xxx() functions). Once the factory is done configuring | |
| 806 | them, clients must not change any state in the rule (with the | 801 | them, clients must not change any state in the rule (with the | |
| 807 | exception of the 'client' member, which is reserved for | 802 | exception of the 'client' member, which is reserved for | |
| 808 | client-side use). | 803 | client-side use). | |
| 4 hidden lines | ||||
| 813 | objects. | 808 | objects. | |
| 814 | */ | 809 | */ | |
| 815 | struct PegcRule | 810 | struct PegcRule | |
| 816 | { | 811 | { | |
| 817 | /** | 812 | /** | |
| 818 | This object's rule function. An object with a rule of 0 | | | 813 | This object's rule function. An object with a rule of 0 is |
| 819 | is said to be "invalid" (several API routines use this | | | 814 | said to be "invalid" (several API routines use this |
| 820 | term). | | | 815 | term). All invalid rules are considered equal for |
| | | 816 | comparison purposes. | ||
| 821 | */ | 817 | */ | |
| 822 | PegcRule_mf rule; | 818 | PegcRule_mf rule; | |
| 823 | 819 | |||
| 824 | /** | 820 | /** | |
| 825 | Data used by the rule function. The exact format of the | 821 | Data used by the rule function. The exact format of the | |
| 46 hidden lines | ||||
| 872 | D /* data */,\ | 868 | D /* data */,\ | |
| 873 | 0 /* proxy */,\ | 869 | 0 /* proxy */,\ | |
| 874 | /* client */ { 0/* flags */,0 /* data */}, \ | 870 | /* client */ { 0/* flags */,0 /* data */}, \ | |
| 875 | N \ | 871 | N \ | |
| 876 | } | 872 | } | |
| > | 873 | /** See PEGCRULE_INIT3(). */ | ||
| 877 | #define PEGCRULE_INIT2(RF,D) PEGCRULE_INIT3(RF,D,# RF) | 874 | #define PEGCRULE_INIT2(RF,D) PEGCRULE_INIT3(RF,D,# RF) | |
| 878 | /** | | | 875 | /** See PEGCRULE_INIT3(). */ |
| 879 | A rule using RF as its rule function. | | | ||
| 880 | */ | | | ||
| 881 | #define PEGCRULE_INIT1(RF) PEGCRULE_INIT2(RF,0) | 876 | #define PEGCRULE_INIT1(RF) PEGCRULE_INIT2(RF,0) | |
| 882 | /** | | | 877 | /** |
| 883 | An invalid rule. | | | 878 | Initializer for an empty/invalid rule. |
| 884 | */ | | | 879 | */ |
| 885 | #define PEGCRULE_INIT PEGCRULE_INIT3(0,0,"invalid") | 880 | #define PEGCRULE_INIT PEGCRULE_INIT3(0,0,"invalid") | |
| 886 | 881 | |||
| 887 | /** | 882 | /** | |
| 888 | This object can (should) be used as an initializer to ensure a | 883 | This object can (should) be used as an initializer to ensure a | |
| 889 | clean slate for the internal members of PegcRule objects. Simply | 884 | clean slate for the internal members of PegcRule objects. Simply | |
| 890 | copy this over the object. It is an invalid rule. | 885 | copy this over the object. It is an invalid rule. | |
| 891 | */ | 886 | */ | |
| 892 | extern const PegcRule PegcRule_init; | 887 | extern const PegcRule PegcRule_init; | |
| 893 | < | |||
| 894 | 888 | |||
| 895 | /** | 889 | /** | |
| 896 | Always returns false and does nothing. | 890 | Always returns false and does nothing. | |
| 897 | */ | 891 | */ | |
| 898 | bool PegcRule_mf_failure( PegcRule const * self, pegc_parser * st ); | 892 | bool PegcRule_mf_failure( PegcRule const * self, pegc_parser * st ); | |
| 169 hidden lines | ||||
| 1068 | */ | 1062 | */ | |
| 1069 | PegcRule pegc_r_opt_v( pegc_parser * st, PegcRule const proxy ); | 1063 | PegcRule pegc_r_opt_v( pegc_parser * st, PegcRule const proxy ); | |
| 1070 | 1064 | |||
| 1071 | /** | 1065 | /** | |
| 1072 | Creates a rule which will match the given string. The string | 1066 | Creates a rule which will match the given string. The string | |
| 1073 | must outlive the rule, as it is not copied. | | | 1067 | must outlive the rule, as it is not copied. If caseSensitive is |
| | | 1068 | false then a case-insensitive check is done. | ||
| 1074 | */ | 1069 | */ | |
| 1075 | PegcRule pegc_r_string( pegc_const_iterator input, bool caseSensitive ); | 1070 | PegcRule pegc_r_string( pegc_const_iterator input, bool caseSensitive ); | |
| 1076 | 1071 | |||
| 1077 | /** | 1072 | /** | |
| 1078 | Matches if that string case-sensitively matches the next | 1073 | Matches if that string case-sensitively matches the next | |
| 64 hidden lines | ||||
| 1143 | If st or spec are null, or the first character of spec | 1138 | If st or spec are null, or the first character of spec | |
| 1144 | is not a '[' then an invalid rule is returned. | 1139 | is not a '[' then an invalid rule is returned. | |
| 1145 | */ | 1140 | */ | |
| 1146 | PegcRule pegc_r_char_spec( pegc_parser * st, char const * spec ); | 1141 | PegcRule pegc_r_char_spec( pegc_parser * st, char const * spec ); | |
| 1147 | 1142 | |||
| 1148 | < | |||
| 1149 | /** | 1143 | /** | |
| 1150 | Creates a rule which matches if proxy matches, but does not | 1144 | Creates a rule which matches if proxy matches, but does not | |
| 1151 | consume. proxy must not be 0 and must outlive the returned | 1145 | consume. proxy must not be 0 and must outlive the returned | |
| 1152 | object. | 1146 | object. | |
| 1153 | */ | 1147 | */ | |
| 1154 | PegcRule pegc_r_at_p( PegcRule const * proxy ); | 1148 | PegcRule pegc_r_at_p( PegcRule const * proxy ); | |
| > | 1149 | |||
| 1155 | /** | 1150 | /** | |
| 1156 | Functionally equivalent to pegc_r_at_p() except that it must | 1151 | Functionally equivalent to pegc_r_at_p() except that it must | |
| 1157 | allocate a (shallow) copy of the proxy rule. | 1152 | allocate a (shallow) copy of the proxy rule. | |
| 1158 | */ | 1153 | */ | |
| 1159 | PegcRule pegc_r_at_v( pegc_parser * st, PegcRule const proxy ); | 1154 | PegcRule pegc_r_at_v( pegc_parser * st, PegcRule const proxy ); | |
| 2 hidden lines | ||||
| 1162 | The converse of pegc_r_at(), this returns true only if the | 1157 | The converse of pegc_r_at(), this returns true only if the | |
| 1163 | input does not match the given proxy rule. This rule never | 1158 | input does not match the given proxy rule. This rule never | |
| 1164 | consumes. | 1159 | consumes. | |
| 1165 | */ | 1160 | */ | |
| 1166 | PegcRule pegc_r_notat_p( PegcRule const * proxy ); | 1161 | PegcRule pegc_r_notat_p( PegcRule const * proxy ); | |
| > | 1162 | |||
| 1167 | /** | 1163 | /** | |
| 1168 | Functionally equivalent to pegc_r_notat_p() except that it must | 1164 | Functionally equivalent to pegc_r_notat_p() except that it must | |
| 1169 | allocate a (shallow) copy of the proxy rule. | 1165 | allocate a (shallow) copy of the proxy rule. | |
| 1170 | */ | 1166 | */ | |
| 1171 | PegcRule pegc_r_notat_v( pegc_parser * st, PegcRule const proxy ); | 1167 | PegcRule pegc_r_notat_v( pegc_parser * st, PegcRule const proxy ); | |
| 14 hidden lines | ||||
| 1186 | Functionally equivalent to pegc_r_until_p() except that it must | 1182 | Functionally equivalent to pegc_r_until_p() except that it must | |
| 1187 | allocate a (shallow) copy of the proxy rule. | 1183 | allocate a (shallow) copy of the proxy rule. | |
| 1188 | */ | 1184 | */ | |
| 1189 | PegcRule pegc_r_until_v( pegc_parser * st, PegcRule const proxy ); | 1185 | PegcRule pegc_r_until_v( pegc_parser * st, PegcRule const proxy ); | |
| 1190 | 1186 | |||
| 1191 | < | |||
| 1192 | /** | 1187 | /** | |
| 1193 | Creates a rule which performs either an OR operation (if orOp | 1188 | Creates a rule which performs either an OR operation (if orOp | |
| 1194 | is true) or an AND operation (if orOp is false) on the given | 1189 | is true) or an AND operation (if orOp is false) on the given | |
| 1195 | list of rules. The list MUST be terminated with either NULL, or | 1190 | list of rules. The list MUST be terminated with either NULL, or | |
| 1196 | an entry where entry->rule is 0 (i.e. an invalid rule), or | | | 1191 | an entry where entry->rule is 0 (i.e. an invalid rule) or |
| 1197 | results are undefined (almost certainly an overflow). | 1192 | results are undefined (almost certainly an overflow). | |
| 1198 | 1193 | |||
| 1199 | All rules in li must outlive the returned object. | 1194 | All rules in li must outlive the returned object. | |
| 1200 | (BUG: all rules in li are currently copied (shallowly) instead | < | ||
| 1201 | of pointed to.) | < | ||
| 1202 | 1195 | |||
| 1203 | This routine allocates resources for the returned rule which | | | 1196 | If li is null then an invalid rule is returned. |
| 1204 | belong to this API and are freed when st is destroyed. | | | ||
| 1205 | | | |||
| 1206 | If st or li are null then an invalid rule is returned. | | | ||
| 1207 | 1197 | |||
| 1208 | The null-termination approach was chosen over the client | 1198 | The null-termination approach was chosen over the client | |
| 1209 | explicitly providing the length of the list because when | 1199 | explicitly providing the length of the list because when | |
| 1210 | editing rule lists (which happens a lot during development) it | 1200 | editing rule lists (which happens a lot during development) it | |
| 1211 | is more problematic to verify and change that number than it is | 1201 | is more problematic to verify and change that number than it is | |
| 1212 | to add a trailing 0 to the list (which only has to be done | 1202 | to add a trailing 0 to the list (which only has to be done | |
| 1213 | once). Alternately, you can use an invalid rule to mark the | 1203 | once). Alternately, you can use an invalid rule to mark the | |
| 1214 | end of the list. | 1204 | end of the list. | |
| 1215 | 1205 | |||
| 1216 | The objects pointed to in the list must outlive the rule, | < | ||
| 1217 | though the implementation currently copies them (that's a bug). | < | ||
| 1218 | < | |||
| 1219 | Mnemonic: the 'a' suffix refers to the 'a'rray parameter. | 1206 | Mnemonic: the 'a' suffix refers to the 'a'rray parameter. | |
| 1220 | 1207 | |||
| 1221 | Of the various pegc_r_list_X() implementations, this one is | 1208 | Of the various pegc_r_list_X() implementations, this one is | |
| 1222 | most efficient (the others synthesize an array, which causes | 1209 | most efficient (the others synthesize an array, which causes | |
| 1223 | extra allocations, and call this routine). | 1210 | extra allocations, and call this routine). | |
| 1224 | */ | 1211 | */ | |
| 1225 | PegcRule pegc_r_list_a( bool orOp, PegcRule const * li ); | 1212 | PegcRule pegc_r_list_a( bool orOp, PegcRule const * li ); | |
| 1226 | //PegcRule pegc_r_list_a( pegc_parser * st, bool orOp, PegcRule const * li ); | | | 1213 | //older impl: PegcRule pegc_r_list_a( pegc_parser * st, bool orOp, PegcRule const * li ); |
| 1227 | 1214 | |||
| 1228 | /** | 1215 | /** | |
| 1229 | Works like pegc_r_list_a() but requires a NULL-terminated list of | 1216 | Works like pegc_r_list_a() but requires a NULL-terminated list of | |
| 1230 | (PegcRule const *). The objects pointed to must outlive the | 1217 | (PegcRule const *). The objects pointed to must outlive the | |
| 1231 | returned rule. | 1218 | returned rule. | |
| 14 hidden lines | ||||
| 1246 | for some reason then an invalid rule is returned. | 1233 | for some reason then an invalid rule is returned. | |
| 1247 | 1234 | |||
| 1248 | Mnemonic: the 'v' suffix refers to the 'v'a_list parameters. | 1235 | Mnemonic: the 'v' suffix refers to the 'v'a_list parameters. | |
| 1249 | */ | 1236 | */ | |
| 1250 | PegcRule pegc_r_list_vp( pegc_parser * st, bool orOp, va_list ap ); | 1237 | PegcRule pegc_r_list_vp( pegc_parser * st, bool orOp, va_list ap ); | |
| > | 1238 | |||
| 1251 | /** | 1239 | /** | |
| 1252 | Works like pegc_r_list_a(), but requires a list of PegcRule | 1240 | Works like pegc_r_list_a(), but requires a list of PegcRule | |
| 1253 | objects (NOT pointers) which is terminated by an invalid | 1241 | objects (NOT pointers) which is terminated by an invalid | |
| 1254 | rule. If the internal list cannot be constructed for some | 1242 | rule. If the internal list cannot be constructed for some | |
| 1255 | reason then an invalid rule is returned. | 1243 | reason then an invalid rule is returned. | |
| 12 hidden lines | ||||
| 1268 | 1256 | |||
| 1269 | /** | 1257 | /** | |
| 1270 | Convenience form of pegc_r_list_ep( st, true, ... ); | 1258 | Convenience form of pegc_r_list_ep( st, true, ... ); | |
| 1271 | */ | 1259 | */ | |
| 1272 | PegcRule pegc_r_or_ep( pegc_parser * st, ... ); | 1260 | PegcRule pegc_r_or_ep( pegc_parser * st, ... ); | |
| > | 1261 | |||
| 1273 | /** | 1262 | /** | |
| 1274 | Convenience form of pegc_r_list_ev(st,true,...). | 1263 | Convenience form of pegc_r_list_ev(st,true,...). | |
| 1275 | */ | 1264 | */ | |
| 1276 | PegcRule pegc_r_or_ev( pegc_parser * st, ... ); | 1265 | PegcRule pegc_r_or_ev( pegc_parser * st, ... ); | |
| 1277 | 1266 | |||
| 1278 | /** | 1267 | /** | |
| 1279 | Convenience form of pegc_r_list_ep(st,false,...); | 1268 | Convenience form of pegc_r_list_ep(st,false,...); | |
| 1280 | */ | 1269 | */ | |
| 1281 | PegcRule pegc_r_and_ep( pegc_parser * st, ... ); | 1270 | PegcRule pegc_r_and_ep( pegc_parser * st, ... ); | |
| > | 1271 | |||
| 1282 | /** | 1272 | /** | |
| 1283 | Convenience form of pegc_r_list_ev(st,false,...). | 1273 | Convenience form of pegc_r_list_ev(st,false,...). | |
| 1284 | */ | 1274 | */ | |
| 1285 | PegcRule pegc_r_and_ev( pegc_parser * st, ... ); | 1275 | PegcRule pegc_r_and_ev( pegc_parser * st, ... ); | |
| > | 1276 | |||
| 1286 | /** | 1277 | /** | |
| 1287 | A callback type for semantic actions - functions which are | 1278 | A callback type for semantic actions - functions which are | |
| 1288 | called when their proxy rule matches. "Immediate" actions, created with pegc_r_action_i(), are triggered | | | 1279 | called when their proxy rule matches. |
| 1289 | as soon as a match is found. | | | 1280 | |
| | | 1281 | "Immediate" actions, created with pegc_r_action_i(), are | ||
| | | 1282 | triggered as soon as a match is found. | ||
| 1290 | 1283 | |||
| 1291 | "Delayed" rules, generated with pegc_r_action_d(), are queued | 1284 | "Delayed" rules, generated with pegc_r_action_d(), are queued | |
| 1292 | on every match and executed with pegc_trigger_actions() (presumably | 1285 | on every match and executed with pegc_trigger_actions() (presumably | |
| 1293 | after the parser has successfully handled an entire grammar). | 1286 | after the parser has successfully handled an entire grammar). | |
| 1294 | 1287 | |||
| 16 hidden lines | ||||
| 1311 | 1304 | |||
| 1312 | - clientData: arbitrary client-side data, as passed to | 1305 | - clientData: arbitrary client-side data, as passed to | |
| 1313 | pegc_r_action_d() or pegc_r_action_i(). | 1306 | pegc_r_action_d() or pegc_r_action_i(). | |
| 1314 | 1307 | |||
| 1315 | If an action returns false then the effect is the same as a rule | 1308 | If an action returns false then the effect is the same as a rule | |
| 1316 | returning false | | | 1309 | returning false. |
| 1317 | 1310 | |||
| 1318 | Actions can act on client-side data in two ways: | 1311 | Actions can act on client-side data in two ways: | |
| 1319 | 1312 | |||
| 1320 | - By passing a data object (the clientData parameter) to | 1313 | - By passing a data object (the clientData parameter) to | |
| 1321 | pegc_r_action_i() or pegc_r_action_d(). This approach is useful | 1314 | pegc_r_action_i() or pegc_r_action_d(). This approach is useful | |
| 1322 | if different subparsers need different types of state. | 1315 | if different subparsers need different types of state. | |
| 1323 | 1316 | |||
| 1324 | - By calling pegc_set_client_data() and accessing it from the | 1317 | - By calling pegc_set_client_data() and accessing it from the | |
| 1325 | action. If all actions access the same shared state, this is | 1318 | action. If all actions access the same shared state, this is | |
| 1326 | the simplest approach. | 1319 | the simplest approach. | |
| > | 1320 | |||
| > | 1321 | If you need to pass const clientData, don't cast away the const, but | ||
| > | 1322 | use a wrapper instead. For example: | ||
| > | 1323 | |||
| > | 1324 | @code | ||
| > | 1325 | typedef struct mydata { char const * string; } mydata; | ||
| > | 1326 | ... | ||
| > | 1327 | mydata m; | ||
| > | 1328 | m.string = "..."; | ||
| > | 1329 | @endcode | ||
| > | 1330 | |||
| > | 1331 | Then pass (&m) to the action. | ||
| 1327 | */ | 1332 | */ | |
| 1328 | typedef bool (*pegc_action_f)( pegc_parser * st, | 1333 | typedef bool (*pegc_action_f)( pegc_parser * st, | |
| 1329 | pegc_cursor const *match, | 1334 | pegc_cursor const *match, | |
| 1330 | void * clientData ); | 1335 | void * clientData ); | |
| 1331 | 1336 | |||
| 1332 | /* | 1337 | /* | |
| 1333 | Creates rule which, when it matches, triggers an action | | | 1338 | Creates a rule which, when it matches, triggers an action |
| 1334 | immediately. If rule matches then onMatch(st,match,clientData) is | 1339 | immediately. If rule matches then onMatch(st,match,clientData) is | |
| 1335 | called. onMatch can fetch the matched string using | 1340 | called. onMatch can fetch the matched string using | |
| 1336 | the pegc_cursor argument to the callback or via | 1341 | the pegc_cursor argument to the callback or via | |
| 1337 | pegc_get_match_string() or pegc_get_match_cursor(). | 1342 | pegc_get_match_string() or pegc_get_match_cursor(). | |
| 1338 | 1343 | |||
| 50 hidden lines | ||||
| 1389 | PegcRule const rule, | 1394 | PegcRule const rule, | |
| 1390 | pegc_action_f onMatch, | 1395 | pegc_action_f onMatch, | |
| 1391 | void * clientData ); | 1396 | void * clientData ); | |
| 1392 | 1397 | |||
| 1393 | /** | 1398 | /** | |
| 1394 | Causes queued actions to be activated, in the order they were | | | 1399 | Causes queued actions to be activated in the order they were |
| 1395 | queued. This function returns true if there are no queued | 1400 | queued. This function returns true if there are no queued | |
| 1396 | actions or if all queued actions return true. If an action | 1401 | actions or if all queued actions return true. If an action | |
| 1397 | returns false then this function stops processing actions and | 1402 | returns false then this function stops processing actions and | |
| 1398 | returns false. If st is null or pegc_has_error() returns true | 1403 | returns false. If st is null or pegc_has_error() returns true | |
| 1399 | then this routine returns false. On a severe error | 1404 | then this routine returns false. On a severe error | |
| 15 hidden lines | ||||
| 1415 | A rule which triggers and clears the action queue. It returns | 1420 | A rule which triggers and clears the action queue. It returns | |
| 1416 | the same as pegc_trigger_actions(). Note that the whole queue | 1421 | the same as pegc_trigger_actions(). Note that the whole queue | |
| 1417 | is cleared, even if processing stops due to a failed action. | 1422 | is cleared, even if processing stops due to a failed action. | |
| 1418 | */ | 1423 | */ | |
| 1419 | bool PegcRule_mf_flush_actions( PegcRule const * self, pegc_parser * st ); | 1424 | bool PegcRule_mf_flush_actions( PegcRule const * self, pegc_parser * st ); | |
| > | 1425 | |||
| 1420 | /** | 1426 | /** | |
| 1421 | A rule for PegcRule_mf_flush_actions(). | | | 1427 | A rule object wrapping PegcRule_mf_flush_actions(). |
| 1422 | */ | 1428 | */ | |
| 1423 | extern const PegcRule PegcRule_flush_actions; | 1429 | extern const PegcRule PegcRule_flush_actions; | |
| > | 1430 | |||
| 1424 | /** | 1431 | /** | |
| 1425 | Returns PegcRule_flush_actions. | 1432 | Returns PegcRule_flush_actions. | |
| 1426 | */ | 1433 | */ | |
| 1427 | PegcRule pegc_r_flush_actions(); | 1434 | PegcRule pegc_r_flush_actions(); | |
| 1428 | 1435 | |||
| 11 hidden lines | ||||
| 1440 | For those specific cases, the st parameter may be 0, as they do | 1447 | For those specific cases, the st parameter may be 0, as they do | |
| 1441 | not allocate any extra resources. For all other cases, st must | 1448 | not allocate any extra resources. For all other cases, st must | |
| 1442 | be valid so that we can allocate the resources needed for the | 1449 | be valid so that we can allocate the resources needed for the | |
| 1443 | rule mapping. | 1450 | rule mapping. | |
| 1444 | 1451 | |||
| 1445 | On error ((max<min), st or rule are null, or eof), an invalid | | | 1452 | On error ((max<min), st or rule are null), an invalid rule is |
| 1446 | rule is returned. | | | 1453 | returned. |
| 1447 | */ | 1454 | */ | |
| 1448 | PegcRule pegc_r_repeat( pegc_parser * st, | 1455 | PegcRule pegc_r_repeat( pegc_parser * st, | |
| 1449 | PegcRule const * rule, | 1456 | PegcRule const * rule, | |
| 1450 | size_t min, | 1457 | size_t min, | |
| 1451 | size_t max ); | 1458 | size_t max ); | |
| 3 hidden lines | ||||
| 1455 | 1462 | |||
| 1456 | ((leftRule*) && mainRule && (rightRule*)) | 1463 | ((leftRule*) && mainRule && (rightRule*)) | |
| 1457 | 1464 | |||
| 1458 | This is normally used to match leading or trailing spaces. | 1465 | This is normally used to match leading or trailing spaces. | |
| 1459 | 1466 | |||
| 1460 | Either or both of leftRule and rightRule to be 0, but both st | | | 1467 | Either or both of leftRule and rightRule may be 0, but both st |
| 1461 | and mainRule must be valid. As a special case, if both | 1468 | and mainRule must be valid. As a special case, if both | |
| 1462 | leftRule and rightRule are 0 then the returned rule is a bitwise | 1469 | leftRule and rightRule are 0 then the returned rule is a bitwise | |
| 1463 | copy of mainRule and no extra resources need to be allocated. | 1470 | copy of mainRule and no extra resources need to be allocated. | |
| 1464 | 1471 | |||
| 1465 | There are two policies for how the matched string is set by | 1472 | There are two policies for how the matched string is set by | |
| 27 hidden lines | ||||
| 1493 | PegcRule pegc_r_pad_p( pegc_parser * st, | 1500 | PegcRule pegc_r_pad_p( pegc_parser * st, | |
| 1494 | PegcRule const * leftRule, | 1501 | PegcRule const * leftRule, | |
| 1495 | PegcRule const * mainRule, | 1502 | PegcRule const * mainRule, | |
| 1496 | PegcRule const * rightRule, | 1503 | PegcRule const * rightRule, | |
| 1497 | bool discardLeftRight); | 1504 | bool discardLeftRight); | |
| > | 1505 | /** | ||
| > | 1506 | Equivalent to pegc_r_pad_p() but takes rule objects instead of | ||
| > | 1507 | pointers. | ||
| > | 1508 | */ | ||
| 1498 | PegcRule pegc_r_pad_v( pegc_parser * st, | 1509 | PegcRule pegc_r_pad_v( pegc_parser * st, | |
| 1499 | PegcRule const leftRule, | 1510 | PegcRule const leftRule, | |
| 1500 | PegcRule const mainRule, | 1511 | PegcRule const mainRule, | |
| 1501 | PegcRule const rightRule, | 1512 | PegcRule const rightRule, | |
| 1502 | bool discardLeftRight); | 1513 | bool discardLeftRight); | |
| 109 hidden lines | ||||
| 1612 | legal. | 1623 | legal. | |
| 1613 | 1624 | |||
| 1614 | This rule requires a "relatively" large amount of dynamic | 1625 | This rule requires a "relatively" large amount of dynamic | |
| 1615 | resources (for several sub-rules), but they are not allocated | 1626 | resources (for several sub-rules), but they are not allocated | |
| 1616 | until the parsing starts, and it caches the rules on a | 1627 | until the parsing starts, and it caches the rules on a | |
| 1617 | per-parser basis. This subsequent calls with the same parser | | | 1628 | per-parser basis. Thus subsequent calls with the same parser |
| 1618 | argument re-use the same object. | 1629 | argument re-use the same object. | |
| 1619 | */ | 1630 | */ | |
| 1620 | bool PegcRule_mf_int_dec_strict( PegcRule const * self, pegc_parser * st ); | 1631 | bool PegcRule_mf_int_dec_strict( PegcRule const * self, pegc_parser * st ); | |
| 1621 | 1632 | |||
| 1622 | /** | 1633 | /** | |
| 86 hidden lines | ||||
| 1709 | PegcRule pegc_r_error( char const * msg ); | 1720 | PegcRule pegc_r_error( char const * msg ); | |
| 1710 | 1721 | |||
| 1711 | /** | 1722 | /** | |
| 1712 | Creates a rule which always returns false, never consumes, and | 1723 | Creates a rule which always returns false, never consumes, and | |
| 1713 | sets the parser error string to the printf-style formatted | 1724 | sets the parser error string to the printf-style formatted | |
| 1714 | string. In contrast to pegc_r_error(), the string is copied | | | 1725 | string. In contrast to pegc_r_error(), the string must be |
| 1715 | when the rule is created. | | | 1726 | copied when the rule is created. |
| 1716 | */ | 1727 | */ | |
| 1717 | PegcRule pegc_r_error_v( pegc_parser * st, char const * fmt, va_list ); | 1728 | PegcRule pegc_r_error_v( pegc_parser * st, char const * fmt, va_list ); | |
| 1718 | 1729 | |||
| 1719 | /** | 1730 | /** | |
| 1720 | Identical to pegc_r_error_v() except that it takes (...) instead of a va_list. | 1731 | Identical to pegc_r_error_v() except that it takes (...) instead of a va_list. | |
| 34 hidden lines | ||||
| 1755 | PegcRule const Else ); | 1766 | PegcRule const Else ); | |
| 1756 | 1767 | |||
| 1757 | /** | 1768 | /** | |
| 1758 | Allocates a new printf-style string on the heap. If st is not | 1769 | Allocates a new printf-style string on the heap. If st is not | |
| 1759 | null then the string is owned by st, otherwise the caller owns | 1770 | null then the string is owned by st, otherwise the caller owns | |
| 1760 | it. Returns 0 if fmt is 0 or the result string is 0 bytes. | | | 1771 | it and must free it using free(). Returns 0 if fmt is 0 or the |
| | | 1772 | result string is 0 bytes. | ||
| 1761 | */ | 1773 | */ | |
| 1762 | char * pegc_vmprintf( pegc_parser * st, char const * fmt, va_list args ); | 1774 | char * pegc_vmprintf( pegc_parser * st, char const * fmt, va_list args ); | |
| 1763 | 1775 | |||
| 1764 | /** | 1776 | /** | |
| 1765 | Equivalent to pegc_vmprintf() except that it takes (...) instead of | 1777 | Equivalent to pegc_vmprintf() except that it takes (...) instead of | |
| 107 hidden lines | ||||
| 1873 | All strings allocated by this rule are owned by the parser. They | 1885 | All strings allocated by this rule are owned by the parser. They | |
| 1874 | are freed in two cases: | 1886 | are freed in two cases: | |
| 1875 | 1887 | |||
| 1876 | a) When this rule successfully matches, any previous match is | 1888 | a) When this rule successfully matches, any previous match is | |
| 1877 | free()d and replaced with a new string. A failed attempt to | 1889 | free()d and replaced with a new string. A failed attempt to | |
| 1878 | match match will not clear the previous successful match. | | | 1890 | match will not clear the previous successful match. |
| 1879 | 1891 | |||
| 1880 | b) When pegc_destroy_parser() is called, all underlying | 1892 | b) When pegc_destroy_parser() is called, all underlying | |
| 1881 | metadata is freed (which includes the previous match string). | 1893 | metadata is freed (which includes the previous match string). | |
| 1882 | 1894 | |||
| 1883 | If you want to capture the string during the parse, you could | 1895 | If you want to capture the string during the parse, you could | |
| 36 hidden lines | ||||
| 1920 | #ifdef __cplusplus | 1932 | #ifdef __cplusplus | |
| 1921 | } // extern "C" | 1933 | } // extern "C" | |
| 1922 | #endif | 1934 | #endif | |
| 1923 | 1935 | |||
| 1924 | #endif // WANDERINGHORSE_NET_PEGC_H_INCLUDED | 1936 | #endif // WANDERINGHORSE_NET_PEGC_H_INCLUDED | |