]>
Commit | Line | Data |
---|---|---|
4c45dbaf | 1 | #!/usr/bin/ruby |
2 | # | |
3 | # A derivative of StringScanner that can scan for delimited constructs in | |
4 | # addition to regular expressions. It is a loose port of the Text::Balanced | |
5 | # module for Perl by Damian Conway <damian@cs.monash.edu.au>. | |
6 | # | |
7 | # == Synopsis | |
8 | # | |
9 | # se = DelimScanner::new( myString ) | |
10 | # | |
11 | # == Authors | |
12 | # | |
13 | # * Michael Granger <ged@FaerieMUD.org> | |
14 | # | |
15 | # Copyright (c) 2002, 2003 The FaerieMUD Consortium. Most rights reserved. | |
16 | # | |
17 | # This work is licensed under the Creative Commons Attribution License. To view | |
18 | # a copy of this license, visit http://creativecommons.org/licenses/by/1.0 or | |
19 | # send a letter to Creative Commons, 559 Nathan Abbott Way, Stanford, California | |
20 | # 94305, USA. | |
21 | # | |
22 | # == Version | |
23 | # | |
24 | # $Id$ | |
25 | # | |
26 | ||
27 | require 'strscan' | |
28 | require 'forwardable' | |
29 | ||
30 | ### Add some stuff to the String class to allow easy transformation to Regexp | |
31 | ### and in-place interpolation. | |
class String

    ### Compile the receiver into a Regexp.
    ###
    ### [<tt>casefold</tt>]
    ###   If true, the pattern is compiled with Regexp::IGNORECASE.
    ### [<tt>extended</tt>]
    ###   If true, the pattern is compiled with Regexp::EXTENDED.
    ###
    ### Both flags default to false, which yields the same Regexp as a plain
    ### <tt>Regexp::new( string )</tt>. Returns the new Regexp; raises
    ### RegexpError if the receiver is not a valid pattern.
    def to_re( casefold=false, extended=false )
        options = 0
        options |= Regexp::IGNORECASE if casefold
        options |= Regexp::EXTENDED if extended

        return Regexp::new( self.dup, options )
    end

    ### Ideas for String-interpolation stuff courtesy of Hal E. Fulton
    ### <hal9000@hypermetrics.com> via ruby-talk

    ### Evaluate the receiver as if it were the body of a double-quoted
    ### String literal in the context of <tt>scope</tt> (a Binding), and return
    ### the result. Raises a TypeError if <tt>scope</tt> is not a Binding.
    ###
    ### SECURITY: this calls eval on the receiver, so any <tt>#{...}</tt>
    ### sequence it contains is executed as Ruby code. Never call it on
    ### untrusted text.
    def interpolate( scope )
        unless scope.is_a?( Binding )
            raise TypeError, "Argument to interpolate must be a Binding, not "\
                "a #{scope.class.name}"
        end

        # Escape embedded double quotes so they cannot terminate the literal
        # that is wrapped around the text below.
        copy = self.gsub( /"/, %q:\": )
        eval( '"' + copy + '"', scope )
    end

end
53 | ||
54 | ||
55 | ### A derivative of StringScanner that can scan for delimited constructs in | |
56 | ### addition to regular expressions. | |
57 | class DelimScanner | |
58 | ||
### Scanner exception classes
class MatchFailure < RuntimeError ; end
class DelimiterError < RuntimeError ; end


extend Forwardable
StringScanner.must_C_version


### Class constants

# Version number extracted from the RCS Revision keyword. When the keyword has
# not been expanded (e.g. in a checkout without keyword substitution) there is
# no number to extract, so fall back to '0' instead of calling [] on nil and
# failing while the class is being loaded.
Version = ( /([\d\.]+)/.match( %q{$Revision$} ) || [ nil, '0' ] )[1]

# RCS Id keyword text
Rcsid = %q$Id$

# Pattern to match a valid XML name
XmlName = '[a-zA-Z_:][a-zA-Z0-9:.-]*'
74 | ||
75 | ||
76 | ### Namespace module for DelimString constants | |
module Default

    # Opening => closing codeblock delimiter pairs used by scanCodeblock
    # when the caller does not supply a set of its own.
    CodeblockDelimiters = {
        '{'     => '}',
        'begin' => 'end',
        'do'    => 'end',
    }

    # The default scanMultiple operations, each mapped to the list of
    # arguments it should be called with.
    MultipleFunctions = [
        {
            :scanVariable  => [],
            :scanQuotelike => [],
            :scanCodeblock => [],
        }
    ]

end
include Default
96 | ||
97 | ||
98 | ### Define delegating methods that cast their argument to a Regexp from a | |
99 | ### String. This allows the scanner's scanning methods to be called with | |
100 | ### Strings in addition to Regexps. This was mostly stolen from | |
101 | ### forwardable.rb. | |
def self.def_casting_delegators( *methods )
    methods.each do |methodName|
        # Generate one wrapper per name: it converts a non-Regexp argument
        # to a Regexp (via String#to_re) and then forwards the call to the
        # wrapped @scanner.
        class_eval( <<-EOF, "(--def_casting_delegators--)", 1 )
            def #{methodName}( pattern )
                pattern = pattern.to_s.to_re unless pattern.is_a?( Regexp )
                @scanner.#{methodName}( pattern )
            end
        EOF
    end
end
112 | ||
113 | ||
114 | ### Create a new DelimScanner object for the specified <tt>string</tt>. If | |
115 | ### <tt>dup</tt> is <tt>true</tt>, a duplicate of the target string will be | |
116 | ### used instead of the one given. The target string will be frozen after | |
117 | ### the scanner is created. | |
def initialize( string, dup=true )
    # No match error has been recorded yet, and debugging output is off.
    @matchError = nil
    @debugLevel = 0

    # The StringScanner that does the actual scanning; both arguments are
    # handed to it unchanged.
    @scanner = StringScanner::new( string, dup )
end
123 | ||
124 | ||
125 | ||
126 | ###### | |
127 | public | |
128 | ###### | |
129 | ||
130 | # Here, some delegation trickery is done to make a DelimScanner behave like | |
131 | # a StringScanner. Some methods are directly delegated, while some are | |
132 | # delegated via a method which casts its argument to a Regexp first so some | |
133 | # scanner methods can be called with Strings as well as Regexps. | |
134 | ||
135 | # A list of delegated methods that need casting. | |
NeedCastingDelegators = :scan, :skip, :match?, :check,
    :scan_until, :skip_until, :exist?, :check_until

# Delegate all StringScanner instance methods to the associated scanner
# object, except those that need a casting delegator, which uses an indirect
# delegation method.
#
# Module#instance_methods returns Strings on old Rubies and Symbols on newer
# ones, so the names are compared as Strings. Subtracting a list of Strings
# from a list of Symbols removes nothing, which caused every casting method
# to be defined twice.
castingNames = NeedCastingDelegators.collect {|sym| sym.to_s }
directMethods = StringScanner.instance_methods.reject {|meth|
    castingNames.include?( meth.to_s )
}

def_delegators :@scanner, *directMethods

def_casting_delegators( *NeedCastingDelegators )
146 | ||
147 | ||
148 | ||
149 | # The last match error encountered by the scanner | |
attr_reader :matchError
attr_writer :matchError
# Only the scanner itself (or another scanner) may set the match error.
protected :matchError=

# Debugging level; readable and writable by callers.
attr_accessor :debugLevel
155 | ||
156 | ||
157 | ||
158 | ### Returns <tt>true</tt> if the scanner has encountered a match error. | |
def matchError?
    # An error is pending whenever anything other than nil has been stored.
    !@matchError.nil?
end
162 | ||
163 | ||
164 | ### Starting at the scan pointer, try to match a substring delimited by the | |
165 | ### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt> | |
166 | ### and any character escaped by the specified <tt>escape</tt> | |
167 | ### character/s. If matched, advances the scan pointer and returns a Hash | |
168 | ### with the following key/value pairs on success: | |
169 | ### | |
170 | ### [<tt>:match</tt>] | |
171 | ### The text of the match, including delimiters. | |
172 | ### [<tt>:prefix</tt>] | |
173 | ### The matched prefix, if any. | |
174 | ### | |
175 | ### If the match fails, returns nil. | |
def scanDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
    # Explicit nil arguments select the defaults, too.
    delimiters ||= "'\"`"
    prefix ||= '\\s*'
    escape ||= '\\'

    debugMsg( 1, "Scanning for delimited text: delim = (%s), prefix=(%s), escape=(%s)",
              delimiters, prefix, escape )
    self.matchError = nil

    # Try to match the prefix first to get its length. #match? is one of the
    # casting delegators, so it accepts either a String or a Regexp here;
    # calling String#to_re on the prefix directly would fail for a Regexp.
    prefixLength = self.match?( prefix )
    unless prefixLength
        self.matchError = "Failed to match prefix '%s' at offset %d" %
            [ prefix, self.pointer ]
        return nil
    end

    # Now build a delimited pattern with the specified parameters.
    delimPattern = makeDelimPattern( delimiters, escape, prefix )
    debugMsg( 2, "Delimiter pattern is %s" % delimPattern.inspect )

    # Fail if no match
    matchedString = self.scan( delimPattern )
    unless matchedString
        self.matchError = "No delimited string found."
        return nil
    end

    # Split the scanned text into prefix and delimited parts. The prefix is
    # taken by (start, length): the range 0..prefixLength-1 becomes 0..-1 for
    # an empty prefix and would select the whole match instead of nothing.
    return {
        :match  => matchedString[ prefixLength .. -1 ],
        :prefix => matchedString[ 0, prefixLength ],
    }
end
207 | ||
208 | ||
209 | ### Match using the #scanDelimited method, but only return the match or nil. | |
def extractDelimited( *args )
    # Hand everything to #scanDelimited and keep only the matched text.
    found = scanDelimited( *args )
    return nil unless found

    found[:match]
end
214 | ||
215 | ||
216 | ### Starting at the scan pointer, try to match a substring delimited by the | |
217 | ### specified <tt>delimiters</tt>, skipping the specified <tt>prefix</tt> | |
218 | ### and any character escaped by the specified <tt>escape</tt> | |
219 | ### character/s. If matched, advances the scan pointer and returns the | |
220 | ### length of the matched string; if it fails the match, returns nil. | |
def skipDelimited( delimiters="'\"`", prefix='\\s*', escape='\\' )
    # Explicit nil arguments select the defaults, too.
    delimiters = "'\"`" unless delimiters
    prefix = '\\s*' unless prefix
    escape = '\\' unless escape

    self.matchError = nil

    # Skip whatever the delimited-text pattern matches at the scan pointer;
    # the value returned by #skip is passed straight back to the caller.
    pattern = makeDelimPattern( delimiters, escape, prefix )
    self.skip( pattern )
end
229 | ||
230 | ||
231 | ### Starting at the scan pointer, try to match a substring delimited by | |
232 | ### balanced <tt>delimiters</tt> of the type specified, after skipping the | |
233 | ### specified <tt>prefix</tt>. On a successful match, this method advances | |
234 | ### the scan pointer and returns a Hash with the following key/value pairs: | |
235 | ### | |
236 | ### [<tt>:match</tt>] | |
237 | ### The text of the match, including the delimiting brackets. | |
238 | ### [<tt>:prefix</tt>] | |
239 | ### The matched prefix, if any. | |
240 | ### | |
241 | ### On failure, returns nil. | |
def scanBracketed( delimiters="{([<", prefix='\s*' )
    delimiters ||= "{([<"
    prefix ||= '\s*'
    prefix = prefix.to_re unless prefix.kind_of?( Regexp )

    debugMsg( 1, "Scanning for bracketed text: delimiters = (%s), prefix = (%s)",
              delimiters, prefix )

    self.matchError = nil

    # Quote characters named in the delimiter spec become an alternation
    # (or nil if there are none); a 'q' in the spec turns on quotelike
    # handling.
    quoteChars = delimiters.squeeze.split( // ).find_all {|char| char =~ /["'`]/ }
    qdel = quoteChars.empty? ? nil : quoteChars.join( '|' )
    quotelike = ( delimiters =~ /q/ ) ? true : nil

    # Normalize every bracket to its left-hand form and strip away anything
    # that is not a bracketing delimiter.
    openers = delimiters.
        tr( '[](){}<>', '[[(({{<<' ).
        gsub( /[^#{Regexp.quote('[\\](){}<>')}]+/, '' ).
        squeeze

    # Derive the right-hand brackets. String#tr! returns nil when nothing was
    # translated, which means the spec held no usable bracket.
    closers = openers.dup
    unless closers.tr!( '[({<', '])}>' )
        raise DelimiterError, "Did not find a suitable bracket in delimiter: '#{delimiters}'"
    end

    # Turn both bracket lists into regexp alternations
    ldel = openers.split( // ).collect {|ch| Regexp.quote(ch) }.join( '|' )
    rdel = closers.split( // ).collect {|ch| Regexp.quote(ch) }.join( '|' )

    depth = self.scanDepth
    startPos = self.pointer

    begin
        return matchBracketed( prefix, ldel, qdel, quotelike, rdel )
    rescue MatchFailure => err
        # A failed match is recorded and reported as nil, with the scan
        # pointer put back where it started.
        debugMsg( depth + 1, "Match error: %s" % err.message )
        self.matchError = err.message
        self.pointer = startPos
        return nil
    rescue
        # Any other error still rewinds the scan pointer before propagating.
        self.pointer = startPos
        Kernel::raise
    end
end
291 | ||
292 | ||
293 | ### Match using the #scanBracketed method, but only return the match or nil. | |
def extractBracketed( *args )
    # Hand everything to #scanBracketed and keep only the matched text.
    found = scanBracketed( *args )
    return nil unless found

    found[:match]
end
298 | ||
299 | ||
300 | ### Starting at the scan pointer, try to match a substring with | |
301 | ### #scanBracketed. On a successful match, this method advances the scan | |
302 | ### pointer and returns the length of the match, including the delimiters | |
303 | ### and any prefix that was skipped. On failure, returns nil. | |
def skipBracketed( *args )
    # Accepts the same arguments as #scanBracketed. Returns the distance the
    # scan pointer advanced (skipped prefix plus match), or nil if nothing
    # matched.
    startPos = self.pointer

    # #scanBracketed leaves the pointer just past the match on success and
    # rewinds it itself on failure, so there is nothing to restore here. The
    # previous implementation rewound the pointer unconditionally and
    # computed the length from an undefined `prefix` and the result Hash.
    return nil unless scanBracketed( *args )

    return self.pointer - startPos
end
315 | ||
316 | ||
317 | ### Extracts and segments text from the scan pointer forward that occurs | |
318 | ### between (balanced) specified tags, after skipping the specified | |
319 | ### <tt>prefix</tt>. If the opentag argument is <tt>nil</tt>, a pattern which | |
320 | ### will match any standard HTML/XML tag will be used. If the | |
321 | ### <tt>closetag</tt> argument is <tt>nil</tt>, a pattern is created which | |
322 | ### prepends a <tt>/</tt> character to the matched opening tag, after any | |
323 | ### bracketing characters. The <tt>options</tt> argument is a Hash of one or | |
324 | ### more options which govern the matching operation. They are described in | |
325 | ### more detail in the Description section of 'lib/DelimScanner.rb'. On a | |
326 | ### successful match, this method advances the scan pointer and returns a Hash with the following key/value pairs: | |
327 | ### | |
328 | ### [<tt>:match</tt>] | |
329 | ### The text of the match, including the delimiting tags. | |
330 | ### [<tt>:prefix</tt>] | |
331 | ### The matched prefix, if any. | |
332 | ### | |
333 | ### On failure, returns nil. | |
def scanTagged( opentag=nil, closetag=nil, prefix='\s*', options={} )
    prefix ||= '\s*'

    # Without an explicit opening tag, fall back to a pattern for a generic
    # tag whose body may contain quoted strings.
    ldel = opentag || %Q,<\\w+(?:#{ makeDelimPattern(%q:'":) }|[^>])*>,
    rdel = closetag
    raise ArgumentError, "Options argument must be a hash" unless options.kind_of?( Hash )

    # The :reject and :ignore options may each be given as a single pattern
    # or as an Array of patterns; Arrays are folded into one alternation and
    # a missing option becomes the empty String.
    alternation = lambda {|spec|
        spec.is_a?( Array ) ? spec.join( "|" ) : ( spec || '' )
    }
    failmode = options[:fail]
    bad = alternation.call( options[:reject] )
    ignore = alternation.call( options[:ignore] )

    self.matchError = nil
    startPos = self.pointer
    depth = self.scanDepth

    begin
        return matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
    rescue MatchFailure => err
        # A failed match is recorded and reported as nil, with the scan
        # pointer put back where it started.
        debugMsg( depth + 1, "Match error: %s" % err.message )
        self.matchError = err.message
        self.pointer = startPos
        return nil
    rescue
        # Any other error still rewinds the scan pointer before propagating.
        self.pointer = startPos
        Kernel::raise
    end
end
373 | ||
374 | ||
375 | ### Match using the #scanTagged method, but only return the match or nil. | |
def extractTagged( *args )
    # Hand everything to #scanTagged and keep only the matched text.
    found = scanTagged( *args )
    return nil unless found

    found[:match]
end
380 | ||
381 | ||
382 | ### Starting at the scan pointer, try to match a substring with | |
383 | ### #scanTagged. On a successful match, this method advances the scan | |
384 | ### pointer and returns the length of the match, including any delimiters | |
385 | ### and any prefix that was skipped. On failure, returns nil. | |
def skipTagged( *args )
    # Accepts the same arguments as #scanTagged. Returns the distance the
    # scan pointer advanced (skipped prefix plus match), or nil if nothing
    # matched.
    startPos = self.pointer

    # #scanTagged leaves the pointer just past the match on success and
    # rewinds it itself on failure, so there is nothing to restore here. The
    # previous implementation rewound the pointer unconditionally and
    # computed the length from an undefined `prefix` and the result Hash.
    return nil unless scanTagged( *args )

    return self.pointer - startPos
end
397 | ||
398 | ||
399 | # :NOTE: | |
400 | # Since the extract_quotelike function isn't documented at all in | |
401 | # Text::Balanced, I'm only guessing this is correct... | |
402 | ||
403 | ### Starting from the scan pointer, try to match any one of the various Ruby | |
404 | ### quotes and quotelike operators after skipping the specified | |
405 | ### <tt>prefix</tt>. Nested backslashed delimiters, embedded balanced | |
406 | ### bracket delimiters (for the quotelike operators), and trailing modifiers | |
407 | ### are all caught. If <tt>matchRawRegex</tt> is <tt>true</tt>, inline | |
408 | ### regexen (eg., <tt>/pattern/</tt>) are matched as well. Advances the scan | |
409 | ### pointer and returns a Hash with the following key/value pairs on | |
410 | ### success: | |
411 | ### | |
412 | ### [<tt>:match</tt>] | |
413 | ### The entire text of the match. | |
414 | ### [<tt>:prefix</tt>] | |
415 | ### The matched prefix, if any. | |
416 | ### [<tt>:quoteOp</tt>] | |
417 | ### The name of the quotelike operator (if any) (eg., '%Q', '%r', etc). | |
418 | ### [<tt>:leftDelim</tt>] | |
419 | ### The left delimiter of the first block of the operation. | |
420 | ### [<tt>:delimText</tt>] | |
421 | ### The text of the first block of the operation. | |
422 | ### [<tt>:rightDelim</tt>] | |
423 | ### The right delimiter of the first block of the operation. | |
424 | ### [<tt>:modifiers</tt>] | |
425 | ### The trailing modifiers on the operation (if any). | |
426 | ### | |
427 | ### On failure, returns nil. | |
def scanQuotelike( prefix='\s*', matchRawRegex=true )
    self.matchError = nil
    startPos = self.pointer
    depth = self.scanDepth

    begin
        return matchQuotelike( prefix, matchRawRegex )
    rescue MatchFailure => err
        # A failed match is recorded and reported as nil, with the scan
        # pointer put back where it started.
        debugMsg( depth + 1, "Match error: %s" % err.message )
        self.matchError = err.message
        self.pointer = startPos
        return nil
    rescue
        # Any other error still rewinds the scan pointer before propagating.
        self.pointer = startPos
        Kernel::raise
    end
end
450 | ||
451 | ||
452 | ### Match using the #scanQuotelike method, but only return the match or nil. | |
def extractQuotelike( *args )
    # Hand everything to #scanQuotelike and keep only the matched text.
    found = scanQuotelike( *args )
    return nil unless found

    found[:match]
end
457 | ||
458 | ||
459 | ### Starting at the scan pointer, try to match a substring with | |
460 | ### #scanQuotelike. On a successful match, this method advances the scan | |
461 | ### pointer and returns the length of the match, including any delimiters | |
462 | ### and any prefix that was skipped. On failure, returns nil. | |
def skipQuotelike( *args )
    # Accepts the same arguments as #scanQuotelike. Returns the distance the
    # scan pointer advanced (skipped prefix plus match), or nil if nothing
    # matched.
    startPos = self.pointer

    # #scanQuotelike leaves the pointer just past the match on success and
    # rewinds it itself on failure, so there is nothing to restore here. The
    # previous implementation rewound the pointer unconditionally and
    # computed the length from an undefined `prefix` and the result Hash.
    return nil unless scanQuotelike( *args )

    return self.pointer - startPos
end
474 | ||
475 | ||
476 | ### Starting from the scan pointer, try to match a Ruby variable after | |
477 | ### skipping the specified prefix. | |
def scanVariable( prefix='\s*' )
    self.matchError = nil
    startPos = self.pointer
    depth = self.scanDepth

    begin
        return matchVariable( prefix )
    rescue MatchFailure => err
        # A failed match is recorded and reported as nil, with the scan
        # pointer put back where it started.
        debugMsg( depth + 1, "Match error: %s" % err.message )
        self.matchError = err.message
        self.pointer = startPos
        return nil
    rescue
        # Any other error still rewinds the scan pointer before propagating.
        self.pointer = startPos
        Kernel::raise
    end
end
499 | ||
500 | ||
501 | ### Match using the #scanVariable method, but only return the match or nil. | |
def extractVariable( *args )
    # Hand everything to #scanVariable and keep only the matched text.
    found = scanVariable( *args )
    return nil unless found

    found[:match]
end
506 | ||
507 | ||
508 | ### Starting at the scan pointer, try to match a substring with | |
509 | ### #scanVariable. On a successful match, this method advances the scan | |
510 | ### pointer and returns the length of the match, including any delimiters | |
511 | ### and any prefix that was skipped. On failure, returns nil. | |
def skipVariable( *args )
    # Accepts the same arguments as #scanVariable. Returns the distance the
    # scan pointer advanced (skipped prefix plus match), or nil if nothing
    # matched.
    startPos = self.pointer

    # #scanVariable leaves the pointer just past the match on success and
    # rewinds it itself on failure, so there is nothing to restore here. The
    # previous implementation rewound the pointer unconditionally and
    # computed the length from an undefined `prefix` and the result Hash.
    return nil unless scanVariable( *args )

    return self.pointer - startPos
end
523 | ||
524 | ||
525 | ### Starting from the scan pointer, and skipping the specified | |
526 | ### <tt>prefix</tt>, try to recognize and match a balanced bracket-, | |
527 | ### do/end-, or begin/end-delimited substring that may contain unbalanced | |
528 | ### delimiters inside quotes or quotelike operations. | |
def scanCodeblock( innerDelim=CodeblockDelimiters, prefix='\s*', outerDelim=innerDelim )
    self.matchError = nil
    startPos = self.pointer

    # Explicit nil arguments select the defaults, too.
    prefix ||= '\s*'
    innerDelim ||= CodeblockDelimiters
    outerDelim ||= innerDelim

    # The debugging depth is the number of scan* methods already on the call
    # stack.
    # NOTE(review): this depends on the backtrace label format (a backquote
    # before the method name); confirm it on the Ruby versions in use.
    scanFrames = caller( 1 ).find_all {|frame|
        frame =~ /in `scan(Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
    }
    depth = scanFrames.length

    begin
        debugMsg 3, "Calling matchCodeBlock( %s, %s, %s )",
            prefix.inspect, innerDelim.inspect, outerDelim.inspect
        return matchCodeblock( prefix, innerDelim, outerDelim )
    rescue MatchFailure => err
        # A failed match is recorded and reported as nil, with the scan
        # pointer put back where it started.
        debugMsg( depth + 1, "Match error: %s" % err.message )
        self.matchError = err.message
        self.pointer = startPos
        return nil
    rescue
        # Any other error still rewinds the scan pointer before propagating.
        self.pointer = startPos
        Kernel::raise
    end
end
558 | ||
559 | ||
560 | ### Match using the #scanCodeblock method, but only return the match or nil. | |
def extractCodeblock( *args )
    # Hand everything to #scanCodeblock and keep only the matched text.
    found = scanCodeblock( *args )
    return nil unless found

    found[:match]
end
565 | ||
566 | ||
567 | ### Starting at the scan pointer, try to match a substring with | |
568 | ### #scanCodeblock. On a successful match, this method advances the scan | |
569 | ### pointer and returns the length of the match, including any delimiters | |
570 | ### and any prefix that was skipped. On failure, returns nil. | |
def skipCodeblock( *args )
    # Accepts the same arguments as #scanCodeblock. Returns the distance the
    # scan pointer advanced (skipped prefix plus match), or nil if nothing
    # matched.
    startPos = self.pointer

    # #scanCodeblock leaves the pointer just past the match on success and
    # rewinds it itself on failure, so there is nothing to restore here. The
    # previous implementation rewound the pointer unconditionally and
    # computed the length from an undefined `prefix` and the result Hash.
    return nil unless scanCodeblock( *args )

    return self.pointer - startPos
end
582 | ||
583 | ||
584 | ||
585 | ||
586 | ######### | |
587 | protected | |
588 | ######### | |
589 | ||
590 | ### Scan the string from the scan pointer forward, skipping the specified | |
591 | ### <tt>prefix</tt> and trying to match a string delimited by bracketing | |
592 | ### delimiters <tt>ldel</tt> and <tt>rdel</tt> (Regexp objects), and quoting | |
593 | ### delimiters <tt>qdel</tt> (Regexp). If <tt>quotelike</tt> is | |
594 | ### <tt>true</tt>, Ruby quotelike constructs will also be honored. | |
def matchBracketed( prefix, ldel, qdel, quotelike, rdel )
    # Remember where scanning began so the prefix can be reported separately.
    origin = self.pointer
    debugMsg( 2, "matchBracketed starting at pos = %d: prefix = %s, "\
              "ldel = %s, qdel = %s, quotelike = %s, rdel = %s",
              origin, prefix.inspect, ldel.inspect, qdel.inspect, quotelike.inspect,
              rdel.inspect )

    # The prefix has to match before anything else is attempted
    unless self.skip( prefix )
        raise MatchFailure, "Did not find prefix: #{prefix.inspect}"
    end

    # The opening bracket is expected right here
    bracketPos = self.pointer
    debugMsg( 3, "Found prefix. Left delim pointer at %d", bracketPos )

    firstBracket = self.scan( ldel )
    unless firstBracket
        raise MatchFailure, "Did not find opening bracket after prefix: '%s' (%d)" %
            [ self.string[origin..bracketPos].chomp, bracketPos ]
    end

    # Stack of the opening brackets that are still waiting to be closed
    stack = [ firstBracket ]
    debugMsg( 3, "Found opening bracket. Nesting = %s", stack.inspect )

    while self.rest?

        debugMsg( 5, "Starting scan loop. Nesting = %s", stack.inspect )

        # A backslashed character never counts as a delimiter
        if self.skip( /\\./ )
            debugMsg( 4, "Skipping backslashed literal at offset %d: '%s'",
                      self.pointer - 2, self.string[ self.pointer - 2, 2 ].chomp )

        # Another opening bracket: one level deeper
        elsif (( nested = self.scan(ldel) ))
            debugMsg( 4, "Found opening delim %s at offset %d",
                      nested.inspect, self.pointer - 1 )
            stack.push nested

        # A closing bracket: it has to pair up with the innermost opener
        elsif (( closer = self.scan(rdel) ))
            debugMsg( 4, "Found closing delim %s at offset %d",
                      closer.inspect, self.pointer - 1 )

            # :TODO: When is this code reached?
            if stack.empty?
                raise MatchFailure, "Unmatched closing bracket '%s' at offset %d" %
                    [ closer, self.pointer - 1 ]
            end

            # Work out the complement of the innermost open bracket
            expected = stack.pop.tr( '({[<', ')}]>' )
            debugMsg( 4, "Got a '%s' bracket off nesting stack", expected )

            unless expected == closer
                raise MatchFailure, "Mismatched closing bracket at offset %d: "\
                    "Expected '%s', but found '%s' instead." %
                    [ self.pointer - 1, expected, closer ]
            end

            # Once the outermost bracket has been closed, the match is done
            if stack.empty?
                debugMsg( 4, "Finished with scan: nesting stack empty." )
                break
            end

        # A quote character: swallow everything up to its closing partner
        elsif qdel && (( quote = self.scan(qdel) ))
            closingQuote = /[^\\#{quote}]*(?:\\.[^\\#{quote}]*)*(#{Regexp::quote(quote)})/

            unless self.scan( closingQuote )
                raise MatchFailure, "Unmatched embedded quote (%s) at offset %d" %
                    [ quote, self.pointer - 1 ]
            end
            debugMsg( 4, "Skipping quoted chunk. Scan pointer now at offset %d", self.pointer )

        # An embedded quotelike construct
        elsif quotelike && self.scanQuotelike
            debugMsg( 4, "Matched a quotelike. Scan pointer now at offset %d", self.pointer )

        # Anything else: step over a run of alphanumerics, or else over one
        # character of any kind
        else
            self.skip( /(?:[a-zA-Z0-9]+|.)/m )
            debugMsg 5, "Skipping '%s' at offset %d." %
                [ self.matched, self.pointer ]
        end

    end

    # Brackets still on the stack never found their closing partner
    unless stack.empty?
        raise MatchFailure, "Unmatched opening bracket(s): %s.. at offset %d" %
            [ stack.join('..'), self.pointer ]
    end

    outcome = {
        :match => self.string[ bracketPos .. (self.pointer - 1) ],
        :prefix => self.string[ origin, (bracketPos-origin) ],
    }
    debugMsg 1, "matchBracketed succeeded: %s" % outcome.inspect
    return outcome
end
709 | ||
710 | ||
711 | ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and | |
712 | ### try to match text bracketed by the given left and right tag-delimiters | |
713 | ### (<tt>ldel</tt> and <tt>rdel</tt>). | |
def matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
    # Arguments:
    #   prefix   - pattern that must match (and is skipped) before the tag
    #   ldel     - pattern for the opening tag
    #   rdel     - pattern for the closing tag; if nil, one is generated from
    #              the opening tag that was found. Occurrences of $1..$9 are
    #              replaced with the corresponding group of the opening-tag
    #              match.
    #   failmode - :max or :para to tolerate a missing closing tag; anything
    #              else makes that an error
    #   bad      - pattern that is not allowed inside the tagged text
    #   ignore   - pattern to step over inside the tagged text
    #
    # Returns a Hash with the :match and :prefix text, and raises
    # MatchFailure if no match is possible.
    #
    # SECURITY: a non-nil rdel is evaluated as a double-quoted String in this
    # method's binding (String#interpolate), so it must never come from an
    # untrusted source. The local variable names below are visible to that
    # evaluation.
    failmode = failmode.to_s.intern if failmode
    startPos = self.pointer
    debugMsg 2, "matchTagged starting at pos = %d: prefix = %s, "\
        "ldel = %s, rdel = %s, failmode = %s, bad = %s, ignore = %s",
        startPos, prefix.inspect, ldel.inspect, rdel.inspect,
        failmode.inspect, bad.inspect, ignore.inspect

    rdelspec = ''
    openTagPos, textPos, paraPos, closeTagPos, endPos = ([nil] * 5)
    match = nil

    # Look for the prefix
    raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless
        self.skip( prefix )

    openTagPos = self.pointer
    debugMsg 3, "Found prefix. Pointer now at offset %d" % self.pointer

    # Look for the opening delimiter
    unless (( match = self.scan(ldel) ))
        raise MatchFailure, "Did not find opening tag %s at offset %d" %
            [ ldel.inspect, self.pointer ]
    end

    textPos = self.pointer
    debugMsg 3, "Found left delimiter '%s': offset now %d" % [ match, textPos ]

    # Make a right delim out of the tag we found if none was specified
    if rdel.nil?
        rdelspec = makeClosingTag( match )
        debugMsg 3, "Generated right-delimiting tag: %s" % rdelspec.inspect
    else
        # Turn each $N into an interpolated reference to group N of the
        # opening-tag match. The replacement has to produce '#{self[N]}':
        # a bare 'self[N]' is not evaluated by the interpolation and ended
        # up as literal text in the closing-tag pattern.
        rdelspec = rdel.gsub( /(\A|[^\\])\$([1-9])/, '\1#{self[\2]}' ).interpolate( binding )
        debugMsg 3, "Right delimiter (after interpolation) is: %s" % rdelspec.inspect
    end

    # Process until we reach the end of the string or find a closing tag
    while self.rest? && closeTagPos.nil?

        # Skip backslashed characters
        if (( self.skip( /^\\./ ) ))
            debugMsg 4, "Skipping backslashed literal at offset %d" % self.pointer
            next

        # Match paragraphs-break for fail == :para
        elsif (( matchlength = self.skip( /^(\n[ \t]*\n)/ ) ))
            paraPos ||= self.pointer - matchlength
            debugMsg 4, "Found paragraph position at offset %d" % paraPos

        # Match closing tag
        elsif (( matchlength = self.skip( rdelspec ) ))
            closeTagPos = self.pointer - matchlength
            debugMsg 3, "Found closing tag at offset %d" % closeTagPos

        # If we're ignoring anything, try to match and move beyond it
        elsif ignore && !ignore.empty? && self.skip(ignore)
            debugMsg 3, "Skipping ignored text '%s' at offset %d" %
                [ self.matched, self.pointer - self.matched_size ]
            next

        # If there's a "bad" pattern, try to match it, shorting the
        # outer loop if it matches in para or max mode, or failing with
        # a match error if not.
        elsif bad && !bad.empty? && self.match?( bad )
            if failmode == :para || failmode == :max
                break
            else
                raise MatchFailure, "Found invalid nested tag '%s' at offset %d" %
                    [ match, self.pointer ]
            end

        # If there's another opening tag, make a recursive call to
        # ourselves to move the cursor beyond it
        elsif (( match = self.scan( ldel ) ))
            self.unscan

            begin
                self.matchTagged( prefix, ldel, rdel, failmode, bad, ignore )
            rescue MatchFailure
                # The nested call reports failure by raising, so the
                # tolerant fail modes have to be honoured here: the old
                # test of its return value could never be true. In :para
                # and :max mode the match is cut short; otherwise the
                # nested error propagates unchanged, as it did before.
                break if failmode == :para || failmode == :max
                Kernel::raise
            end

        else
            self.pointer += 1
            debugMsg 5, "Advanced scan pointer to offset %d" % self.pointer
        end
    end

    # If the closing hasn't been found, then it's a "short" match, which is
    # okay if the failmode indicates we don't care. Otherwise, it's an error.
    unless closeTagPos
        debugMsg 3, "No close tag position found. "

        if failmode == :max || failmode == :para
            closeTagPos = self.pointer - 1
            debugMsg 4, "Failmode %s tolerates no closing tag. Close tag position set to %d" %
                [ failmode.inspect, closeTagPos ]

            # Sync the scan pointer and the paragraph marker if it's set.
            if failmode == :para && paraPos
                self.pointer = paraPos + 1
            end
        else
            raise MatchFailure, "No closing tag found."
        end
    end

    rval = {
        :match => self.string[ openTagPos .. (self.pointer - 1) ],
        :prefix => self.string[ startPos, (openTagPos-startPos) ],
    }
    debugMsg 1, "matchTagged succeeded: %s" % rval.inspect
    return rval
end
832 | ||
833 | ||
834 | ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and | |
835 | ### try to match text inside a Ruby quotelike construct. If | |
836 | ### <tt>matchRawRegex</tt> is <tt>true</tt>, the regex construct | |
837 | ### <tt>/pattern/</tt> is also matched. | |
def matchQuotelike( prefix, matchRawRegex )
  # Skips +prefix+ from the scan pointer, then matches exactly one quotelike
  # construct: a plain quoted string ("", '', ``), a raw /regex/ (only when
  # +matchRawRegex+ is true), a %-literal (%, %q, %Q, %w, %x, %r), or a
  # here-document.
  #
  # Returns a Hash with :prefix, :match, :leftDelim, :delimText and
  # :rightDelim; plain quotes and %-literals also carry :modifiers, and
  # %-literals and here-documents also carry :quoteOp.
  #
  # Raises MatchFailure if the prefix or a complete construct isn't found.
  startPos = self.pointer
  debugMsg 2, "matchQuotelike starting at pos = %d: prefix = %s, "\
    "matchRawRegex = %s",
    startPos, prefix.inspect, matchRawRegex.inspect

  rval = nil

  # Characters accepted as trailing modifiers on a regex literal
  modifierPattern = if RUBY_VERSION >= "1.7.3" then /[imoxs]*/ else /[imox]*/ end

  # Look for the prefix
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/"
  end
  oppos = self.pointer

  # Peek at the next character
  # If the initial quote is a simple quote, our job is easy
  if self.check(/^["`']/) || ( matchRawRegex && self.check(%r:/:) )
    initial = self.matched
    quoted = Regexp.quote( initial )

    # Build the pattern for matching the simple string. The doubled
    # backslashes are deliberate: the regex must see '\\' (a literal
    # backslash) so that backslash-escaped delimiters inside the string
    # don't terminate it.
    pattern = '%s [^\\\\%s]* (\\\\.[^\\\\%s]*)* %s' %
      [ quoted, quoted, quoted, quoted ]
    debugMsg 2, "Matching simple quote at offset %d with /%s/" %
      [ self.pointer, pattern ]

    # Search for it, raising an exception if it's not found. (No 's' flag:
    # in Ruby that selects an encoding rather than dot-matches-newline,
    # which 'm' already provides.)
    unless self.scan( /#{pattern}/xim )
      raise MatchFailure,
        "Did not find closing delimiter to match '%s' at '%s...' (offset %d)" %
        [ initial, self.string[ oppos, 20 ].chomp, self.pointer ]
    end

    modpos = self.pointer
    rdpos = modpos - 1

    # If we're matching a regex, look for any trailing modifiers
    self.scan( modifierPattern ) if initial == '/'

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :leftDelim  => self.string[ oppos, 1 ],
      :delimText  => self.string[ (oppos+1) .. (rdpos-1) ],
      :rightDelim => self.string[ rdpos, 1 ],
      :modifiers  => self.string[ modpos, (self.pointer-modpos) ],
    }

  # If it's one of the fancy quotelike operators, our job is somewhat
  # complicated (though nothing like Perl's, thank the Goddess)
  elsif self.scan( %r:%[rwqQx]?(?=\S): )
    op = self.matched
    debugMsg 2, "Matching a real quotelike ('%s') at offset %d" %
      [ op, self.pointer ]
    modifiers = nil

    ldpos = self.pointer
    strpos = ldpos + 1

    # Peek ahead to see what the delimiter is
    ldel = self.check( /\S/ )

    # If it's a bracketing character, just use matchBracketed
    if ldel =~ /[\[(<{]/
      rdel = ldel.tr( '[({<', '])}>' )
      debugMsg 4, "Left delim is a bracket: %s; looking for compliment: %s" %
        [ ldel, rdel ]
      self.matchBracketed( '', Regexp::quote(ldel), nil, nil, Regexp::quote(rdel) )
    else
      debugMsg 4, "Left delim isn't a bracket: '#{ldel}'; looking for closing instance"

      # The delimiter can be any non-space character, including regex
      # metacharacters such as '|', '*' or '.', so it has to be quoted
      # before being interpolated into the pattern.
      qdel = Regexp::quote( ldel )
      self.scan( /#{qdel}[^\\#{qdel}]*(\\.[^\\#{qdel}]*)*#{qdel}/ ) or
        raise MatchFailure,
          "Can't find a closing delimiter '%s' at '%s...' (offset %d)" %
          [ ldel, self.rest[0,20].chomp, self.pointer ]
    end
    rdelpos = self.pointer - 1

    # Match modifiers for Regexp quote
    if op == '%r'
      modifiers = self.scan( modifierPattern ) || ''
    end

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, 1 ],
      :delimText  => self.string[ strpos, (rdelpos-strpos) ],
      :rightDelim => self.string[ rdelpos, 1 ],
      :modifiers  => modifiers,
    }

  # If it's a here-doc, things get even hairier.
  elsif self.scan( %r:<<(-)?: )
    debugMsg 2, "Matching a here-document at offset %d" % self.pointer
    op = self.matched

    # If there was a dash, start with optional whitespace
    indent = self[1] ? '\s*' : ''
    ldpos = self.pointer
    label = ''

    # Plain identifier
    if self.scan( /[A-Za-z_]\w*/ )
      label = self.matched
      debugMsg 3, "Setting heredoc terminator to bare identifier '%s'" % label

    # Quoted string ('m' so that a backslash may escape a newline; the
    # Perl-style 's' flag means an encoding in Ruby)
    elsif self.scan( / ' ([^'\\]* (?:\\.[^'\\]*)*) ' /mx ) ||
        self.scan( / " ([^"\\]* (?:\\.[^"\\]*)*) " /mx ) ||
        self.scan( / ` ([^`\\]* (?:\\.[^`\\]*)*) ` /mx )
      label = self[1]
      debugMsg 3, "Setting heredoc terminator to quoted identifier '%s'" % label

    # Ruby, unlike Perl, requires a terminal, even if it's only an empty
    # string
    else
      raise MatchFailure,
        "Missing heredoc terminator before end of line at "\
        "'%s...' (offset %d)" %
        [ self.rest[0,20].chomp, self.pointer ]
    end
    extrapos = self.pointer

    # Advance to the beginning of the string
    self.skip( /.*\n/ )
    strpos = self.pointer
    debugMsg 3, "Scanning until /\\n#{indent}#{label}\\n/m"

    # Match to the label. A quoted label may contain regex metacharacters,
    # so it is matched literally.
    unless self.scan_until( /\n#{indent}#{Regexp::quote(label)}\n/m )
      raise MatchFailure,
        "Couldn't find heredoc terminator '%s' after '%s...' (offset %d)" %
        [ label, self.rest[0,20].chomp, self.pointer ]
    end

    rdpos = self.pointer - self.matched_size

    rval = {
      :prefix     => self.string[ startPos, (oppos-startPos) ],
      :match      => self.string[ oppos .. (self.pointer - 1) ],
      :quoteOp    => op,
      :leftDelim  => self.string[ ldpos, (extrapos-ldpos) ],
      :delimText  => self.string[ strpos, (rdpos-strpos) ],
      :rightDelim => self.string[ rdpos, (self.pointer-rdpos) ],
    }

  else
    raise MatchFailure,
      "No quotelike operator found after prefix at '%s...'" %
      self.rest[0,20].chomp
  end

  debugMsg 1, "matchQuotelike succeeded: %s" % rval.inspect
  return rval
end
1001 | ||
1002 | ||
1003 | ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and | |
1004 | ### try to match text that is a valid Ruby variable or identifier, ...? | |
1005 | def matchVariable( prefix ) | |
1006 | startPos = self.pointer | |
1007 | debugMsg 2, "matchVariable starting at pos = %d: prefix = %s", | |
1008 | startPos, prefix.inspect | |
1009 | ||
1010 | # Look for the prefix | |
1011 | raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/" unless | |
1012 | self.skip( prefix ) | |
1013 | ||
1014 | varPos = self.pointer | |
1015 | ||
1016 | # If the variable matched is a predefined global, no need to look for an | |
1017 | # identifier | |
1018 | unless self.scan( %r~\$(?:[!@/\\,;.<>$?:_\~&`'+]|-\w|\d+)~ ) | |
1019 | ||
1020 | debugMsg 2, "Not a predefined global at '%s...' (offset %d)" % | |
1021 | [ self.rest[0,20].chomp, self.pointer ] | |
1022 | ||
1023 | # Look for a valid identifier | |
1024 | unless self.scan( /\*?(?:[$@]|::)?(?:[a-z_]\w*(?:::\s*))*[_a-z]\w*/is ) | |
1025 | raise MatchFailure, "No variable found: Bad identifier (offset %d)" % self.pointer | |
1026 | end | |
1027 | end | |
1028 | ||
1029 | debugMsg 2, "Matched '%s' at offset %d" % [ self.matched, self.pointer ] | |
1030 | ||
1031 | # Match methodchain with trailing codeblock | |
1032 | while self.rest? | |
1033 | # Match a regular chained method | |
1034 | next if scanCodeblock( {"("=>")", "do"=>"end", "begin"=>"end", "{"=>"}"}, | |
1035 | /\s*(?:\.|::)\s*[a-zA-Z_]\w+\s*/ ) | |
1036 | ||
1037 | # Match a trailing block or an element ref | |
1038 | next if scanCodeblock( nil, /\s*/, {'{' => '}', '[' => ']'} ) | |
1039 | ||
1040 | # This matched a dereferencer in Perl, which doesn't have any | |
1041 | # equivalent in Ruby. | |
1042 | #next if scanVariable( '\s*(\.|::)\s*' ) | |
1043 | ||
1044 | # Match a method call without parens (?) | |
1045 | next if self.scan( '\s*(\.|::)\s*\w+(?![{([])' ) | |
1046 | ||
1047 | break | |
1048 | end | |
1049 | ||
1050 | rval = { | |
1051 | :match => self.string[ varPos .. (self.pointer - 1) ], | |
1052 | :prefix => self.string[ startPos, (varPos-startPos) ], | |
1053 | } | |
1054 | debugMsg 1, "matchVariable succeeded: %s" % rval.inspect | |
1055 | return rval | |
1056 | end | |
1057 | ||
1058 | ||
1059 | ### Starting from the scan pointer, skip the specified <tt>prefix</tt>, and | |
1060 | ### try to match text inside a Ruby code block construct which must be | |
1061 | ### delimited by the specified <tt>outerDelimPairs</tt>. It may optionally | |
1062 | ### contain sub-blocks delimited with the given <tt>innerDelimPairs</tt>. | |
def matchCodeblock( prefix, innerDelimPairs, outerDelimPairs )
  # Skips +prefix+ from the scan pointer, then matches a code block opened
  # by one of the keys of +outerDelimPairs+ and closed by the corresponding
  # value. Nested blocks are recognised using +innerDelimPairs+.
  #
  # Returns a Hash with :match (the block, delimiters included) and :prefix
  # (the text skipped before it). Raises MatchFailure if the prefix or the
  # opening delimiter isn't found, if a mismatched or improperly nested
  # delimiter is encountered, or if the text ends before the closing
  # delimiter; raises DelimiterError if the opening delimiter has no
  # closing counterpart in +outerDelimPairs+.
  startPos = self.pointer
  debugMsg 2, "Starting matchCodeblock at offset %d (%s)", startPos, self.rest.inspect

  # Look for the prefix
  unless self.skip( prefix )
    raise MatchFailure, "Did not find prefix: /#{prefix.inspect}/"
  end
  codePos = self.pointer
  debugMsg 3, "Skipped prefix '%s' to offset %d" %
    [ self.matched, codePos ]

  # Build a regexp for the outer delimiters
  ldelimOuter = "(" + outerDelimPairs.keys.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  rdelimOuter = "(" + outerDelimPairs.values.uniq.collect {|delim| Regexp::quote(delim)}.join('|') + ")"
  debugMsg 4, "Using /%s/ as the outer delim regex" % ldelimOuter

  unless self.scan( ldelimOuter )
    raise MatchFailure, %q:Did not find opening bracket at "%s..." offset %d: %
      [ self.rest[0,20].chomp, codePos ]
  end

  # Look up the corresponding outer delimiter
  closingDelim = outerDelimPairs[self.matched] or
    raise DelimiterError, "Could not find closing delimiter for '%s'" %
      self.matched

  debugMsg 3, "Scanning for closing delim '#{closingDelim}'"

  # Holds the text of the closing delimiter once it's been seen; it stays
  # empty if the text runs out first.
  matched = ''

  # True when the next token may legitimately be a raw /regex/ (ie., at the
  # start of the block or after an operator or nested block).
  patvalid = true

  # Scan until the end of the text or until an explicit break
  while self.rest?
    debugMsg 5, "Scanning from offset %d (%s)", self.pointer, self.rest.inspect
    matched = ''

    # Skip comments
    debugMsg 5, "Trying to match a comment"
    if self.scan( /\s*#.*/ )
      debugMsg 4, "Skipping comment '%s' to offset %d" %
        [ self.matched, self.pointer ]
      next
    end

    # Look for (any) closing delimiter
    debugMsg 5, "Trying to match a closing outer delimiter with /\\s*(#{rdelimOuter})/"
    if self.scan( /\s*(#{rdelimOuter})/ )
      debugMsg 4, "Found a right delimiter '#{self.matched}'"

      # If it's the delimiter we're looking for, stop the scan
      if self.matched.strip == closingDelim
        matched = self.matched
        debugMsg 3, "Found the closing delimiter we've been looking for (#{matched.inspect})."
        break

      # Otherwise, it's an error, as we've apparently seen a closing
      # delimiter without a corresponding opening one.
      else
        raise MatchFailure,
          %q:Mismatched closing bracket at "%s..." (offset %s). Expected '%s': %
          [ self.rest[0,20], self.pointer, closingDelim ]
      end
    end

    # Try to match a variable or a quoted phrase
    debugMsg 5, "Trying to match either a variable or quotelike"
    if self.scanVariable( '\s*' ) || self.scanQuotelike( '\s*', patvalid )
      debugMsg 3, "Matched either a variable or quotelike. Offset now %d" % self.pointer
      patvalid = false
      next
    end

    # Match some operators
    # :TODO: This hasn't really been ruby-ified
    debugMsg 5, "Trying to match an operator"
    if self.scan( %r:\s*([-+*x/%^&|.]=?
              | [!=]~
              | =(?!>)
              | (\*\*|&&|\|\||<<|>>)=?
              | split|grep|map|return
              ):x )
      debugMsg 3, "Skipped miscellaneous operator '%s' to offset %d." %
        [ self.matched, self.pointer ]
      patvalid = true
      next
    end

    # Try to match an embedded codeblock
    debugMsg 5, "Trying to match an embedded codeblock with delim pairs: %s",
      innerDelimPairs.inspect
    if self.scanCodeblock( innerDelimPairs )
      debugMsg 3, "Skipped inner codeblock to offset %d." % self.pointer
      patvalid = true
      next
    end

    # Try to match a stray outer-left delimiter
    debugMsg 5, "Trying to match a stray outer-left delimiter (#{ldelimOuter})"
    if self.match?( ldelimOuter )
      raise MatchFailure, "Improperly nested codeblock at offset %d: %s... " %
        [ self.pointer, self.rest[0,20] ]
    end

    # Nothing special here: step over one word or character
    patvalid = false
    self.scan( /\s*(\w+|[-=>]>|.|\Z)/m )
    debugMsg 3, "Skipped '%s' to offset %d" %
      [ self.matched, self.pointer ]
  end

  # An empty String is true in Ruby, so emptiness has to be tested
  # explicitly to detect running off the end of the text without ever
  # seeing the closing delimiter.
  if matched.empty?
    raise MatchFailure, "No match found for opening bracket"
  end

  rval = {
    :match  => self.string[codePos .. (self.pointer - 1)],
    :prefix => self.string[startPos, (codePos-startPos)]
  }
  debugMsg 1, "matchCodeblock succeeded: %s" % rval.inspect
  return rval
end
1183 | ||
1184 | ||
1185 | ### Attempt to derive and return the number of scan methods traversed up to | |
1186 | ### this point by examining the call stack. | |
def scanDepth
  # Counts the frames above this method's caller that belong to one of the
  # public scan* entry points. Both backtrace label styles are recognised:
  # the classic "in `scanVariable'" and the newer
  # "in 'DelimScanner#scanVariable'", which prefixes the owner and uses
  # plain quotes.
  frames = caller( 2 ) || []
  return frames.grep(
    /in [`'](?:[\w:]+[#.])?scan(?:Variable|Tagged|Codeblock|Bracketed|Quotelike)'/
  ).length
end
1192 | ||
1193 | ||
1194 | ####### | |
1195 | private | |
1196 | ####### | |
1197 | ||
1198 | ### Print the specified <tt>message</tt> to STDERR if the scanner's | |
1199 | ### debugging level is greater than or equal to <tt>level</tt>. | |
def debugMsg( level, msgFormat, *args )
  # Level 0 is never printed, and neither is anything more detailed than
  # the scanner's current debugLevel.
  return if level.zero?
  return if self.debugLevel < level

  # Messages are only run through format() when arguments were supplied, so
  # preformatted text containing '%' is passed through untouched.
  text =
    if args.empty?
      msgFormat
    else
      format( msgFormat, *args )
    end

  # Deeper levels are indented two spaces per level
  padding = (" " * (level - 1)) * 2
  $stderr.puts( padding + text )
end
1205 | ||
1206 | ||
1207 | ### Given a series of one or more bracket characters (eg., '<', '[', '{', | |
1208 | ### etc.), return the brackets reversed in order and direction. | |
def revbracket( bracket )
  # Swap every opening bracket for its closing partner, then flip the order
  # so the outermost opener ends up closed last.
  closers = bracket.to_s.tr( '<[{(', '>]})' )
  return closers.reverse
end
1212 | ||
1213 | ||
1214 | ### Given an opening <tt>tag</tt> of the sort matched by #scanTagged, | |
1215 | ### construct and return a closing tag. | |
def makeClosingTag( tag )
  # Builds the regex source for the tag that closes +tag+: the opening
  # bracket(s), a slash, the tag name, and the reversed bracket(s), all
  # regex-quoted. Anything following the name in the opening tag (eg.,
  # attributes, possibly spread over several lines) is discarded.
  #
  # Raises MatchFailure if +tag+ doesn't start with bracket(s) followed by a
  # name matching XmlName.
  debugMsg 3, "Making a closing tag for '%s'" % tag

  # An explicit match is used rather than gsub, which hands back the
  # original string when nothing matches and so can't signal failure.
  parts = /\A([\[(<{]+)(#{XmlName}).*/m.match( tag ) or
    raise MatchFailure, "Unable to construct closing tag to match: #{tag}"

  return Regexp.quote( "#{parts[1]}/#{parts[2]}" + revbracket(parts[1]) )
end
1226 | ||
1227 | ||
1228 | ### Make and return a new Regexp which matches substrings bounded by the | |
1229 | ### specified +delimiters+, not counting those which have been escaped with | |
1230 | ### the escape characters in +escapes+. | |
def makeDelimPattern( delimiters, escapes='\\', prefix='\\s*' )
  # Each character of +delimiters+ is a delimiter; the character at the same
  # position in +escapes+ is its escape character. If +escapes+ is shorter
  # than +delimiters+ its last character is reused for the remainder, and if
  # it is empty a backslash is assumed. A delimiter that is its own escape
  # character is escaped by doubling it. +prefix+ is regex source that must
  # match before the opening delimiter.
  #
  # Returns a Regexp. Raises DelimiterError if +delimiters+ contains no
  # non-whitespace character. Neither argument string is modified.
  delimiters = delimiters.to_s
  raise DelimiterError, "Illegal delimiter '#{delimiters}'" unless delimiters =~ /\S/

  escapes = escapes.to_s
  escapes = '\\' if escapes.empty?

  # Pad the escapes string to the same length as the delimiters. ljust
  # returns a new string (leaving the caller's alone) and is a no-op when
  # there are already at least as many escapes as delimiters.
  escapes = escapes.ljust( delimiters.length, escapes[-1, 1] )

  # Escape each delimiter and a corresponding escape character, and then
  # build a pattern part from them
  patParts = (0...delimiters.length).collect do |i|
    del = Regexp.escape( delimiters[i, 1] )
    esc = Regexp.escape( escapes[i, 1] )

    if del == esc
      "#{del}(?:[^#{del}]*(?:(?:#{del}#{del})[^#{del}]*)*)#{del}"
    else
      "#{del}(?:[^#{esc}#{del}]*(?:#{esc}.[^#{esc}#{del}]*)*)#{del}"
    end
  end

  # Join all the parts together and return one big pattern
  return Regexp::new( "#{prefix}(?:#{patParts.join("|")})" )
end
1257 | ||
1258 | end # class DelimScanner | |
1259 |